mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'kovidgoyal/master'
This commit is contained in:
commit
051e27bad4
8
.gitattributes
vendored
8
.gitattributes
vendored
@ -1,13 +1,14 @@
|
||||
# Tell git what files are txt
|
||||
*.py text
|
||||
*.recipe text
|
||||
*.recipe text diff=python
|
||||
*.coffee text
|
||||
*.js text
|
||||
*.pot text
|
||||
*.po text
|
||||
*.html text
|
||||
*.css text
|
||||
*.xhtml text
|
||||
*.htm text
|
||||
*.css text
|
||||
*.rst text
|
||||
*.md text
|
||||
*.txt text
|
||||
@ -20,12 +21,13 @@
|
||||
*.tmpl text
|
||||
*.qrc text
|
||||
*.sh text
|
||||
*.xhtml text
|
||||
*.fb2 text
|
||||
*.bat text eol=crlf
|
||||
|
||||
# Tell git what files are binary
|
||||
*.zip binary
|
||||
*.epub binary
|
||||
*.docx binary
|
||||
*.jpg binary
|
||||
*.jpeg binary
|
||||
*.png binary
|
||||
|
54
.gitignore
vendored
54
.gitignore
vendored
@ -39,54 +39,6 @@ nbproject/
|
||||
.settings/
|
||||
*.DS_Store
|
||||
calibre_plugins/
|
||||
recipes/.git
|
||||
recipes/.gitignore
|
||||
recipes/README.md
|
||||
recipes/icon_checker.py
|
||||
recipes/readme_updater.py
|
||||
recipes/garfield.recipe
|
||||
recipes/katalog_egazeciarz.recipe
|
||||
recipes/tv_axnscifi.recipe
|
||||
recipes/tv_comedycentral.recipe
|
||||
recipes/tv_discoveryscience.recipe
|
||||
recipes/tv_foxlife.recipe
|
||||
recipes/tv_fox.recipe
|
||||
recipes/tv_hbo.recipe
|
||||
recipes/tv_kinopolska.recipe
|
||||
recipes/tv_nationalgeographic.recipe
|
||||
recipes/tv_polsat2.recipe
|
||||
recipes/tv_polsat.recipe
|
||||
recipes/tv_tv4.recipe
|
||||
recipes/tv_tvn7.recipe
|
||||
recipes/tv_tvn.recipe
|
||||
recipes/tv_tvp1.recipe
|
||||
recipes/tv_tvp2.recipe
|
||||
recipes/tv_tvphd.recipe
|
||||
recipes/tv_tvphistoria.recipe
|
||||
recipes/tv_tvpkultura.recipe
|
||||
recipes/tv_tvppolonia.recipe
|
||||
recipes/tv_tvpuls.recipe
|
||||
recipes/tv_viasathistory.recipe
|
||||
recipes/icons/katalog_egazeciarz.png
|
||||
recipes/icons/garfield.png
|
||||
recipes/icons/tv_axnscifi.png
|
||||
recipes/icons/tv_comedycentral.png
|
||||
recipes/icons/tv_discoveryscience.png
|
||||
recipes/icons/tv_foxlife.png
|
||||
recipes/icons/tv_fox.png
|
||||
recipes/icons/tv_hbo.png
|
||||
recipes/icons/tv_kinopolska.png
|
||||
recipes/icons/tv_nationalgeographic.png
|
||||
recipes/icons/tv_polsat2.png
|
||||
recipes/icons/tv_polsat.png
|
||||
recipes/icons/tv_tv4.png
|
||||
recipes/icons/tv_tvn7.png
|
||||
recipes/icons/tv_tvn.png
|
||||
recipes/icons/tv_tvp1.png
|
||||
recipes/icons/tv_tvp2.png
|
||||
recipes/icons/tv_tvphd.png
|
||||
recipes/icons/tv_tvphistoria.png
|
||||
recipes/icons/tv_tvpkultura.png
|
||||
recipes/icons/tv_tvppolonia.png
|
||||
recipes/icons/tv_tvpuls.png
|
||||
recipes/icons/tv_viasathistory.png
|
||||
recipes/*.mobi
|
||||
recipes/*.epub
|
||||
recipes/debug
|
||||
|
@ -1,4 +1,823 @@
|
||||
|
||||
- version: 0.8.69
|
||||
date: 2012-09-14
|
||||
|
||||
new features:
|
||||
- title: "E-book viewer: Add a button to the toolbar to switch themes easily"
|
||||
tickets: [1047992]
|
||||
|
||||
- title: "When downloading metadata for many books, if some of them fail, add an option to the downloaded message to show the failed books in the main book list, so that they can be individually processed easily"
|
||||
|
||||
- title: "Remember last used window size of the conversion dialogs."
|
||||
tickets: [1049265]
|
||||
|
||||
- title: "Kindle driver: Turn on sending of azw3 files to kindles by default, since the KK now has azw3 support"
|
||||
|
||||
- title: "Conversion: Add support for CSS pseudo classes :hover, :link, :visited, :first-line, :focus, :active, :first-letter"
|
||||
|
||||
- title: "Wireless device driver: Make the default save template not use folders"
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix a regression in the previous release that broke sending of books to the second SD card in SONY readers"
|
||||
tickets: [1047992]
|
||||
|
||||
- title: "Fix a memory leak when scanning for devices in windows"
|
||||
|
||||
- title: "Ebook-viewer: When displaying mathematics, reflow equations that don't fit on a single line"
|
||||
|
||||
- title: "Catalogs: Do not mark the AZW3 catalog as a periodical, as most Kindle devices cannot handle AZW3 periodicals"
|
||||
|
||||
- title: "Content server: When using a custom IP address to listen on via Preferences->Tweaks advertise that IP address via BonJour."
|
||||
|
||||
- title: "Fix ebook catalog generation on linux systems where the encoding is not UTF-8."
|
||||
tickets: [1048404]
|
||||
|
||||
improved recipes:
|
||||
- De Volkskrant
|
||||
- Metro UK
|
||||
- Countryfile
|
||||
- Die Zeit (subscription)
|
||||
- Birmingham post
|
||||
|
||||
new recipes:
|
||||
- title: History Today
|
||||
author: Rick Shang
|
||||
|
||||
- version: 0.8.68
|
||||
date: 2012-09-07
|
||||
|
||||
new features:
|
||||
- title: "Drivers for the Nokia N9, Viewsonic 7e, Prestigio PER3274B and Coby Kyros 7035 "
|
||||
tickets: [1046794,1046544]
|
||||
|
||||
- title: "Add a tutorial on creating catalogs to the User Manual and a link to it in the create catalogs dialog"
|
||||
|
||||
- title: "Wireless device connections: Add an option to force calibre to listen on a particular IP address. Access it by customizing the plugin in Preferences->Plugins"
|
||||
|
||||
- title: "Android driver: Add an extra customization option to configure the directory to which ebooks are sent on the storage cards."
|
||||
tickets: [1045045]
|
||||
|
||||
- title: "Add an option under Preferences->Look & Feel->Book Details to hide the cover in the book details panel"
|
||||
|
||||
- title: "The Calibre Companion Android app that allows wireless connection of Android device to calibre is out of beta. See https://play.google.com/store/apps/details?id=com.multipie.calibreandroid"
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix sorting by author not working in the device view in calibre when connected to iTunes"
|
||||
tickets: [1044619]
|
||||
|
||||
- title: "Fix using the 'configure this device' menu action not validating settings"
|
||||
|
||||
- title: "Device drivers: Ignore corrupted entries in metadata.calibre, instead of raising an error"
|
||||
|
||||
- title: "PDF Output: Do not error out when generating an outline which points to pages that have been removed."
|
||||
tickets: [1044799]
|
||||
|
||||
- title: "PDF Output: Fix incorrect page numbers being generated in the outline when converting some books"
|
||||
|
||||
- title: "PDF Output: Reduce memory consumption when writing out the PDF file, by using a stream"
|
||||
|
||||
- title: "EPUB metadata: When there are multiple <dc:date> tags use the one with the earliest date as the published date"
|
||||
|
||||
improved recipes:
|
||||
- Wall Street journal (subscription version)
|
||||
- Houston Chronicle
|
||||
- Various Romanian news sources
|
||||
- Business Week Magazine
|
||||
- Arcamax
|
||||
|
||||
- version: 0.8.67
|
||||
date: 2012-08-31
|
||||
|
||||
new features:
|
||||
- title: "PDF Output: Generate a PDF Outline based on the Table of Contents of the input document"
|
||||
|
||||
- title: "Conversion: Add an option under Structure Detection to set the 'Start reading at' metadata with an XPath expression."
|
||||
tickets: [1043233]
|
||||
|
||||
- title: "Speed up changing the title and author of files with books larger than 3MB by avoiding an unnecessary extra copy."
|
||||
|
||||
- title: "Wireless device driver: Make detecting and connecting to devices easier on networks where mdns is disabled"
|
||||
|
||||
- title: "PDF Output: Allow choosing the default font family and size when generating PDF files (under PDF Options) in the conversion dialog"
|
||||
|
||||
- title: "Metadata dialog: Comments editor: Allow specifying the name of a link when using the insert link button."
|
||||
tickets: [1042683]
|
||||
|
||||
- title: "Remove the unmaintained pdfmanipulate command line utility. There are many other tools that provide similar functionality, for example, pdftk and podofo"
|
||||
|
||||
bug fixes:
|
||||
- title: "Catalogs: Fix regression that broke sorting of non series titles before series titles"
|
||||
|
||||
- title: "PDF Output: Do not create duplicate embedded fonts in the PDF for every individual HTML file in the input document"
|
||||
|
||||
- title: "Fix regression that broke DnD of files having a # character in their names to the book details panel"
|
||||
|
||||
- title: "PDF Output: Allow generating PDF files with more than 512 pages on windows."
|
||||
tickets: [1041614]
|
||||
|
||||
- title: "Fix minor bug in handling of the completion popups when using the next/previous buttons in the edit metadata dialog"
|
||||
tickets: [1041389]
|
||||
|
||||
improved recipes:
|
||||
- Coding Horror
|
||||
- TIME Magazine
|
||||
|
||||
new recipes:
|
||||
- title: Cumhuriyet Yazarlar
|
||||
author: Sethi Eksi
|
||||
|
||||
- title: Arcadia
|
||||
author: Masahiro Hasegawa
|
||||
|
||||
- title: Business Week Magazine and Chronicle of Higher Education
|
||||
author: Rick Shang
|
||||
|
||||
- title: CIPER Chile
|
||||
author: Darko Miletic
|
||||
|
||||
- version: 0.8.66
|
||||
date: 2012-08-24
|
||||
|
||||
new features:
|
||||
- title: "E-book viewer: Support the display of mathematics in e-books. Supports both embedded TeX and MathML"
|
||||
description: "The calibre ebook viewer can now display embedded mathematics (symbols, equations, fractions, matrices, etc.) in EPUB and HTML ebooks. For details, see: http://manual.calibre-ebook.com/typesetting_math.html"
|
||||
type: major
|
||||
|
||||
- title: "Drivers for SONY PRS-T2, Freelander PD10 and Coolreader Tablet"
|
||||
tickets: [1039103]
|
||||
|
||||
- title: "Wireless device connections: Use a streamed mode for improved networking performance leading to much less time spent sending metadata to/from the device. Also make it easier to specify a fixed port directly in the dialog used to start the connection."
|
||||
|
||||
- title: "Get books: Add ebooksgratuits.com"
|
||||
|
||||
bug fixes:
|
||||
- title: "PDF Output: Handle input epub documents with filenames starting with a dot. Also do not hang if there is an unhandled error."
|
||||
tickets: [1040603]
|
||||
|
||||
- title: "Get Books: Update B&N plugin to handle changes to the B&N website"
|
||||
|
||||
- title: "Content server: Fix regression that caused the port being advertised via BonJour to be incorrect if the user changed the port for the server."
|
||||
tickets: [1037912]
|
||||
|
||||
|
||||
improved recipes:
|
||||
- Variety
|
||||
- The Times UK
|
||||
|
||||
new recipes:
|
||||
- title: Le Monde subscription version
|
||||
author: Remi Vanicat
|
||||
|
||||
- title: Brecha Digital
|
||||
author: Darko Miletic
|
||||
|
||||
- version: 0.8.65
|
||||
date: 2012-08-17
|
||||
|
||||
new features:
|
||||
- title: "A new wireless device driver. This allows connecting wirelessly to a device running a 'smart' calibre client"
|
||||
description: "The wireless connection functions just as if the device was plugged into the computer by USB cable. Currently, Android devices are supported. See https://play.google.com/store/apps/details?id=com.multipie.calibreandroid"
|
||||
type: major
|
||||
|
||||
- title: "MOBI Output: Add an option to control the type of MOBI file produced, to the MOBI Output conversion options. You can now generate an old MOBI6, a new KF8 or a joint MOBI6/KF8 file. By default, MOBI6 files are generated. This replaces the previous use of a tweak."
|
||||
|
||||
- title: "E-book viewer: Make paged mode the default. You can go back to the old flow mode by clicking the button with the yellow scroll in the top right corner of the viewer."
|
||||
|
||||
- title: "Driver for COBY kYROS MID7042 and Samsung Galaxy Ace S5839i"
|
||||
|
||||
bug fixes:
|
||||
- title: "Update version of poppler bundled with calibre to fix reading covers from some PDF files"
|
||||
|
||||
- title: "Get Books: Fix clicking of results from Diesel books when there is only a single result not working"
|
||||
|
||||
- title: "Improve detection of system language on first run of calibre"
|
||||
tickets: [1036354]
|
||||
|
||||
- title: "When finding the next series index and the last series index is a fractional number, use the next largest integer, instead of just adding 1"
|
||||
|
||||
- title: "Fix exception when saving a search/replace when no saved search/replace had been opened previously in the bulk search/replace dialog"
|
||||
tickets: [1036464]
|
||||
|
||||
- title: "Fix restore database not restoring entries for the original_* formats"
|
||||
|
||||
- title: "Fix first run wizard not allowing empty email sending settings"
|
||||
tickets: [1036358]
|
||||
|
||||
- title: "Do not error out when setting the cover for a book that has no folders in the library."
|
||||
tickets: [1035935]
|
||||
|
||||
- title: "Conversion pipeline: Ignore unparseable values in the color attribute of font tags, instead of erroring out on them."
|
||||
tickets: [1035633]
|
||||
|
||||
- title: "Catalogs: Fix regression that broke creation of catalogs while a device is connected"
|
||||
|
||||
- title: "Fix --with-library=/whatever not working for calibredb list"
|
||||
|
||||
improved recipes:
|
||||
- Slashdot
|
||||
- Various Canadian newspapers
|
||||
- Business Spectator
|
||||
|
||||
- version: 0.8.64
|
||||
date: 2012-08-09
|
||||
|
||||
new features:
|
||||
- title: "E-book viewer: Allow viewing images in the book in a separate pop-up window by right clicking on the image. Useful if you want to keep some image, like a map to the side while reading the book."
|
||||
|
||||
- title: "Catalogs: Allow generation of catalogs in AZW3 format. Also add more powerful configuration options to exclude books and set prefixes. See http://www.mobileread.com/forums/showthread.php?t=187298 for details."
|
||||
|
||||
- title: "Generate a PDF version of the User Manual"
|
||||
|
||||
bug fixes:
|
||||
- title: "News download: Fix broken handling of nesting for HTML 5 tags when parsing with BeautifulSoup"
|
||||
|
||||
- title: "EPUB: Handle files in the EPUB that have semi-colons in their file names. This means in particular using URL escaping when creating the NCX as ADE cannot handle unescaped semi-colons in the NCX."
|
||||
tickets: [1033665]
|
||||
|
||||
- title: "Conversion pipeline: Ignore unparseable CSS instead of erroring out on it."
|
||||
tickets: [1034074]
|
||||
|
||||
- title: "When setting up a column coloring rule based on the languages column, allow entry of localized language names instead of only ISO codes"
|
||||
|
||||
- title: "Catalogs: Generate cover for mobi/azw3 catalogs"
|
||||
|
||||
- title: "Update the last modified column record of a book, whenever a format is added to the book."
|
||||
|
||||
- title: "E-book viewer: Fix line scrolling stops at breaks option not working in paged mode"
|
||||
tickets: [1033430]
|
||||
|
||||
- title: "MOBI Output: Fix ToC at start option having no effect when converting some input documents that have an out-of-spine ToC."
|
||||
tickets: [1033656]
|
||||
|
||||
- title: "Catalog Generation: When generating EPUB/MOBI catalogs add more flexible rules for excluding books. Also add rules to customize the prefix characters used."
|
||||
|
||||
- title: "Make setting published date using metadata search/replace more robust."
|
||||
|
||||
- title: "Tag Browser: Flatten the display of sub-groups when sort by is not set to 'name'."
|
||||
tickets: [1032746]
|
||||
|
||||
- title: "Fix isbn:false not matching if other identifiers are attached to the book."
|
||||
|
||||
improved recipes:
|
||||
- The New Republic
|
||||
- ZDNet
|
||||
- Metro UK
|
||||
- FHM UK
|
||||
|
||||
new recipes:
|
||||
- title: eKundelek.pl
|
||||
author: Artur Stachecki
|
||||
|
||||
- title: Sueddeutsche Mobil
|
||||
author: Andreas Zeiser
|
||||
|
||||
- version: 0.8.63
|
||||
date: 2012-08-02
|
||||
|
||||
new features:
|
||||
- title: "E-book viewer: Allow quick saving and loading of viewer settings as 'themes'."
|
||||
tickets: [1024611]
|
||||
|
||||
- title: "Ebook-viewer: Add a restore defaults button to the viewer preferences dialog"
|
||||
|
||||
- title: "E-book viewer: Add simple settings for text and background colors"
|
||||
|
||||
- title: "Add an entry to save to disk when right clicking a format in the book details panel"
|
||||
|
||||
- title: "ODT metadata: Read first image as the metadata cover from ODT files. Also allow ODT authors to set custom properties for extended metadata."
|
||||
|
||||
- title: "E-book viewer and PDF Output: Resize images that are longer than the page to fit onto a single page"
|
||||
|
||||
bug fixes:
|
||||
- title: "KF8 Output: Fix bug where some calibre generated KF8 files would cause the Amazon KF8 viewer on the Touch to go into an infinite loop when using the next page function"
|
||||
tickets: [1026421]
|
||||
|
||||
- title: "News download: Add support for <img> tags that link to SVG images."
|
||||
tickets: [1031553]
|
||||
|
||||
- title: "Update podofo to 0.9.1 in all binary builds, to fix corruption of some PDFs when updating metadata."
|
||||
tickets: [1031086]
|
||||
|
||||
- title: "Catalog generation: Handle authors whose last name is a number."
|
||||
|
||||
- title: "KF8 Input: Handle html entities in the NCX toc entries correctly"
|
||||
|
||||
- title: "Fix a calibre crash that affected some windows installs"
|
||||
tickets: [1030234]
|
||||
|
||||
- title: "MOBI Output: Normalize unicode strings before writing to file, to workaround lack of support for non-normal unicode in Amazon's MOBI renderer."
|
||||
tickets: [1029825]
|
||||
|
||||
- title: "EPUB Input: Handle files that have duplicate entries in the spine"
|
||||
|
||||
- title: "Fix regression in Kobo driver that caused the on device column to not be updated after deleting books"
|
||||
|
||||
new recipes:
|
||||
- title: Dziennik Polski
|
||||
author: Gregorz Maj
|
||||
|
||||
- title: High Country Blogs
|
||||
author: Armin Geller
|
||||
|
||||
- title: Philosophy Now
|
||||
author: Rick Shang
|
||||
|
||||
- version: 0.8.62
|
||||
date: 2012-07-27
|
||||
|
||||
new features:
|
||||
- title: "Book details panel: Allow right clicking on a format to delete it."
|
||||
|
||||
- title: "When errors occur in lots of background jobs, add an option to the error message to temporarily suppress subsequent error messages."
|
||||
tickets: [886904]
|
||||
|
||||
- title: "E-book viewer full screen mode: Allow clicking in the left and right page margins to turn pages."
|
||||
tickets: [1024819]
|
||||
|
||||
- title: "Drivers for various Android devices"
|
||||
tickets: [1028690,1027431]
|
||||
|
||||
- title: "Advanced search dialog: When starting on the title/author/etc. tab, restore the previously used search kind as well."
|
||||
tickets: [1029745]
|
||||
|
||||
- title: "When presenting the calibre must be restarted warning after installing a new plugin, add a restart now button so that the user can conveniently restart calibre. Currently only works when going via Preferences->Plugins->Get new plugins"
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix main window layout state being saved incorrectly if calibre is killed without a proper shutdown"
|
||||
|
||||
- title: "Fix boolean and date searching in non english calibre installs."
|
||||
|
||||
- title: "Conversion: Ignore invalid chapter detection and level n ToC expressions instead of erroring out"
|
||||
|
||||
improved recipes:
|
||||
- Psychology Today
|
||||
- The Smithsonian
|
||||
- The New Republic
|
||||
- Various updated Polish news sources
|
||||
- The Sun
|
||||
- San Francisco Bay Guardian
|
||||
- AnandTech
|
||||
- Smashing Magazine
|
||||
|
||||
new recipes:
|
||||
- title: Linux Journal and Conowego.pl
|
||||
author: fenuks
|
||||
|
||||
- title: A list apart and .net magazine
|
||||
author: Marc Busque
|
||||
|
||||
- version: 0.8.61
|
||||
date: 2012-07-20
|
||||
|
||||
new features:
|
||||
- title: "E-book viewer: Add a paged mode that splits up the text into pages, like in a paper book instead of presenting it as a single column. To activate click the button with the yellow scroll icon in the top right corner."
|
||||
type: major
|
||||
description: "In paged mode, the ebook viewer no longer cuts off the last line of text at the bottom of the screen, and it respects CSS page-break directives. You can also set page margins and control the number of pages displayed on screen by clicking the Preferences button in the viewer and going to 'Text layout in paged mode'."
|
||||
|
||||
- title: "Digitally sign the calibre OS X and windows builds"
|
||||
|
||||
- title: "Get Books: Add Mills and Boon UK"
|
||||
|
||||
- title: "Various minor improvements to the Bulk metadata edit dialog"
|
||||
tickets: [1025825, 1025838, 1025628]
|
||||
|
||||
- title: "Fix various regressions in the auto-complete functionality for authors/series/tags etc introduced in 0.8.60"
|
||||
|
||||
- title: "Drivers for various new Android devices"
|
||||
tickets: [1024934]
|
||||
|
||||
- title: "MOBI: Add support for the new language EXTH header field in MOBI files generated by kindlegen 2.5"
|
||||
|
||||
bug fixes:
|
||||
- title: "KF8 Output: Fix calibre produced KF8 files not showing the 'Use publisher font' option on the Kindle Touch when they have embedded fonts"
|
||||
|
||||
- title: "Txt/fb2/rtf/pml/rb output: Fix a non-visible element's tail text (which should be visible) being ignored."
|
||||
tickets: [1026541]
|
||||
|
||||
- title: "Book details panel: When displaying a link to amazon, use a country specific name like amazon.fr instead of using amazon.com for all countries"
|
||||
|
||||
- title: "Conversion: When splitting on page breaks, ignore page-breaks with values of auto and inherit. "
|
||||
tickets: [1018875]
|
||||
|
||||
- title: "Metadata jacket: Specify foreground in addition to the background color for the title banner so that it remains readable if the user tries to monkey with the CSS in the viewer."
|
||||
|
||||
- title: "PDF Output: Fix rendering of cover as first page of the PDF (ignore margins so that the image covers the entire page)"
|
||||
|
||||
- title: "Linux binaries: Bundle libglib to avoid incompatibilities with glib on various distros."
|
||||
tickets: [1022019]
|
||||
|
||||
- title: "Fix find_identical_books() choking on books with too many authors"
|
||||
|
||||
|
||||
improved recipes:
|
||||
- Toronto Star
|
||||
- American Prospect
|
||||
- faz.net
|
||||
|
||||
- version: 0.8.60
|
||||
date: 2012-07-13
|
||||
|
||||
new features:
|
||||
- title: "When searching, allow use of un-accented characters to match accented characters in all fields and all languages (not just authors and English as before)"
|
||||
description: "The rules for matching un-accented characters are done in a language dependent way. So if your calibre interface language is set to English, n will match both n and ñ, but if it is set to Spanish, it will match only n, as in Spanish ñ is a separate letter of the alphabet. This makes searching a little slower, so if you have a very large library you can turn it off via Preferences->Searching."
|
||||
type: major
|
||||
|
||||
- title: "Content server: Show a best guess for the IP address the content server is currently listening at in the connect/share menu."
|
||||
tickets: [1024128]
|
||||
|
||||
- title: "E-book viewer: Add an option to show a clock in full screen mode."
|
||||
tickets: [1022086]
|
||||
|
||||
- title: "Drivers for Paquito Imaginarium and a few Android phones"
|
||||
tickets: [1024021,1023613,1023461,1022401]
|
||||
|
||||
- title: "HTMLZ Output: Add option to use the book title as the filename for the html file inside the archive"
|
||||
|
||||
- title: "Make the list of displayed fields in the book details panel a per library setting"
|
||||
|
||||
- title: "Have autocomplete on authors/series/tags/etc. ignore accented characters when finding matches (similar to the changes to search above)"
|
||||
|
||||
- title: "Support for retina displays in OS X (I hope)"
|
||||
tickets: [1022191]
|
||||
|
||||
- title: "Remove the dependency on the zip command line tool when developing plugins"
|
||||
|
||||
bug fixes:
|
||||
- title: "Kobo driver: Do not perform write operations on the Kobo database if its version is newer than the latest version the driver supports, for safety"
|
||||
|
||||
- title: "KF8 Input: Ignore encoding declarations inside the html markup, as they are sometimes incorrect."
|
||||
tickets: [1022933]
|
||||
|
||||
- title: "Force refresh of cached composite column values when values in the cache are changed"
|
||||
|
||||
- title: "Fix a regression that broke calibre --shutdown-running-calibre on windows."
|
||||
tickets: [1022504]
|
||||
|
||||
- title: "Possible workaround for Qt 4.8.2 open file dialog failing on some linux distros."
|
||||
tickets: [1022019]
|
||||
|
||||
- title: "Catalogs: Fix some epubcheck errors when generating catalogs in EPUB format"
|
||||
|
||||
- title: "Linux installer: When calling the xdg utilities use system libraries rather than the libraries bundled with calibre"
|
||||
|
||||
- title: "Fix numeric sort for composite custom columns that use custom separators"
|
||||
tickets: [1021814]
|
||||
|
||||
- title: "Tag browser: When grouping by first letter, handle languages that have 'letters' made of more than one character. This can be turned off via Preferences->Tweaks"
|
||||
|
||||
improved recipes:
|
||||
- Hola magazine
|
||||
- Adventure Gamers
|
||||
- Cosmopolitan UK
|
||||
- Onda Rock
|
||||
|
||||
new recipes:
|
||||
- title: Empire Magazine
|
||||
author: Dave Asbury
|
||||
|
||||
- title: NZZ Folio
|
||||
author: Bernd Leinfelder
|
||||
|
||||
- title: Warentest
|
||||
author: asdfdsfksd
|
||||
|
||||
|
||||
- version: 0.8.59
|
||||
date: 2012-07-06
|
||||
|
||||
new features:
|
||||
- title: "Drivers for Samsung SGH-T989 and Sony Ericsson Sola"
|
||||
tickets: [1021365]
|
||||
|
||||
- title: "Conversion pipeline: When removing the first image, also remove the html file the image is found in, if that file has no other content. Allows this option to be used to remove covers from EPUB files without leaving behind a blank page."
|
||||
|
||||
- title: "Content server: Add a navigation panel at the bottom of each page."
|
||||
tickets: [1020225]
|
||||
|
||||
- title: "calibredb: Add a backup_metadata command to manually run the backup to opf from the command line"
|
||||
|
||||
- title: "User defined driver: Add option to swap main memory and card a."
|
||||
tickets: [1020056]
|
||||
|
||||
- title: "Add new option to the series_index_auto_increment tweak, no_change, that causes calibre not to change the series_index when the series is changed"
|
||||
|
||||
bug fixes:
|
||||
- title: "PDF Output: Resize large images so that they do not get cut off at the right edge of the page."
|
||||
|
||||
- title: "On linux ensure that WM_CLASS for the main calibre GUI is set to 'calibre-gui' to match the name of the calibre-gui.desktop file. This is apparently required by the GNOME 3 shell."
|
||||
tickets: [1020297]
|
||||
|
||||
- title: "Update ICU in all builds to version 49.1"
|
||||
|
||||
- title: "Tag browser: Fix regression that broke drag and drop between user categories in the tag browser"
|
||||
|
||||
- title: "When copying to library and deleting after copy, do not place deleted files in recycle bin, as this is redundant and slow (they have already been copied into another library)"
|
||||
|
||||
- title: "Fix yes/no fields with value of No not showing up in the book details panel"
|
||||
|
||||
- title: "Catalogs: Better sorting for non English languages"
|
||||
tickets: [930882]
|
||||
|
||||
- title: "Get Books: Fix Foyles UK, Weightless books, ebooks.com and ozon.ru"
|
||||
|
||||
- title: "CHM Input: Fix handling of chm files that split their html into multiple sub-directories."
|
||||
tickets: [1018792]
|
||||
|
||||
improved recipes:
|
||||
- FHM UK
|
||||
- The Age
|
||||
- weblogs_ssl
|
||||
- Heraldo.es
|
||||
|
||||
new recipes:
|
||||
- title: CATO Institute and Heritage Foundation
|
||||
author: _reader
|
||||
|
||||
- version: 0.8.58
|
||||
date: 2012-06-29
|
||||
|
||||
new features:
|
||||
- title: "Add some texture to calibre generated covers"
|
||||
|
||||
- title: "Drivers for Sogo SS-4370, HTC G2 and Lenovo ThinkPad Tablet"
|
||||
tickets: [1019050, 1017010]
|
||||
|
||||
- title: "Add search to the Manage tags/series/etc. dialogs"
|
||||
|
||||
- title: "News download: Add support for images embedded in the HTML"
|
||||
|
||||
- title: "calibre -s now waits for calibre to shutdown"
|
||||
|
||||
bug fixes:
|
||||
- title: "Workaround for iTunes breaking scripting with version 10.6.3 on OS X."
|
||||
tickets: [1012243]
|
||||
|
||||
- title: "EPUB Input: When there are multiple elements of the same type in the OPF guide, use the first rather than the last element."
|
||||
|
||||
- title: "Windows: Disable the new UI style if the color depth of the desktop is less than 32 bits per pixel"
|
||||
|
||||
- title: "ISBNDB metadata plugin: Return results even though they have no comments"
|
||||
|
||||
- title: "More robust handling of EINTR during IPC"
|
||||
|
||||
- title: "Metadata download: Support for amazon's new results page markup"
|
||||
|
||||
- title: "EPUB Output: Fix a bug that could cause corrupted output when doing an EPUB/OEB to EPUB conversion if the input EPUB had multiple files with the same name"
|
||||
|
||||
- title: "KF8 Output: Fix a couple of bugs that could lead to generation of invalid KF8 files."
|
||||
tickets: [1016672]
|
||||
|
||||
improved recipes:
|
||||
- ABC Digital
|
||||
- O Globo
|
||||
|
||||
new recipes:
|
||||
- title: Sign of the Times and New Statesman
|
||||
author: TerminalVeracity
|
||||
|
||||
- title: CT24
|
||||
author: zoidozoido
|
||||
|
||||
- title: SmileZilla
|
||||
author: Will
|
||||
|
||||
- title: Marketing Sensoriale
|
||||
author: NotTaken
|
||||
|
||||
- version: 0.8.57
|
||||
date: 2012-06-22
|
||||
|
||||
new features:
|
||||
- title: "PDF Output: Full pagination support. No more cutoff bottom line."
|
||||
type: major
|
||||
description: "Fixes a long standing bug in calibre's PDF Output that caused the bottom line of some pages to be partially cut off and prevented top and bottom margins from working."
|
||||
|
||||
- title: "calibredb add now prints out the ids of added books"
|
||||
tickets: [1014303]
|
||||
|
||||
- title: "Kobo Vox driver: Add support for new Google Play firmware"
|
||||
tickets: [1014129]
|
||||
|
||||
- title: "Driver for Prestigio PMP5097PRO"
|
||||
tickets: [1013864]
|
||||
|
||||
- title: "Add option to disable tooltips in the book list under Preferences->Look & Feel"
|
||||
|
||||
- title: "When customizing builtin recipes download the latest version of the recipe to customize instead of using the possibly out of date bundled version"
|
||||
|
||||
bug fixes:
|
||||
- title: "PDF Output: Use the cover from the input document when no cover is specified during a conversion"
|
||||
|
||||
- title: "E-book Viewer: Printing now has proper pagination with top and bottom margins, no lines partially cut off at the bottom, and full style retention"
|
||||
|
||||
- title: "KF8 Input: Handle files with incorrectly encoded guide type entries."
|
||||
tickets: [1015020]
|
||||
|
||||
- title: "E-book viewer: Disable hyphenation on windows xp as Qt WebKit barfs on soft hyphens on windows XP"
|
||||
|
||||
- title: "Handle OS X systems with invalid palette colors."
|
||||
tickets: [1014900]
|
||||
|
||||
- title: "Tag Browser: Fix regression that broke partitioning of hierarchical categories."
|
||||
tickets: [1014065]
|
||||
|
||||
- title: "LRF Output: Handle negative page margins"
|
||||
tickets: [1014103]
|
||||
|
||||
- title: "Template language: Fix arithmetic functions to tolerate the value 'None' as returned by raw_field()"
|
||||
|
||||
- title: "Fix custom title sort set in the edit metadata dialog getting reset by the conversion dialog"
|
||||
|
||||
improved recipes:
|
||||
- The Economist
|
||||
- Akter
|
||||
- 24 Sata sr
|
||||
- Novi List
|
||||
- Metro Montreal
|
||||
- Mode Durable
|
||||
- CanardPC
|
||||
- The Economic Collapse
|
||||
- Our Daily Bread
|
||||
|
||||
new recipes:
|
||||
- title: Akter Daily
|
||||
author: Darko Miletic
|
||||
|
||||
- title: BBC Brasil
|
||||
author: Claviola
|
||||
|
||||
- title: Homopedia.pl
|
||||
author: rainbowwarrior
|
||||
|
||||
- title: National Geographic Magazine
|
||||
author: Terminal Veracity
|
||||
|
||||
- title: Something Awful
|
||||
author: atordo
|
||||
|
||||
- title: Huffington Post UK
|
||||
author: Krittika Goyal
|
||||
|
||||
- version: 0.8.56
|
||||
date: 2012-06-15
|
||||
|
||||
new features:
|
||||
- title: "Make the new calibre style default on Windows and OS X."
|
||||
type: major
|
||||
description: "This change gives a more 'modern' feel to the calibre user interface with focus highlighting, gradients, rounded corners, etc. In case you prefer the old look, you can restore it under Preferences->Look & Feel->User interface style"
|
||||
|
||||
- title: "Get Books: Add the new SONY Reader store"
|
||||
|
||||
- title: "Read metadata from .docx (Microsoft Word) files"
|
||||
|
||||
- title: "Allow customizing the behavior of the searching for similar books by right clicking the book. You can now tell calibre to search different columns than the traditional author/series/publisher/tags/etc. in Preferences->Searching"
|
||||
|
||||
- title: "Add option to restore alternating row colors to the Tag Browser under Preferences->Look & Feel->Tag Browser"
|
||||
|
||||
- title: "Update to Qt 4.8.2 on windows compiled with link time code generation for a small performance boost"
|
||||
|
||||
bug fixes:
|
||||
- title: "Get Books: Update plugins to handle website changes at ebooks.com, project gutenberg, and virtualo"
|
||||
|
||||
- title: "AZW3 Output: Fix TOC at start option not working"
|
||||
|
||||
- title: "AZW3 Output: Close self closing script/style/title/head tags explicitly as they cause problems in webkit based renderers like the Kindle Fire and calibre's viewers."
|
||||
|
||||
- title: "Fix the current_library_name() template function not updating after a library switch"
|
||||
|
||||
- title: "AZW3 Output: Handle the case of a link pointing to the last line of text in the document."
|
||||
tickets: [1011330]
|
||||
|
||||
- title: "Fix regression in 0.8.55 that broke highlighting of items matching a search in the Tag Browser"
|
||||
tickets: [1011030]
|
||||
|
||||
- title: "News download: Handle query only relative URLs"
|
||||
|
||||
improved recipes:
|
||||
- Christian Science Monitor
|
||||
- Neue Zurcher Zeitung
|
||||
- Birmingham Post
|
||||
- Metro UK
|
||||
- New Musical Express
|
||||
- The Independent
|
||||
- The Daily Mirror
|
||||
- Vreme
|
||||
- Smithsonian Magazine
|
||||
|
||||
new recipes:
|
||||
- title: NZZ Webpaper
|
||||
author: Bernd Leinfelder
|
||||
|
||||
|
||||
- version: 0.8.55
|
||||
date: 2012-06-08
|
||||
|
||||
new features:
|
||||
- title: "Add a new 'Calibre style' interface look that is more modern than the default look. You can select it via Preferences->Look & Feel->User interface style."
|
||||
|
||||
- title: "New, subtler look for the Tag Browser"
|
||||
|
||||
- title: "Driver for Trekstor Pyrus and Pantech Android Tablet"
|
||||
tickets: [1008946, 1007929]
|
||||
|
||||
- title: "Conversion pipeline: Handle guide elements with incorrectly cased hrefs. Also handle guide elements of type coverimagestandard and thumbimagestandard."
|
||||
|
||||
- title: "Allow user to customize trekstor plugin to send books into sub directories."
|
||||
tickets: [1007646]
|
||||
|
||||
- title: "EPUB Input: Add support for EPUB files that use the IDPF font obfuscation algorithm. Apparently, people have started producing these now."
|
||||
tickets: [1008810]
|
||||
|
||||
- title: "Save single format to disk: Only show the format available in the selected books."
|
||||
tickets: [1007287]
|
||||
|
||||
bug fixes:
|
||||
- title: "MOBI Output: When using the insert metadata at start of book option, do not use a table to layout the metadata, as the Kindle Fire crashes when rendering the table."
|
||||
tickets: [1002119]
|
||||
|
||||
- title: "Device detection: Fix a bug that could cause device detection to fail completely if devices with certain vendor/product ids are connected."
|
||||
tickets: [1009718]
|
||||
|
||||
- title: "MOBI Output: When rasterizing svgs only compute style information when an actual svg image is present. Small speedup when converting large svg-free documents to MOBI."
|
||||
|
||||
- title: "SONY T1 driver: Fix support for collections of books placed on the SD card"
|
||||
tickets: [986044]
|
||||
|
||||
- title: "Fix partitioning problems in tag browser with fields that have no name, such as identifiers and formats"
|
||||
|
||||
- title: "Welcome wizard: Preferentially use the kindle email address set as default when more than one such address exists."
|
||||
tickets: [1007932 ]
|
||||
|
||||
- title: "Fix regression in 0.8.54 that broke the use of the shortcut Alt+A to select books by the same author"
|
||||
|
||||
improved recipes:
|
||||
- Various Polish recipes
|
||||
- Vice Magazine
|
||||
- EL Mundo Today
|
||||
- Haaretz
|
||||
- Good Housekeeping
|
||||
- El Pais
|
||||
- Christian Science Monitor
|
||||
- Marketing Magazine
|
||||
- Instapaper
|
||||
|
||||
new recipes:
|
||||
- title: Various Philippine news sources
|
||||
author: jde
|
||||
|
||||
- title: Natemat.pl and wirtualnemedia.pl
|
||||
author: fenuks
|
||||
|
||||
- title: Rabble.ca
|
||||
author: timtoo
|
||||
|
||||
- version: 0.8.54
|
||||
date: 2012-05-31
|
||||
|
||||
new features:
|
||||
- title: "E-book viewer: The Table of contents panel now tracks the current position in the book. As you scroll through the book, the entry you are currently on is highlighted."
|
||||
type: major
|
||||
description: "To see this feature in action, open the Table of Contents panel in the viewer by clicking the button with three blue lines on it. As you page through the book, the chapter you are reading currently is highlighted in the Table of Contents Panel. Obviously, this will only work if the book you are reading has a Table of Contents. You can also use the Ctrl+PgUp and Ctrl+PgDn keys to quickly skip between chapters."
|
||||
|
||||
- title: "calibredb: Allow setting metadata for individual fields with the set_metadata command"
|
||||
|
||||
- title: "Make it a little harder to accidentally change the sorting of items in the Tag Browser. Also frees up more vertical space for the Tag Browser itself."
|
||||
|
||||
- title: "The calibre user manual is now available in AZW3 format as well as EPUB"
|
||||
|
||||
bug fixes:
|
||||
- title: "Automatic titlecasing: No longer try to capitalize scottish names, as there are too many special cases."
|
||||
tickets: [775825]
|
||||
|
||||
- title: "Never crash when reading metadata from PDF files (reading now always happens in a worker process)"
|
||||
tickets: [1006452]
|
||||
|
||||
- title: "EPUB Input: Do not skip the valid children of an NCX node that has no text/href"
|
||||
|
||||
- title: "Archos driver: Detect SD card"
|
||||
tickets: [1005650]
|
||||
|
||||
- title: "When bulk downloading metadata and the user deletes one of the books for which metadata is being downloaded, just ignore it, instead of erroring out"
|
||||
|
||||
- title: "When deleting books from the bottom of the booklist, ensure that the bottom book after deleting is selected"
|
||||
|
||||
- title: "Fix regression in 0.8.53 that broke sending APNX files to older Kindle devices"
|
||||
|
||||
- title: "Use correct text color for selected rows in the list of matches when downloading metadata and showing results in get books."
|
||||
tickets: [1004568]
|
||||
|
||||
improved recipes:
|
||||
- The Independent
|
||||
- Welt der Physik
|
||||
- China Daily
|
||||
- The Grid
|
||||
- Prospect Magazine
|
||||
|
||||
new recipes:
|
||||
- title: La gazetta del Mezzogiorno
|
||||
author: faber1971
|
||||
|
||||
|
||||
- version: 0.8.53
|
||||
date: 2012-05-25
|
||||
|
||||
|
865
Changelog.yaml
865
Changelog.yaml
@ -20,6 +20,53 @@
|
||||
# new recipes:
|
||||
# - title:
|
||||
|
||||
- version: 0.9.34
|
||||
date: 2013-06-07
|
||||
|
||||
new features:
|
||||
- title: "Conversion of Microsoft Word documents (.docx files generated by Word 2007 or newer)"
|
||||
type: major
|
||||
description: "DOCX files created with Microsoft Word 2007 or newer can now be converted by calibre. The converter has support for lists, tables, images, all types of text formatting, footnotes, endnotes and even dropcaps. A sample docx file showing the capabilities of the converter is available: http://calibre-ebook.com/downloads/demos/demo.docx Note that this code is still very new, so there are more than likely a few bugs waiting to be squashed."
|
||||
|
||||
- title: "Kobo driver: Support for the newly released firmware 2.6.1. Also remove empty shelves from the Aura HD home page when deleting books."
|
||||
tickets: [1187791]
|
||||
|
||||
- title: "E-book viewer: Add Keyboard shortcuts for Back and Forward (Alt+Left, Alt+Right)"
|
||||
tickets: [1186928]
|
||||
|
||||
- title: "Allow right clicking on an author in Book Details to manage that author, i.e. change the author name, sort value or link."
|
||||
tickets: [1186192]
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix regression that broke FB2 input in the previous release."
|
||||
tickets: [1186213]
|
||||
|
||||
- title: "Catalog generation on OS X: Fix handling of some unicode characters"
|
||||
tickets: [1066922]
|
||||
|
||||
- title: "HTML Input: Avoid spurious log warnings about unspecified language/creator when these are actually specified on the command line."
|
||||
tickets: [1186899]
|
||||
|
||||
- title: "MOBI Output: Fix regression in 0.9.31 that caused vertical margins specified on some block level elements to be ignored."
|
||||
tickets: [1186533]
|
||||
|
||||
- title: "ToC Editor: Handle ebooks that have <p> tags inside the <head> tags. Instead of erroring out, the <p> tags are automatically moved into <body>."
|
||||
tickets: [1186298]
|
||||
|
||||
- title: "Linux build: Include the ffi libs from both gcc and libffi."
|
||||
tickets: [1186148]
|
||||
|
||||
- title: "When deleting custom recipes, use recycle bin."
|
||||
tickets: [1186142]
|
||||
|
||||
improved recipes:
|
||||
- Folha de Sao Paulo
|
||||
- Metro News NL
|
||||
|
||||
new recipes:
|
||||
- title: Seventh Guard
|
||||
author: koliberek
|
||||
|
||||
- version: 0.9.33
|
||||
date: 2013-05-31
|
||||
|
||||
@ -1878,821 +1925,3 @@
|
||||
author: drMerry
|
||||
|
||||
|
||||
- version: 0.8.69
|
||||
date: 2012-09-14
|
||||
|
||||
new features:
|
||||
- title: "E-book viewer: Add a button to the toolbar to switch themes easily"
|
||||
tickets: [1047992]
|
||||
|
||||
- title: "When downloading metadata for many books, if some of them fail, add an option to the downloaded message to show the failed books in the main book list, so that they can be individually processed easily"
|
||||
|
||||
- title: "Remember last used window size of the conversion dialogs."
|
||||
tickets: [1049265]
|
||||
|
||||
- title: "Kindle driver: Turn on sending of azw3 files to kindles by default, since the KK now has azw3 support"
|
||||
|
||||
- title: "Conversion: Add support for CSS pseudo classes :hover, :link, :visited, :first-line, :focus, :active, :first-letter"
|
||||
|
||||
- title: "Wireless device driver: Make the default save template not use folders"
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix a regression in the previous release that broke sending of books to the second SD card in SONY readers"
|
||||
tickets: [1047992]
|
||||
|
||||
- title: "Fix a memory leak when scanning for devices in windows"
|
||||
|
||||
- title: "Ebook-viewer: When displaying mathematics, reflow equations that don't fit on a single line"
|
||||
|
||||
- title: "Catalogs: Do not mark the AZW3 catalog as a periodical, as most Kindle devices cannot handle AZW3 periodicals"
|
||||
|
||||
- title: "Content server: When using a custom IP address to listen on via Preferences->Tweaks advertise that IP address via BonJour."
|
||||
|
||||
- title: "Fix ebook catalog generation on linux systems where the encoding is not UTF-8."
|
||||
tickets: [1048404]
|
||||
|
||||
improved recipes:
|
||||
- De Volkskrant
|
||||
- Metro UK
|
||||
- Countryfile
|
||||
- Die Zeit (subscription)
|
||||
- Birmingham Post
|
||||
|
||||
new recipes:
|
||||
- title: History Today
|
||||
author: Rick Shang
|
||||
|
||||
- version: 0.8.68
|
||||
date: 2012-09-07
|
||||
|
||||
new features:
|
||||
- title: "Drivers for the Nokia N9, Viewsonic 7e, Prestigio PER3274B and Coby Kyros 7035 "
|
||||
tickets: [1046794,1046544]
|
||||
|
||||
- title: "Add a tutorial on creating catalogs to the User Manual and a link to it in the create catalogs dialog"
|
||||
|
||||
- title: "Wireless device connections: Add an option to force calibre to listen on a particular IP address. Access it by customizing the plugin in Preferences->Plugins"
|
||||
|
||||
- title: "Android driver: Add an extra customization option to configure the directory to which ebooks are sent on the storage cards."
|
||||
tickets: [1045045]
|
||||
|
||||
- title: "Add an option under Preferences->Look & Feel->Book Details to hide the cover in the book details panel"
|
||||
|
||||
- title: "The Calibre Companion Android app that allows wireless connection of Android device to calibre is out of beta. See https://play.google.com/store/apps/details?id=com.multipie.calibreandroid"
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix sorting by author not working in the device view in calibre when connected to iTunes"
|
||||
tickets: [1044619]
|
||||
|
||||
- title: "Fix using the 'configure this device' menu action not validating settings"
|
||||
|
||||
- title: "Device drivers: Ignore corrupted entries in metadata.calibre, instead of raising an error"
|
||||
|
||||
- title: "PDF Output: Do not error out when generating an outline which points to pages that have been removed."
|
||||
tickets: [1044799]
|
||||
|
||||
- title: "PDF Output: Fix incorrect page numbers being generated in the outline when converting some books"
|
||||
|
||||
- title: "PDF Output: Reduce memory consumption when writing out the PDF file, by using a stream"
|
||||
|
||||
- title: "EPUB metadata: When there are multiple <dc:date> tags use the one with the earliest date as the published date"
|
||||
|
||||
improved recipes:
|
||||
- Wall Street Journal (subscription version)
|
||||
- Houston Chronicle
|
||||
- Various Romanian news sources
|
||||
- Business Week Magazine
|
||||
- Arcamax
|
||||
|
||||
- version: 0.8.67
|
||||
date: 2012-08-31
|
||||
|
||||
new features:
|
||||
- title: "PDF Output: Generate a PDF Outline based on the Table of Contents of the input document"
|
||||
|
||||
- title: "Conversion: Add an option under Structure Detection to set the 'Start reading at' metadata with an XPath expression."
|
||||
tickets: [1043233]
|
||||
|
||||
- title: "Speed up changing the title and author of files with books larger than 3MB by avoiding an unnecessary extra copy."
|
||||
|
||||
- title: "Wireless device driver: Make detecting and connecting to devices easier on networks where mdns is disabled"
|
||||
|
||||
- title: "PDF Output: Allow choosing the default font family and size when generating PDF files (under PDF Options) in the conversion dialog"
|
||||
|
||||
- title: "Metadata dialog: Comments editor: Allow specifying the name of a link when using the insert link button."
|
||||
tickets: [1042683]
|
||||
|
||||
- title: "Remove the unmaintained pdfmanipulate command line utility. There are many other tools that provide similar functionality, for example, pdftk and podofo"
|
||||
|
||||
bug fixes:
|
||||
- title: "Catalogs: Fix regression that broke sorting of non series titles before series titles"
|
||||
|
||||
- title: "PDF Output: Do not create duplicate embedded fonts in the PDF for every individual HTML file in the input document"
|
||||
|
||||
- title: "Fix regression that broke DnD of files having a # character in their names to the book details panel"
|
||||
|
||||
- title: "PDF Output: Allow generating PDF files with more than 512 pages on windows."
|
||||
tickets: [1041614]
|
||||
|
||||
- title: "Fix minor bug in handling of the completion popups when using the next/previous buttons in the edit metadata dialog"
|
||||
tickets: [1041389]
|
||||
|
||||
improved recipes:
|
||||
- Coding Horror
|
||||
- TIME Magazine
|
||||
|
||||
new recipes:
|
||||
- title: Cumhuriyet Yzarlar
|
||||
author: Sethi Eksi
|
||||
|
||||
- title: Arcadia
|
||||
author: Masahiro Hasegawa
|
||||
|
||||
- title: Business Week Magazine and Chronicle of Higher Education
|
||||
author: Rick Shang
|
||||
|
||||
- title: CIPER Chile
|
||||
author: Darko Miletic
|
||||
|
||||
- version: 0.8.66
|
||||
date: 2012-08-24
|
||||
|
||||
new features:
|
||||
- title: "E-book viewer: Support the display of mathematics in e-books. Supports both embedded TeX and MathML"
|
||||
description: "The calibre ebook viewer can now display embedded mathematics (symbols, equations, fractions, matrices, etc.) in EPUB and HTML ebooks. For details, see: http://manual.calibre-ebook.com/typesetting_math.html"
|
||||
type: major
|
||||
|
||||
- title: "Drivers for SONY PRS-T2, Freelander PD10 and Coolreader Tablet"
|
||||
tickets: [1039103]
|
||||
|
||||
- title: "Wireless device connections: Use a streamed mode for improved networking performance leading to much less time spent sending metadata to/from the device. Also make it easier to specify a fixed port directly in the dialog used to start the connection."
|
||||
|
||||
- title: "Get books: Add ebooksgratuitis.com"
|
||||
|
||||
bug fixes:
|
||||
- title: "PDF Output: Handle input epub documents with filenames starting with a dot. Also do not hang if there is an unhandled error."
|
||||
tickets: [1040603]
|
||||
|
||||
- title: "Get Books: Update B&N plugin to handle changes to the B&N website"
|
||||
|
||||
- title: "Content server: Fix regression that caused the port being advertised via BonJour to be incorrect if the user changed the port for the server."
|
||||
tickets: [1037912]
|
||||
|
||||
|
||||
improved recipes:
|
||||
- Variety
|
||||
- The Times UK
|
||||
|
||||
new recipes:
|
||||
- title: Le Monde subscription version
|
||||
author: Remi Vanicat
|
||||
|
||||
- title: Brecha Digital
|
||||
author: Darko Miletic
|
||||
|
||||
- version: 0.8.65
|
||||
date: 2012-08-17
|
||||
|
||||
new features:
|
||||
- title: "A new wireless device driver. This allows connecting wirelessly to a device running a 'smart' calibre client"
|
||||
description: "The wireless connection functions just as if the device was plugged into the computer by USB cable. Currently, Android devices are supported. See https://play.google.com/store/apps/details?id=com.multipie.calibreandroid"
|
||||
type: major
|
||||
|
||||
- title: "MOBI Output: Add an option to control the type of MOBI file produced, to the MOBI Output conversion options. You can now generate an old MOBI6, a new KF8 or a joint MOBI6/KF8 file. By default, MOBI6 files are generated. This replaces the previous use of a tweak."
|
||||
|
||||
- title: "E-book viewer: Make paged mode the default. You can go back to the old flow mode by clicking the button with the yellow scroll in the top right corner of the viewer."
|
||||
|
||||
- title: "Driver for COBY Kyros MID7042 and Samsung Galaxy Ace S5839i"
|
||||
|
||||
bug fixes:
|
||||
- title: "Update version of poppler bundled with calibre to fix reading covers from some PDF files"
|
||||
|
||||
- title: "Get Books: Fix clicking of results from Diesel books when there is only a single result not working"
|
||||
|
||||
- title: "Improve detection of system language on first run of calibre"
|
||||
tickets: [1036354]
|
||||
|
||||
- title: "When finding the next series index and the last series index is a fractional number, use the next largest integer, instead of just adding 1"
|
||||
|
||||
- title: "Fix exception when saving a search/replace when no saved search/replace had been opened previously in the bulk search/replace dialog"
|
||||
tickets: [1036464]
|
||||
|
||||
- title: "Fix restore database not restoring entries for the original_* formats"
|
||||
|
||||
- title: "Fix first run wizard not allowing empty email sending settings"
|
||||
tickets: [1036358]
|
||||
|
||||
- title: "Do not error out when setting the cover for a book that has no folders in the library."
|
||||
tickets: [1035935]
|
||||
|
||||
- title: "Conversion pipeline: Ignore unparseable values in the color attribute of font tags, instead of erroring out on them."
|
||||
tickets: [1035633]
|
||||
|
||||
- title: "Catalogs: Fix regression that broke creation of catalogs while a device is connected"
|
||||
|
||||
- title: "Fix --with-library=/whatever not working for calibredb list"
|
||||
|
||||
improved recipes:
|
||||
- Slashdot
|
||||
- Various Canadian newspapers
|
||||
- Business Spectator
|
||||
|
||||
- version: 0.8.64
|
||||
date: 2012-08-09
|
||||
|
||||
new features:
|
||||
- title: "E-book viewer: Allow viewing images in the book in a separate pop-up window by right clicking on the image. Useful if you want to keep some image, like a map to the side while reading the book."
|
||||
|
||||
- title: "Catalogs: Allow generation of catalogs in AZW3 format. Also add more powerful configuration options to exclude books and set prefixes. See http://www.mobileread.com/forums/showthread.php?t=187298 for details."
|
||||
|
||||
- title: "Generate a PDF version of the User Manual"
|
||||
|
||||
bug fixes:
|
||||
- title: "News download: Fix broken handling of nesting for HTML 5 tags when parsing with BeautifulSoup"
|
||||
|
||||
- title: "EPUB: Handle files in the EPUB that have semi-colons in their file names. This means in particular using URL escaping when creating the NCX as ADE cannot handle unescaped semi-colons in the NCX."
|
||||
tickets: [1033665]
|
||||
|
||||
- title: "Conversion pipeline: Ignore unparseable CSS instead of erroring out on it."
|
||||
tickets: [1034074]
|
||||
|
||||
- title: "When setting up a column coloring rule based on the languages column, allow entry of localized language names instead of only ISO codes"
|
||||
|
||||
- title: "Catalogs: Generate cover for mobi/azw3 catalogs"
|
||||
|
||||
- title: "Update the last modified column record of a book, whenever a format is added to the book."
|
||||
|
||||
- title: "E-book viewer: Fix line scrolling stops at breaks option not working in paged mode"
|
||||
tickets: [1033430]
|
||||
|
||||
- title: "MOBI Output: Fix ToC at start option having no effect when converting some input documents that have an out-of-spine ToC."
|
||||
tickets: [1033656]
|
||||
|
||||
- title: "Catalog Generation: When generating EPUB/MOBI catalogs add more flexible rules for excluding books. Also add rules to customize the prefix characters used."
|
||||
|
||||
- title: "Make setting published date using metadata search/replace more robust."
|
||||
|
||||
- title: "Tag Browser: Flatten the display of sub-groups when sort by is not set to 'name'."
|
||||
tickets: [1032746]
|
||||
|
||||
- title: "Fix isbn:false not matching if other identifiers are attached to the book."
|
||||
|
||||
improved recipes:
|
||||
- The New Republic
|
||||
- ZDNet
|
||||
- Metro UK
|
||||
- FHM UK
|
||||
|
||||
new recipes:
|
||||
- title: eKundelek.pl
|
||||
author: Artur Stachecki
|
||||
|
||||
- title: Sueddeutsche Mobil
|
||||
author: Andreas Zeiser
|
||||
|
||||
- version: 0.8.63
|
||||
date: 2012-08-02
|
||||
|
||||
new features:
|
||||
- title: "E-book viewer: Allow quick saving and loading of viewer settings as 'themes'."
|
||||
tickets: [1024611]
|
||||
|
||||
- title: "Ebook-viewer: Add a restore defaults button to the viewer preferences dialog"
|
||||
|
||||
- title: "E-book viewer: Add simple settings for text and background colors"
|
||||
|
||||
- title: "Add an entry to save to disk when right clicking a format in the book details panel"
|
||||
|
||||
- title: "ODT metadata: Read first image as the metadata cover from ODT files. Also allow ODT authors to set custom properties for extended metadata."
|
||||
|
||||
- title: "E-book viewer and PDF Output: Resize images that are longer than the page to fit onto a single page"
|
||||
|
||||
bug fixes:
|
||||
- title: "KF8 Output: Fix bug where some calibre generated KF8 files would cause the Amazon KF8 viewer on the Touch to go into an infinite loop when using the next page function"
|
||||
tickets: [1026421]
|
||||
|
||||
- title: "News download: Add support for <img> tags that link to SVG images."
|
||||
tickets: [1031553]
|
||||
|
||||
- title: "Update podofo to 0.9.1 in all binary builds, to fix corruption of some PDFs when updating metadata."
|
||||
tickets: [1031086]
|
||||
|
||||
- title: "Catalog generation: Handle authors whose last name is a number."
|
||||
|
||||
- title: "KF8 Input: Handle html entities in the NCX toc entries correctly"
|
||||
|
||||
- title: "Fix a calibre crash that affected some windows installs"
|
||||
tickets: [1030234]
|
||||
|
||||
- title: "MOBI Output: Normalize unicode strings before writing to file, to workaround lack of support for non-normal unicode in Amazon's MOBI renderer."
|
||||
tickets: [1029825]
|
||||
|
||||
- title: "EPUB Input: Handle files that have duplicate entries in the spine"
|
||||
|
||||
- title: "Fix regression in Kobo driver that caused the on device column to not be updated after deleting books"
|
||||
|
||||
new recipes:
|
||||
- title: Dziennik Polski
|
||||
author: Gregorz Maj
|
||||
|
||||
- title: High Country Blogs
|
||||
author: Armin Geller
|
||||
|
||||
- title: Philosophy Now
|
||||
author: Rick Shang
|
||||
|
||||
- version: 0.8.62
|
||||
date: 2012-07-27
|
||||
|
||||
new features:
|
||||
- title: "Book details panel: Allow right clicking on a format to delete it."
|
||||
|
||||
- title: "When errors occur in lots of background jobs, add an option to the error message to temporarily suppress subsequent error messages."
|
||||
tickets: [886904]
|
||||
|
||||
- title: "E-book viewer full screen mode: Allow clicking in the left and right page margins to turn pages."
|
||||
tickets: [1024819]
|
||||
|
||||
- title: "Drivers for various Android devices"
|
||||
tickets: [1028690,1027431]
|
||||
|
||||
- title: "Advanced search dialog: When starting on the title/author/etc. tab, restore the previously used search kind as well."
|
||||
tickets: [1029745]
|
||||
|
||||
- title: "When presenting the calibre must be restarted warning after installing a new plugin, add a restart now button so that the user can conveniently restart calibre. Currently only works when going via Preferences->Plugins->Get new plugins"
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix main window layout state being saved incorrectly if calibre is killed without a proper shutdown"
|
||||
|
||||
- title: "Fix boolean and date searching in non english calibre installs."
|
||||
|
||||
- title: "Conversion: Ignore invalid chapter detection and level n ToC expressions instead of erroring out"
|
||||
|
||||
improved recipes:
|
||||
- Psychology Today
|
||||
- The Smithsonian
|
||||
- The New Republic
|
||||
- Various updated Polish news sources
|
||||
- The Sun
|
||||
- San Francisco Bay Guardian
|
||||
- AnandTech
|
||||
- Smashing Magazine
|
||||
|
||||
new recipes:
|
||||
- title: Linux Journal and Conowego.pl
|
||||
author: fenuks
|
||||
|
||||
- title: A list apart and .net magazine
|
||||
author: Marc Busque
|
||||
|
||||
- version: 0.8.61
|
||||
date: 2012-07-20
|
||||
|
||||
new features:
|
||||
- title: "E-book viewer: Add a paged mode that splits up the text into pages, like in a paper book instead of presenting it as a single column. To activate click the button with the yellow scroll icon in the top right corner."
|
||||
type: major
|
||||
description: "In paged mode, the ebook viewer no longer cuts off the last line of text at the bottom of the screen, and it respects CSS page-break directives. You can also set page margins and control the number of pages displayed on screen by clicking the Preferences button in the viewer and going to 'Text layout in paged mode'."
|
||||
|
||||
- title: "Digitally sign the calibre OS X and windows builds"
|
||||
|
||||
- title: "Get Books: Add Mills and Boon UK"
|
||||
|
||||
- title: "Various minor improvements to the Bulk metadata edit dialog"
|
||||
tickets: [1025825, 1025838, 1025628]
|
||||
|
||||
- title: "Fix various regressions in the auto-complete functionality for authors/series/tags etc introduced in 0.8.60"
|
||||
|
||||
- title: "Drivers for various new Android devices"
|
||||
tickets: [1024934]
|
||||
|
||||
- title: "MOBI: Add support for the new language EXTH header field in MOBI files generated by kindlegen 2.5"
|
||||
|
||||
bug fixes:
|
||||
- title: "KF8 Output: Fix calibre produced KF8 files not showing the 'Use publisher font' option on the Kindle Touch when they have embedded fonts"
|
||||
|
||||
- title: "Txt/fb2/rtf/pml/rb output: Fix tail text of non-visible elements (which should be visible) being ignored."
|
||||
tickets: [1026541]
|
||||
|
||||
- title: "Book details panel: When displaying a link to amazon, use a country specific name like amazon.fr instead of using amazon.com for all countries"
|
||||
|
||||
- title: "Conversion: When splitting on page breaks, ignore page-breaks with values of auto and inherit. "
|
||||
tickets: [1018875]
|
||||
|
||||
- title: "Metadata jacket: Specify foreground in addition to the background color for the title banner so that it remains readable if the user tries to monkey with the CSS in the viewer."
|
||||
|
||||
- title: "PDF Output: Fix rendering of cover as first page of PDF (ignore margins so that the image covers the entire page)"
|
||||
|
||||
- title: "Linux binaries: Bundle libglib to avoid incompatibilities with glib on various distros."
|
||||
tickets: [1022019]
|
||||
|
||||
- title: "Fix find_identical_books() choking on books with too many authors"
|
||||
|
||||
|
||||
improved recipes:
|
||||
- Toronto Star
|
||||
- American Prospect
|
||||
- faz.net
|
||||
|
||||
- version: 0.8.60
|
||||
date: 2012-07-13
|
||||
|
||||
new features:
|
||||
- title: "When searching, allow use of un-accented characters to match accented characters in all fields and all languages (not just authors and English as before)"
|
||||
description: "The rules for matching un-accented characters are done in a language dependent way. So if your calibre interface language is set to English, n will match both n and ñ, but if it is set to Spanish, it will match only n, as ñ is a separate letter in Spanish. This makes searching a little slower, so if you have a very large library you can turn it off via Preferences->Searching."
|
||||
type: major
|
||||
|
||||
- title: "Content server: Show a best guess for the IP address the content server is currently listening at in the connect/share menu."
|
||||
tickets: [1024128]
|
||||
|
||||
- title: "E-book viewer: Add an option to show a clock in full screen mode."
|
||||
tickets: [1022086]
|
||||
|
||||
- title: "Drivers for Paquito Imaginarium and a few Android phones"
|
||||
tickets: [1024021,1023613,1023461,1022401]
|
||||
|
||||
- title: "HTMLZ Output: Add option to use the book title as the filename for the html file inside the archive"
|
||||
|
||||
- title: "Make the list of displayed fields in the book details panel a per library setting"
|
||||
|
||||
- title: "Have autocomplete on authors/series/tags/etc. ignore accented characters when finding matches (similar to the changes to search above)"
|
||||
|
||||
- title: "Support for retina displays in OS X (I hope)"
|
||||
tickets: [1022191]
|
||||
|
||||
- title: "Remove the dependency on the zip command line tool when developing plugins"
|
||||
|
||||
bug fixes:
|
||||
- title: "Kobo driver: Do not perform write operations on the Kobo database if its version is newer than the latest version the driver supports, for safety"
|
||||
|
||||
- title: "KF8 Input: Ignore encoding declarations inside the html markup, as they are sometimes incorrect."
|
||||
tickets: [1022933]
|
||||
|
||||
- title: "Force refresh of cached composite column values when values in the cache are changed"
|
||||
|
||||
- title: "Fix a regression that broke calibre --shutdown-running-calibre on windows."
|
||||
tickets: [1022504]
|
||||
|
||||
- title: "Possible workaround for Qt 4.8.2 open file dialog failing on some linux distros."
|
||||
tickets: [1022019]
|
||||
|
||||
- title: "Catalogs: Fix some epubcheck errors when generating catalogs in EPUB format"
|
||||
|
||||
- title: "Linux installer: When calling the xdg utilities use system libraries rather than the libraries bundled with calibre"
|
||||
|
||||
- title: "Fix numeric sort for composite custom columns that use custom separators"
|
||||
tickets: [1021814]
|
||||
|
||||
- title: "Tag browser: When grouping by first letter, handle languages that have 'letters' made of more than one character. This can be turned off via Preferences->Tweaks"
|
||||
|
||||
improved recipes:
|
||||
- Hola magazine
|
||||
- Adventure Gamers
|
||||
- Cosmopolitan UK
|
||||
- Onda Rock
|
||||
|
||||
new recipes:
|
||||
- title: Empire Magazine
|
||||
author: Dave Asbury
|
||||
|
||||
- title: NZZ Folio
|
||||
author: Bernd Leinfelder
|
||||
|
||||
- title: Warentest
|
||||
author: asdfdsfksd
|
||||
|
||||
|
||||
- version: 0.8.59
|
||||
date: 2012-07-06
|
||||
|
||||
new features:
|
||||
- title: "Drivers for Samsung SGH-T989 and Sony Ericsson Sola"
|
||||
tickets: [1021365]
|
||||
|
||||
- title: "Conversion pipeline: When removing the first image, also remove the html file the image is found in, if that file has no other content. Allows this option to be used to remove covers from EPUB files without leaving behind a blank page."
|
||||
|
||||
- title: "Content server: Add a navigation panel at the bottom of each page."
|
||||
tickets: [1020225]
|
||||
|
||||
- title: "calibredb: Add a backup_metadata command to manually run the backup to opf from the command line"
|
||||
|
||||
- title: "User defined driver: Add option to swap main memory and card a."
|
||||
tickets: [1020056]
|
||||
|
||||
- title: "Add new option to the series_index_auto_increment tweak, no_change, that causes calibre not to change the series_index when the series is changed"
|
||||
|
||||
bug fixes:
|
||||
- title: "PDF Output: Resize large images so that they do not get cut off at the right edge of the page."
|
||||
|
||||
- title: "On linux ensure that WM_CLASS for the main calibre GUI is set to 'calibre-gui' to match the name of the calibre-gui.desktop file. This is apparently required by the GNOME 3 shell."
|
||||
tickets: [1020297]
|
||||
|
||||
- title: "Update ICU in all builds to version 49.1"
|
||||
|
||||
- title: "Tag browser: Fix regression that broke drag and drop between user categories in the tag browser"
|
||||
|
||||
- title: "When copying to library and deleting after copy, do not place deleted files in recycle bin, as this is redundant and slow (they have already been copied into another library)"
|
||||
|
||||
- title: "Fix yes/no fields with value of No not showing up in the book details panel"
|
||||
|
||||
- title: "Catalogs: Better sorting for non English languages"
|
||||
tickets: [930882]
|
||||
|
||||
- title: "Get Books: Fix Foyles UK, Weightless books, ebooks.com and ozon.ru"
|
||||
|
||||
- title: "CHM Input: Fix handling of chm files that split their html into multiple sub-directories."
|
||||
tickets: [1018792]
|
||||
|
||||
improved recipes:
|
||||
- FHM UK
|
||||
- The Age
|
||||
- weblogs_ssl
|
||||
- Heraldo.es
|
||||
|
||||
new recipes:
|
||||
- title: CATO Institute and Heritage Foundation
|
||||
author: _reader
|
||||
|
||||
- version: 0.8.58
|
||||
date: 2012-06-29
|
||||
|
||||
new features:
|
||||
- title: "Add some texture to calibre generated covers"
|
||||
|
||||
- title: "Drivers for Sogo SS-4370, HTC G2 and Lenovo ThinkPad Tablet"
|
||||
tickets: [1019050, 1017010]
|
||||
|
||||
- title: "Add search to the Manage tags/series/etc. dialogs"
|
||||
|
||||
- title: "News download: Add support for images embedded in the HTML"
|
||||
|
||||
- title: "calibre -s now waits for calibre to shutdown"
|
||||
|
||||
bug fixes:
|
||||
- title: "Workaround for iTunes breaking scripting with version 10.6.3 on OS X."
|
||||
tickets: [1012243]
|
||||
|
||||
- title: "EPUB Input: When there are multiple elements of the same type in the OPF guide, use the first rather than the last element."
|
||||
|
||||
- title: "Windows: Disable the new UI style if the color depth of the desktop is less than 32 bits per pixel"
|
||||
|
||||
- title: "ISBNDB metadata plugin: Return results even though they have no comments"
|
||||
|
||||
- title: "More robust handling of EINTR during IPC"
|
||||
|
||||
- title: "Metadata download: Support for amazon's new results page markup"
|
||||
|
||||
- title: "EPUB Output: Fix a bug that could cause corrupted output when doing an EPUB/OEB to EPUB conversion if the input EPUB had multiple files with the same name"
|
||||
|
||||
- title: "KF8 Output: Fix a couple of bugs that could lead to generation of invalid KF8 files."
|
||||
tickets: [1016672]
|
||||
|
||||
improved recipes:
|
||||
- ABC Digital
|
||||
- O Globo
|
||||
|
||||
new recipes:
|
||||
- title: Sign of the Times and New Statesman
|
||||
author: TerminalVeracity
|
||||
|
||||
- title: CT24
|
||||
author: zoidozoido
|
||||
|
||||
- title: SmileZilla
|
||||
author: Will
|
||||
|
||||
- title: Marketing Sensoriale
|
||||
author: NotTaken
|
||||
|
||||
- version: 0.8.57
|
||||
date: 2012-06-22
|
||||
|
||||
new features:
|
||||
- title: "PDF Output: Full pagination support. No more cutoff bottom line."
|
||||
type: major
|
||||
description: "Fixes a long standing bug in calibre's PDF Output that caused the bottom line of some pages to be partially cut off and prevented top and bottom margins from working."
|
||||
|
||||
- title: "calibredb add now prints out the ids of added books"
|
||||
tickets: [1014303]
|
||||
|
||||
- title: "Kobo Vox driver: Add support for new Google Play firmware"
|
||||
tickets: [1014129]
|
||||
|
||||
- title: "Driver for Prestigio PMP5097PRO"
|
||||
tickets: [1013864]
|
||||
|
||||
- title: "Add option to disable tooltips in the book list under Preferences->Look & Feel"
|
||||
|
||||
- title: "When customizing builtin recipes download the latest version of the recipe to customize instead of using the possibly out of date bundled version"
|
||||
|
||||
bug fixes:
|
||||
- title: "PDF Output: Use the cover from the input document when no cover is specified during a conversion"
|
||||
|
||||
- title: "E-book Viewer: Printing now has proper pagination with top and bottom margins, no lines partially cut-off at the bottom, and full style retention"
|
||||
|
||||
- title: "KF8 Input: Handle files with incorrectly encoded guide type entries."
|
||||
tickets: [1015020]
|
||||
|
||||
- title: "E-book viewer: Disable hyphenation on windows xp as Qt WebKit barfs on soft hyphens on windows XP"
|
||||
|
||||
- title: "Handle OS X systems with invalid palette colors."
|
||||
tickets: [1014900]
|
||||
|
||||
- title: "Tag Browser: Fix regression that broke partitioning of hierarchical categories."
|
||||
tickets: [1014065]
|
||||
|
||||
- title: "LRF Output: Handle negative page margins"
|
||||
tickets: [1014103]
|
||||
|
||||
- title: "Template language: Fix arithmetic functions to tolerate the value 'None' as returned by raw_field()"
|
||||
|
||||
- title: "Fix custom title sort set in the edit metadata dialog getting reset by the conversion dialog"
|
||||
|
||||
improved recipes:
|
||||
- The Economist
|
||||
- Akter
|
||||
- 24 Sata sr
|
||||
- Novi List
|
||||
- Metro Montreal
|
||||
- Mode Durable
|
||||
- CanardPC
|
||||
- The Economic Collapse
|
||||
- Our Daily Bread
|
||||
|
||||
new recipes:
|
||||
- title: Akter Daily
|
||||
author: Darko Miletic
|
||||
|
||||
- title: BBC Brasil
|
||||
author: Claviola
|
||||
|
||||
- title: Homopedia.pl
|
||||
author: rainbowwarrior
|
||||
|
||||
- title: National Geographic Magazine
|
||||
author: Terminal Veracity
|
||||
|
||||
- title: Something Awful
|
||||
author: atordo
|
||||
|
||||
- title: Huffington Post UK
|
||||
author: Krittika Goyal
|
||||
|
||||
- version: 0.8.56
|
||||
date: 2012-06-15
|
||||
|
||||
new features:
|
||||
- title: "Make the new calibre style default on Windows and OS X."
|
||||
type: major
|
||||
description: "This change gives a more 'modern' feel to the calibre user interface with focus highlighting, gradients, rounded corners, etc. In case you prefer the old look, you can restore it under Preferences->Look & Feel->User interface style"
|
||||
|
||||
- title: "Get Books: Add the new SONY Reader store"
|
||||
|
||||
- title: "Read metadata from .docx (Microsoft Word) files"
|
||||
|
||||
- title: "Allow customizing the behavior of the searching for similar books by right clicking the book. You can now tell calibre to search different columns than the traditional author/series/publisher/tags/etc. in Preferences->Searching"
|
||||
|
||||
- title: "Add option to restore alternating row colors to the Tag Browser under Preferences->Look & Feel->Tag Browser"
|
||||
|
||||
- title: "Update to Qt 4.8.2 on windows compiled with link time code generation for a small performance boost"
|
||||
|
||||
bug fixes:
|
||||
- title: "Get Books: Update plugins to handle website changes at ebooks.com, project gutenberg, and virtualo"
|
||||
|
||||
- title: "AZW3 Output: Fix TOC at start option not working"
|
||||
|
||||
- title: "AZW3 Output: Close self closing script/style/title/head tags explicitly as they cause problems in webkit based renderers like the Kindle Fire and calibre's viewers."
|
||||
|
||||
- title: "Fix the current_library_name() template function not updating after a library switch"
|
||||
|
||||
- title: "AZW3 Output: Handle the case of a link pointing to the last line of text in the document."
|
||||
tickets: [1011330]
|
||||
|
||||
- title: "Fix regression in 0.8.55 that broke highlighting of items matching a search in the Tag Browser"
|
||||
tickets: [1011030]
|
||||
|
||||
- title: "News download: Handle query only relative URLs"
|
||||
|
||||
improved recipes:
|
||||
- Christian Science Monitor
|
||||
- Neue Zurcher Zeitung
|
||||
- Birmingham Post
|
||||
- Metro UK
|
||||
- New Musical Express
|
||||
- The Independent
|
||||
- The Daily Mirror
|
||||
- Vreme
|
||||
- Smithsonian Magazine
|
||||
|
||||
new recipes:
|
||||
- title: NZZ Webpaper
|
||||
author: Bernd Leinfelder
|
||||
|
||||
|
||||
- version: 0.8.55
|
||||
date: 2012-06-08
|
||||
|
||||
new features:
|
||||
- title: "Add a new 'Calibre style' interface look that is more modern than the default look. You can select it via Preferences->Look & Feel->User interface style."
|
||||
|
||||
- title: "New, subtler look for the Tag Browser"
|
||||
|
||||
- title: "Driver for Trekstor Pyrus and Pantech Android Tablet"
|
||||
tickets: [1008946, 1007929]
|
||||
|
||||
- title: "Conversion pipeline: Handle guide elements with incorrectly cased hrefs. Also handle guide elements of type coverimagestandard and thumbimagestandard."
|
||||
|
||||
- title: "Allow user to customize trekstor plugin to send books into sub directories."
|
||||
tickets: [1007646]
|
||||
|
||||
- title: "EPUB Input: Add support for EPUB files that use the IDPF font obfuscation algorithm. Apparently, people have started producing these now."
|
||||
tickets: [1008810]
|
||||
|
||||
- title: "Save single format to disk: Only show the format available in the selected books."
|
||||
tickets: [1007287]
|
||||
|
||||
bug fixes:
|
||||
- title: "MOBI Output: When using the insert metadata at start of book option, do not use a table to layout the metadata, as the Kindle Fire crashes when rendering the table."
|
||||
tickets: [1002119]
|
||||
|
||||
- title: "Device detection: Fix a bug that could cause device detection to fail completely if devices with certain vendor/product ids are connected."
|
||||
tickets: [1009718]
|
||||
|
||||
- title: "MOBI Output: When rasterizing svgs only compute style information when an actual svg image is present. Small speedup when converting large svg-free documents to MOBI."
|
||||
|
||||
- title: "SONY T1 driver: Fix support for collections of books placed on the SD card"
|
||||
tickets: [986044]
|
||||
|
||||
- title: "Fix partitioning problems in tag browser with fields that have no name, such as identifiers and formats"
|
||||
|
||||
- title: "Welcome wizard: Preferentially use the kindle email address set as default when more than one such address exists."
|
||||
tickets: [1007932 ]
|
||||
|
||||
- title: "Fix regression in 0.8.54 that broke the use of the shortcut Alt+A to select books by the same author"
|
||||
|
||||
improved recipes:
|
||||
- Various Polish recipes
|
||||
- Vice Magazine
|
||||
- EL Mundo Today
|
||||
- Haaretz
|
||||
- Good Housekeeping
|
||||
- El Pais
|
||||
- Christian Science Monitor
|
||||
- Marketing Magazine
|
||||
- Instapaper
|
||||
|
||||
new recipes:
|
||||
- title: Various Philippine news sources
|
||||
author: jde
|
||||
|
||||
- title: Natemat.pl and wirtualnemedia.pl
|
||||
author: fenuks
|
||||
|
||||
- title: Rabble.ca
|
||||
author: timtoo
|
||||
|
||||
- version: 0.8.54
|
||||
date: 2012-05-31
|
||||
|
||||
new features:
|
||||
- title: "E-book viewer: The Table of contents panel now tracks the current position in the book. As you scroll through the book, the entry you are currently on is highlighted."
|
||||
type: major
|
||||
description: "To see this feature in action, open the Table of Contents panel in the viewer by clicking the button with three blue lines on it. As you page through the book, the chapter you are reading currently is highlighted in the Table of Contents Panel. Obviously, this will only work if the book you are reading has a Table of Contents. You can also use the Ctrl+PgUp and Ctrl+PgDn keys to quickly skip between chapters."
|
||||
|
||||
- title: "calibredb: Allow setting metadata for individual fields with the set_metadata command"
|
||||
|
||||
- title: "Make it a little harder to accidentally change the sorting of items in the Tag Browser. Also frees up more vertical space for the Tag Browser itself."
|
||||
|
||||
- title: "The calibre user manual is now available in AZW3 format as well as EPUB"
|
||||
|
||||
bug fixes:
|
||||
- title: "Automatic titlecasing: No longer try to capitalize Scottish names, as there are too many special cases."
|
||||
tickets: [775825]
|
||||
|
||||
- title: "Never crash when reading metadata from PDF files (reading now always happens in a worker process)"
|
||||
tickets: [1006452]
|
||||
|
||||
- title: "EPUB Input: Do not skip the valid children of an NCX node that has no text/href"
|
||||
|
||||
- title: "Archos driver: Detect SD card"
|
||||
tickets: [1005650]
|
||||
|
||||
- title: "When bulk downloading metadata and the user deletes one of the books for which metadata is being downloaded, just ignore it, instead of erroring out"
|
||||
|
||||
- title: "When deleting books from the bottom of the booklist, ensure that the bottom book after deleting is selected"
|
||||
|
||||
- title: "Fix regression in 0.8.53 that broke sending APNX files to older Kindle devices"
|
||||
|
||||
- title: "Use correct text color for selected rows in the list of matches when downloading metadata and showing results in get books."
|
||||
tickets: [1004568]
|
||||
|
||||
improved recipes:
|
||||
- The Independent
|
||||
- Welt der Physik
|
||||
- China Daily
|
||||
- The Grid
|
||||
- Prospect Magazine
|
||||
|
||||
new recipes:
|
||||
- title: La Gazzetta del Mezzogiorno
|
||||
author: faber1971
|
||||
|
||||
|
@ -574,28 +574,33 @@ format, whether input or output are available in the conversion dialog under the
|
||||
Convert Microsoft Word documents
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|app| does not directly convert .doc/.docx files from Microsoft Word. However, in Word, you can save the document
|
||||
as HTML and then convert the resulting HTML file with |app|. When saving as HTML, be sure to use the
|
||||
"Save as Web Page, Filtered" option as this will produce clean HTML that will convert well. Note that Word
|
||||
produces really messy HTML, converting it can take a long time, so be patient. Another alternative is to
|
||||
use the free OpenOffice. Open your .doc file in OpenOffice and save it in OpenOffice's format .odt. |app| can
|
||||
directly convert .odt files.
|
||||
|app| can automatically convert ``.docx`` files created by Microsoft Word 2007 and
|
||||
newer. Just add the file to |app| and click convert (make sure you are running
|
||||
the latest version of |app| as support for ``.docx`` files is very new).
|
||||
|
||||
There is a Word macro package that can automate the conversion of Word documents using |app|. It also makes
|
||||
generating the Table of Contents much simpler. It is called BookCreator and is available for free
|
||||
at `mobileread <http://www.mobileread.com/forums/showthread.php?t=28313>`_.
|
||||
.. note::
|
||||
There is a `demo .docx file <http://calibre-ebook.com/downloads/demos/demo.docx>`_
|
||||
that demonstrates the capabilities of the |app| conversion engine. Just
|
||||
download it and convert it to EPUB or AZW3 to see what |app| can do.
|
||||
|
||||
An easy way to generate a Table of Contents when converting a Word document is:
|
||||
|app| will automatically generate a Table of Contents based on headings if you mark
|
||||
your headings with the ``Heading 1``, ``Heading 2``, etc. styles in Word. Open
|
||||
the output ebook in the calibre viewer and click the Table of Contents button
|
||||
to view the generated Table of Contents.
|
||||
|
||||
1. Mark your Chapters and sub-Chapters in the doc file with one of the MS built-in styles called 'Heading 1', 'Heading 2', ..., 'Heading 6'. 'Heading 1' equates to the HTML tag <h1>, 'Heading 2' to <h2> etc
|
||||
Older .doc files
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
2. Save the doc as Webpage-filtered (rather than Webpage) and import the html file into |app|
|
||||
|
||||
3. When you convert in |app| you use what you did in step 1 to set the box called 'Detect chapters at' on the Convert - Structure Detection page. For example:
|
||||
|
||||
* If you mark Chapters with style 'Heading 2' then set the 'Detect chapters at' box to //h:h2 This will give you a proper external metadata TOC in the converted epub.
|
||||
* A slightly more complex example...if your book has Sections and Chapters and you want a 2-level nested metadata TOC. Mark the doc Sections with style 'Heading 2' and the Chapters with style 'Heading 3'. When you convert set the 'Detect chapters at' box to //h:h2|//h:h3. On the Convert - TOC page set the 'Level 1 TOC' box to //h:h2 and the 'Level 2 TOC' box to //h:h3.
|
||||
For older .doc files, you can save the document as HTML with Microsoft Word
|
||||
and then convert the resulting HTML file with |app|. When saving as
|
||||
HTML, be sure to use the "Save as Web Page, Filtered" option as this will
|
||||
produce clean HTML that will convert well. Note that Word produces really messy
|
||||
HTML, converting it can take a long time, so be patient. If you have a newer
|
||||
version of Word available, you can directly save it as docx as well.
|
||||
|
||||
Another alternative is to use the free OpenOffice. Open your .doc file in
|
||||
OpenOffice and save it in OpenOffice's format .odt. |app| can directly convert
|
||||
.odt files.
|
||||
|
||||
Convert TXT documents
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -115,16 +115,27 @@ commits::
|
||||
Be careful to not include merges when using ``HEAD~n``.
|
||||
|
||||
If you plan to do a lot of development on |app|, then the best method is to create a
|
||||
`GitHub <http://github.com>`_ account. Once you have an account, follow the
|
||||
steps at `Setup Git <https://help.github.com/articles/set-up-git>`_ and
|
||||
`Fork A Repo <https://help.github.com/articles/fork-a-repo>`_ to create your own fork of the
|
||||
`calibre GitHub repository <https://github.com/kovidgoyal/calibre>`_. Read
|
||||
`Pushing to a remote <https://help.github.com/articles/pushing-to-a-remote>`_
|
||||
to learn how to upload your commits to GitHub.
|
||||
`GitHub <http://github.com>`_ account. Below is a basic guide to setting up
|
||||
your own fork of calibre in a way that will allow you to submit pull requests
|
||||
for inclusion into the main |app| repository:
|
||||
|
||||
* Setup git on your machine as described in this article: `Setup Git <https://help.github.com/articles/set-up-git>`_
|
||||
* Setup ssh keys for authentication to GitHub, as described here: `Generating SSH keys <https://help.github.com/articles/generating-ssh-keys>`_
|
||||
* Go to https://github.com/kovidgoyal/calibre and click the :guilabel:`Fork` button.
|
||||
* In a Terminal do::
|
||||
|
||||
git clone git@github.com:<username>/calibre.git
|
||||
|
||||
Replace <username> above with your github username. That will get your fork checked out locally.
|
||||
* You can make changes and commit them whenever you like. When you are ready to have your work merged, do a::
|
||||
|
||||
git push
|
||||
|
||||
and go to ``https://github.com/<username>/calibre`` and click the :guilabel:`Pull Request` button to generate a pull request that can be merged.
|
||||
* You can update your local copy with code from the main repo at any time by doing::
|
||||
|
||||
git pull upstream
|
||||
|
||||
You can contribute your code in the form of `Pull Requests
|
||||
<https://help.github.com/articles/using-pull-requests>`_. Generally, you should
|
||||
create a new branch for any feature that is non-trivial.
|
||||
|
||||
You should also keep an eye on the |app| `development forum
|
||||
<http://www.mobileread.com/forums/forumdisplay.php?f=240>`_. Before making
|
||||
@ -297,10 +308,14 @@ code, with access to the |app| modules::
|
||||
|
||||
is great for testing a little snippet of code on the command line. It works in the same way as the -c switch to the python interpreter::
|
||||
|
||||
calibre-debug -e myscript.py
|
||||
calibre-debug myscript.py
|
||||
|
||||
can be used to execute your own Python script. It works in the same way as passing the script to the Python interpreter, except
|
||||
that the calibre environment is fully initialized, so you can use all the calibre code in your script.
|
||||
that the calibre environment is fully initialized, so you can use all the calibre code in your script. To use command line arguments with your script, use the form::
|
||||
|
||||
calibre-debug myscript.py -- --option1 arg1
|
||||
|
||||
The ``--`` causes all subsequent arguments to be passed to your script.
|
||||
|
||||
|
||||
Using |app| in your projects
|
||||
@ -313,7 +328,7 @@ Binary install of |app|
|
||||
|
||||
If you have a binary install of |app|, you can use the Python interpreter bundled with |app|, like this::
|
||||
|
||||
calibre-debug -e /path/to/your/python/script.py
|
||||
calibre-debug /path/to/your/python/script.py -- arguments to your script
|
||||
|
||||
Source install on Linux
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
@ -20,7 +20,7 @@ What formats does |app| support conversion to/from?
|
||||
|app| supports the conversion of many input formats to many output formats.
|
||||
It can convert every input format in the following list, to every output format.
|
||||
|
||||
*Input Formats:* CBZ, CBR, CBC, CHM, DJVU, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
|
||||
*Input Formats:* CBZ, CBR, CBC, CHM, DJVU, DOCX, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
|
||||
|
||||
*Output Formats:* AZW3, EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, RTF, SNB, TCR, TXT, TXTZ
|
||||
|
||||
@ -29,13 +29,14 @@ It can convert every input format in the following list, to every output format.
|
||||
PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers.
|
||||
PDB is also a generic format. |app| supports eReader, Plucker, PML and zTxt PDB files.
|
||||
DJVU support is only for converting DJVU files that contain embedded text. These are typically generated by OCR software.
|
||||
MOBI books can be of two types Mobi6 and KF8. |app| fully supports both. MOBI files often have .azw or .azw3 file extensions
|
||||
MOBI books can be of two types Mobi6 and KF8. |app| fully supports both. MOBI files often have .azw or .azw3 file extensions.
|
||||
DOCX files from Microsoft Word 2007 and newer are supported.
|
||||
|
||||
.. _best-source-formats:
|
||||
|
||||
What are the best source formats to convert?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
In order of decreasing preference: LIT, MOBI, AZW, EPUB, AZW3, FB2, HTML, PRC, RTF, PDB, TXT, PDF
|
||||
In order of decreasing preference: LIT, MOBI, AZW, EPUB, AZW3, FB2, DOCX, HTML, PRC, ODT, RTF, PDB, TXT, PDF
|
||||
|
||||
I converted a PDF file, but the result has various problems?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -6,30 +6,37 @@ import datetime
|
||||
class FSP(BasicNewsRecipe):
|
||||
|
||||
title = u'Folha de S\xE3o Paulo'
|
||||
__author__ = 'fluzao'
|
||||
__author__ = 'Joao Eduardo Bertacchi'
|
||||
description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \
|
||||
u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]'
|
||||
|
||||
# found this to be the easiest place to find the index page (13-Nov-2011).
|
||||
#found this to be the easiest place to find the index page (13-Nov-2011).
|
||||
# searching for the "Indice Geral" link
|
||||
HOMEPAGE = 'http://www1.folha.uol.com.br/fsp/'
|
||||
today=datetime.date.today()
|
||||
FIRSTPAGE= 'cp' + str(today.day).zfill(2) + str(today.month).zfill(2) + str(today.year) + '.shtml'
|
||||
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
|
||||
|
||||
language = 'pt_BR'
|
||||
no_stylesheets = True
|
||||
max_articles_per_feed = 40
|
||||
max_articles_per_feed = 50
|
||||
remove_javascript = True
|
||||
needs_subscription = True
|
||||
|
||||
remove_tags_before = dict(name='p')
|
||||
remove_tags = [dict(name='td', attrs={'align':'center'})]
|
||||
# remove_tags_before = dict(name='p')
|
||||
# remove_tags_before = dict(name='div', id='articleNew')
|
||||
# remove_tags_after = dict(name='div', id='articleNew')
|
||||
keep_only_tags = [dict(name='div', id='articleNew'), dict(name='table', attrs={'class':'articleGraphic'})]
|
||||
publication_type = 'newspaper'
|
||||
simultaneous_downloads = 5
|
||||
# remove_tags = [dict(name='td', attrs={'align':'center'})]
|
||||
remove_attributes = ['height','width']
|
||||
# fixes the problem with the section names
|
||||
section_dict = {'cotidian' : 'cotidiano', 'ilustrad': 'ilustrada',
|
||||
'quadrin': 'quadrinhos' , 'opiniao' : u'opini\xE3o',
|
||||
'ciencia' : u'ci\xeancia' , 'saude' : u'sa\xfade',
|
||||
'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio',
|
||||
'imoveis' : u'im\xf3veis', 'negocios' : u'neg\xf3cios',
|
||||
section_dict = {'cotidian' : 'cotidiano', 'ilustrad': 'ilustrada', \
|
||||
'quadrin': 'quadrinhos' , 'opiniao' : u'opini\xE3o', \
|
||||
'ciencia' : u'ci\xeancia' , 'saude' : u'sa\xfade', \
|
||||
'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio', \
|
||||
'imoveis' : u'im\xf3veis', 'negocios' : u'neg\xf3cios', \
|
||||
'veiculos' : u've\xedculos', 'corrida' : 'folha corrida'}
|
||||
|
||||
# this solves the problem with truncated content in Kindle
|
||||
@ -39,6 +46,40 @@ class FSP(BasicNewsRecipe):
|
||||
# Indice e Comunicar Erros
|
||||
preprocess_regexps = [(re.compile(r'<!--/NOTICIA-->.*Comunicar Erros</a>',
|
||||
re.DOTALL|re.IGNORECASE), lambda match: r'')]
|
||||
extra_css = """
|
||||
#articleNew { font: 18px Times New Roman,verdana,arial; }
|
||||
img { background: none !important; float: none; margin: 0px; }
|
||||
.newstexts { list-style-type: none; height: 20px; margin: 15px 0 10px 0; }
|
||||
.newstexts.last { border-top: 1px solid #ccc; margin: 5px 0 15px 0; padding-top: 15px; }
|
||||
.newstexts li { display: inline; padding: 0 5px; }
|
||||
.newstexts li.prev { float: left; }
|
||||
.newstexts li.next { float: right; }
|
||||
.newstexts li span { width: 12px; height: 15px; display: inline-block; }
|
||||
.newstexts li.prev span { background-position: -818px -46px; }
|
||||
.newstexts li.next span { background-position: -832px -46px; }
|
||||
.newstexts li a { font: bold 12px arial, verdana, sans-serif; text-transform: uppercase; color: #999; text-decoration: none !important; }
|
||||
.newstexts li a:hover { text-decoration: underline !important }
|
||||
.headerart { font-weight: bold; }
|
||||
.title { font: bold 39px Times New Roman,verdana,arial; margin-bottom: 15px; margin-top: 10px; }
|
||||
.creditart, .origin { font: bold 12px arial, verdana, sans-serif; color: #999; margin: 0px; display: block; }
|
||||
.headerart p, .fine_line p { margin: 0 !important; }
|
||||
.fine_line { font: bold 18px Times New Roman,verdana,arial; }
|
||||
.fine_line p { margin-bottom: 18px !important; }
|
||||
.fine_line p:first-child { font-weight: normal; font-style: italic; font-size: 20px !important; }
|
||||
.eye { display: block; width: 317px; border-top: 2px solid #666; padding: 7px 0 7px; border-bottom: 2px solid #666; font-style: italic; font-weight: bold; }
|
||||
.kicker { font-weight: bold; text-transform: uppercase; font-size: 18px; font-family: Times New Roman,verdana,arial !important; }
|
||||
.blue { color: #000080; }
|
||||
.red { color: #F00; }
|
||||
.blue { color: #000080; }
|
||||
.green { color: #006400; }
|
||||
.orange { color: #FFA042; }
|
||||
.violet { color: #8A2BE2; }
|
||||
.text_footer { font-size: 15px; }
|
||||
.title_end { font-size: 23px; font-weight: bold; }
|
||||
.divisor { text-indent: -9999px; border-bottom: 1px solid #ccc; height: 1px; margin: 0; }
|
||||
.star { background: none !important; height: 15px; }
|
||||
.articleGraphic { margin-bottom: 20px; }
|
||||
"""
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
@ -48,23 +89,33 @@ class FSP(BasicNewsRecipe):
|
||||
br['user'] = self.username
|
||||
br['pass'] = self.password
|
||||
br.submit().read()
|
||||
# if 'Please try again' in raw:
|
||||
# raise Exception('Your username and password are incorrect')
|
||||
## if 'Please try again' in raw:
|
||||
## raise Exception('Your username and password are incorrect')
|
||||
return br
|
||||
|
||||
# def postprocess_html(self, soup, first_fetch):
|
||||
# #Clean-up normal articles
|
||||
# tags = soup.findAll('div', id='articleNew')
|
||||
# if tags and tags[0]:
|
||||
# return tags[0]
|
||||
# #Clean-up first page
|
||||
# tags = soup.findAll('div', attrs={'class':'double_column facsimile'})
|
||||
# if tags and tags[0]:
|
||||
# return tags[0]
|
||||
# return soup
|
||||
|
||||
def parse_index(self):
|
||||
# Searching for the index page on the HOMEPAGE
|
||||
# hpsoup = self.index_to_soup(self.HOMEPAGE)
|
||||
# indexref = hpsoup.find('a', href=re.compile('^indices.*'))
|
||||
# self.log('--> tag containing the today s index: ', indexref)
|
||||
# INDEX = indexref['href']
|
||||
# INDEX = 'http://www1.folha.uol.com.br/'+INDEX
|
||||
today=datetime.date.today()
|
||||
INDEX = 'http://www1.folha.uol.com.br/' + 'fsp/indices/index-' + str(today).replace('-','') + '.shtml'
|
||||
#Searching for the index page on the HOMEPAGE
|
||||
hpsoup = self.index_to_soup(self.HOMEPAGE)
|
||||
#indexref = hpsoup.find('a', href=re.compile('^indices.*'))
|
||||
#self.log('--> tag containing the today s index: ', indexref)
|
||||
#INDEX = indexref['href']
|
||||
#INDEX = 'http://www1.folha.uol.com.br/'+INDEX
|
||||
INDEX = 'http://www1.folha.uol.com.br/' + 'fsp/indices/index-' + str(self.today).replace('-','') + '.shtml'
|
||||
self.log('--> INDEX after extracting href and adding prefix: ', INDEX)
|
||||
# ... and taking the opportunity to get the cover image link
|
||||
# coverurl = hpsoup.find('a', href=re.compile('^cp.*'))['href']
|
||||
coverurl = 'cp' + str(today.day).zfill(2) + str(today.month).zfill(2) + str(today.year) + '.shtml'
|
||||
#coverurl = hpsoup.find('a', href=re.compile('^cp.*'))['href']
|
||||
coverurl = self.FIRSTPAGE
|
||||
if coverurl:
|
||||
self.log('--> tag containing the today s cover: ', coverurl)
|
||||
coverurl = coverurl.replace('shtml', 'jpg')
|
||||
@ -72,35 +123,37 @@ class FSP(BasicNewsRecipe):
|
||||
self.log('--> coverurl after extracting href and adding prefix: ', coverurl)
|
||||
self.cover_url = coverurl
|
||||
|
||||
# soup = self.index_to_soup(self.INDEX)
|
||||
#soup = self.index_to_soup(self.INDEX)
|
||||
soup = self.index_to_soup(INDEX)
|
||||
|
||||
feeds = []
|
||||
articles = []
|
||||
section_title = "Preambulo"
|
||||
section_title = u'Primeira p\xe1gina'
|
||||
for post in soup.findAll('a'):
|
||||
# if name=True => new section
|
||||
strpost = str(post)
|
||||
# if strpost.startswith('<a name'):
|
||||
if re.match('<a href="/fsp/.*/index-' + str(today).replace('-','') + '.shtml"><span class="', strpost):
|
||||
#if strpost.startswith('<a name'):
|
||||
if re.match('<a href="/fsp/.*/index-' + str(self.today).replace('-','') + '.shtml"><span class="', strpost):
|
||||
if articles:
|
||||
feeds.append((section_title, articles))
|
||||
self.log()
|
||||
self.log('--> new section found, creating old section feed: ', section_title)
|
||||
# section_title = post['name']
|
||||
#section_title = post['name']
|
||||
section_title = self.tag_to_string(post)
|
||||
if section_title in self.section_dict:
|
||||
section_title = self.section_dict[section_title]
|
||||
articles = []
|
||||
self.log('--> new section title: ', section_title)
|
||||
elif strpost.startswith('<a href="/fsp/cp'):
|
||||
break
|
||||
elif strpost.startswith('<a href'):
|
||||
url = post['href']
|
||||
# this bit is kept if they ever go back to the old format (pre Nov-2011)
|
||||
#this bit is kept if they ever go back to the old format (pre Nov-2011)
|
||||
if url.startswith('/fsp'):
|
||||
url = 'http://www1.folha.uol.com.br'+url
|
||||
#
|
||||
if url.startswith('http://www1.folha.uol.com.br/fsp'):
|
||||
# url = 'http://www1.folha.uol.com.br'+url
|
||||
#url = 'http://www1.folha.uol.com.br'+url
|
||||
title = self.tag_to_string(post)
|
||||
self.log()
|
||||
self.log('--> post: ', post)
|
||||
@ -111,15 +164,15 @@ class FSP(BasicNewsRecipe):
|
||||
feeds.append((section_title, articles))
|
||||
|
||||
# keeping the front page url
|
||||
# minha_capa = feeds[0][1][1]['url']
|
||||
#minha_capa = feeds[0][1][1]['url']
|
||||
|
||||
# removing the first section ('Preambulo')
|
||||
del feeds[0]
|
||||
#del feeds[0][1][0]
|
||||
|
||||
# inserting the cover page as the first article (nicer for kindle users)
|
||||
# feeds.insert(0,(u'primeira p\xe1gina', [{'title':u'Primeira p\xe1gina' , 'url':minha_capa}]))
|
||||
feeds.insert(0,(u'Capa', [{'title':u'Capa' , 'url':self.get_cover_url().replace('jpg', 'shtml')}]))
|
||||
#feeds.insert(0,(u'primeira p\xe1gina', [{'title':u'Primeira p\xe1gina' , 'url':minha_capa}]))
|
||||
#feeds[0][1].insert(0,{'title':u'fac-s\xedmile da capa' , 'url':self.HOMEPAGE+self.FIRSTPAGE})
|
||||
return feeds
|
||||
|
||||
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
@ -5,7 +6,6 @@ frontlineonnet.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Frontlineonnet(BasicNewsRecipe):
|
||||
@ -18,7 +18,7 @@ class Frontlineonnet(BasicNewsRecipe):
|
||||
delay = 1
|
||||
INDEX = 'http://frontlineonnet.com/'
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
encoding = 'utf-8'
|
||||
language = 'en_IN'
|
||||
publication_type = 'magazine'
|
||||
masthead_url = 'http://frontlineonnet.com/images/newfline.jpg'
|
||||
@ -45,37 +45,36 @@ class Frontlineonnet(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
keep_only_tags= [
|
||||
dict(name='font', attrs={'class':'storyhead'})
|
||||
,dict(attrs={'class':'byline'})
|
||||
dict(name='div', attrs={'id':'content'})
|
||||
#,dict(attrs={'class':'byline'})
|
||||
]
|
||||
remove_attributes=['size','noshade','border']
|
||||
#remove_attributes=['size','noshade','border']
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll('img'):
|
||||
if not item.has_key('alt'):
|
||||
item['alt'] = 'image'
|
||||
return soup
|
||||
#def preprocess_html(self, soup):
|
||||
#for item in soup.findAll(style=True):
|
||||
#del item['style']
|
||||
#for item in soup.findAll('img'):
|
||||
#if not item.has_key('alt'):
|
||||
#item['alt'] = 'image'
|
||||
#return soup
|
||||
|
||||
def parse_index(self):
|
||||
articles = []
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
for feed_link in soup.findAll('a',href=True):
|
||||
if feed_link['href'].startswith('stories/'):
|
||||
url = self.INDEX + feed_link['href']
|
||||
title = self.tag_to_string(feed_link)
|
||||
date = strftime(self.timefmt)
|
||||
articles.append({
|
||||
'title' :title
|
||||
,'date' :date
|
||||
,'url' :url
|
||||
,'description':''
|
||||
})
|
||||
for feed_link in soup.findAll('div', id='headseccol'):
|
||||
a = feed_link.find('a', href=True)
|
||||
title = self.tag_to_string(a)
|
||||
url = a['href']
|
||||
articles.append({
|
||||
'title' :title
|
||||
,'date' :''
|
||||
,'url' :url
|
||||
,'description':''
|
||||
})
|
||||
return [('Frontline', articles)]
|
||||
|
||||
def print_version(self, url):
|
||||
return "http://www.hinduonnet.com/thehindu/thscrip/print.pl?prd=fline&file=" + url.rpartition('/')[2]
|
||||
#def print_version(self, url):
|
||||
#return "http://www.hinduonnet.com/thehindu/thscrip/print.pl?prd=fline&file=" + url.rpartition('/')[2]
|
||||
|
||||
def image_url_processor(self, baseurl, url):
|
||||
return url.replace('../images/', self.INDEX + 'images/').strip()
|
||||
#def image_url_processor(self, baseurl, url):
|
||||
#return url.replace('../images/', self.INDEX + 'images/').strip()
|
||||
|
@ -1,35 +0,0 @@
|
||||
import urllib, re, mechanize
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre import __appname__
|
||||
|
||||
class GoogleReader(BasicNewsRecipe):
    """Recipe that downloads unread items from a Google Reader account.

    The feed list is discovered at run time from the account's tag list, and
    every request is authenticated with a token obtained from Google's
    ClientLogin endpoint.
    """

    title = 'Google Reader'
    description = 'This recipe fetches from your Google Reader account unread Starred items and unread Feeds you have placed in a folder via the manage subscriptions feature.'
    needs_subscription = True
    __author__ = 'davec, rollercoaster, Starson17'
    base_url = 'http://www.google.com/reader/atom/'
    oldest_article = 365
    max_articles_per_feed = 250
    # Query string appended to every feed URL. n= caps the number of entries;
    # the xt= term presumably excludes items already in the "read" state
    # (the description above says only unread items are fetched) - confirm.
    get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed
    use_embedded_content = True

    def get_browser(self):
        """Return a browser/opener to use for all downloads.

        When both username and password are set, the credentials are posted
        to the ClientLogin endpoint and the returned ``Auth`` token is
        installed as an ``Authorization`` header on a fresh mechanize opener,
        which replaces the default browser. Without credentials the default
        browser is returned unchanged.

        Raises:
            ValueError: if the login response contains no ``Auth=`` token.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            request = urllib.urlencode([('Email', self.username), ('Passwd', self.password),
                                        ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)])
            response = br.open('https://www.google.com/accounts/ClientLogin', request)
            # Raw string: '\S' in a plain literal is an invalid escape sequence.
            match = re.search(r'Auth=(\S*)', response.read())
            if match is None:
                # Previously this surfaced as an opaque AttributeError on None.
                raise ValueError('Failed to log in to Google Reader, check your username and password')
            auth = match.group(1)
            cookies = mechanize.CookieJar()
            br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
            br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)]
        return br

    def get_feeds(self):
        """Build the feed list from the account's tag list.

        Returns:
            A list of ``(title, url)`` tuples. The title is the part of the
            tag id after its last ``/``; the URL is ``base_url`` plus the
            URL-quoted tag id plus ``get_options``.
        """
        feeds = []
        soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list')
        # Every element whose name attribute is "id" holds one tag id.
        for id_tag in soup.findAll(True, attrs={'name':['id']}):
            url = id_tag.contents[0]
            feeds.append((re.search(r'/([^/]*)$', url).group(1),
                          self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options))
        return feeds
|
@ -1,35 +0,0 @@
|
||||
import urllib, re, mechanize
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre import __appname__
|
||||
|
||||
class GoogleReaderUber(BasicNewsRecipe):
    """Recipe that downloads all feeds from a Google Reader account.

    The feed list is discovered at run time from the account's tag list, and
    every request is authenticated with a token obtained from Google's
    ClientLogin endpoint.
    """

    title = 'Google Reader uber'
    description = 'Fetches all feeds from your Google Reader account including the uncategorized items.'
    needs_subscription = True
    __author__ = 'davec, rollercoaster, Starson17'
    base_url = 'http://www.google.com/reader/atom/'
    oldest_article = 365
    max_articles_per_feed = 250
    # Query string appended to every feed URL. n= caps the number of entries;
    # the xt= term presumably excludes items already in the "read" state
    # - confirm against the Reader API.
    get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed
    use_embedded_content = True

    def get_browser(self):
        """Return a browser/opener to use for all downloads.

        When both username and password are set, the credentials are posted
        to the ClientLogin endpoint and the returned ``Auth`` token is
        installed as an ``Authorization`` header on a fresh mechanize opener,
        which replaces the default browser. Without credentials the default
        browser is returned unchanged.

        Raises:
            ValueError: if the login response contains no ``Auth=`` token.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            request = urllib.urlencode([('Email', self.username), ('Passwd', self.password),
                                        ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)])
            response = br.open('https://www.google.com/accounts/ClientLogin', request)
            # Raw string: '\S' in a plain literal is an invalid escape sequence.
            match = re.search(r'Auth=(\S*)', response.read())
            if match is None:
                # Previously this surfaced as an opaque AttributeError on None.
                raise ValueError('Failed to log in to Google Reader, check your username and password')
            auth = match.group(1)
            cookies = mechanize.CookieJar()
            br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
            br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)]
        return br

    def get_feeds(self):
        """Build the feed list from the account's tag list.

        Returns:
            A list of ``(title, url)`` tuples. The title is the part of the
            tag id after its last ``/``; the URL is ``base_url`` plus the
            URL-quoted tag id plus ``get_options``.
        """
        feeds = []
        soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list')
        # Every element whose name attribute is "id" holds one tag id.
        for id_tag in soup.findAll(True, attrs={'name':['id']}):
            # NOTE(review): swapping "broadcast" for "reading-list" is
            # presumably what pulls in the uncategorized items mentioned in
            # the description - confirm.
            url = id_tag.contents[0].replace('broadcast','reading-list')
            feeds.append((re.search(r'/([^/]*)$', url).group(1),
                          self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options))
        return feeds
|
@ -1,5 +1,4 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2013, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
lanacion.com.ar
|
||||
'''
|
||||
@ -45,36 +44,32 @@ class Lanacion(BasicNewsRecipe):
|
||||
remove_tags_after = dict(attrs={'id':'relacionadas'})
|
||||
|
||||
feeds = [
|
||||
(u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' )
|
||||
,(u'Deportes' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=131' )
|
||||
,(u'Economia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=272' )
|
||||
,(u'Informacion General' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=21' )
|
||||
,(u'Cultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1' )
|
||||
,(u'Opinion' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=28' )
|
||||
,(u'Espectaculos' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=120' )
|
||||
,(u'Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7' )
|
||||
,(u'Ciencia&Salud' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=498' )
|
||||
,(u'Revista' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=494' )
|
||||
,(u'Enfoques' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=421' )
|
||||
,(u'Comercio Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=347' )
|
||||
,(u'Tecnologia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=432' )
|
||||
,(u'Arquitectura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=366' )
|
||||
,(u'Turismo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=504' )
|
||||
,(u'Al volante' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=371' )
|
||||
,(u'El Campo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=337' )
|
||||
,(u'Moda y Belleza' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1312')
|
||||
,(u'Inmuebles Comerciales', u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1363')
|
||||
,(u'Countries' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1348')
|
||||
,(u'adnCultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6734')
|
||||
,(u'The WSJ Americas' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6373')
|
||||
,(u'Comunidad' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1344')
|
||||
,(u'Management' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7380')
|
||||
,(u'Bicentenario' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7276')
|
||||
(u'Politica' , u'http://lanacion.com.ar.feedsportal.com/politica' )
|
||||
,(u'Deportes' , u'http://lanacion.com.ar.feedsportal.com/deportes' )
|
||||
,(u'Economia' , u'http://lanacion.com.ar.feedsportal.com/economia' )
|
||||
,(u'Sociedad' , u'http://lanacion.com.ar.feedsportal.com/sociedad' )
|
||||
,(u'Seguridad' , u'http://lanacion.com.ar.feedsportal.com/seguridad' )
|
||||
,(u'Buenos Aires' , u'http://lanacion.com.ar.feedsportal.com/buenosaires' )
|
||||
,(u'Opinion' , u'http://lanacion.com.ar.feedsportal.com/opinion' )
|
||||
,(u'Espectaculos' , u'http://lanacion.com.ar.feedsportal.com/espectaculos' )
|
||||
,(u'El Mundo' , u'http://lanacion.com.ar.feedsportal.com/mundo' )
|
||||
,(u'Revista' , u'http://lanacion.com.ar.feedsportal.com/revistalanacion' )
|
||||
,(u'Enfoques' , u'http://lanacion.com.ar.feedsportal.com/enfoques' )
|
||||
,(u'Comercio Exterior' , u'http://lanacion.com.ar.feedsportal.com/comercioexterior' )
|
||||
,(u'Tecnologia' , u'http://lanacion.com.ar.feedsportal.com/tecnologia' )
|
||||
,(u'Turismo' , u'http://lanacion.com.ar.feedsportal.com/turismo' )
|
||||
,(u'Al volante' , u'http://lanacion.com.ar.feedsportal.com/alvolante' )
|
||||
,(u'El Campo' , u'http://lanacion.com.ar.feedsportal.com/elcampo' )
|
||||
,(u'Moda y Belleza' , u'http://lanacion.com.ar.feedsportal.com/modaybelleza' )
|
||||
,(u'Inmuebles Comerciales', u'http://lanacion.com.ar.feedsportal.com/inmueblescomerciales' )
|
||||
,(u'Countries' , u'http://lanacion.com.ar.feedsportal.com/countries' )
|
||||
,(u'adnCultura' , u'http://lanacion.com.ar.feedsportal.com/adncultura' )
|
||||
,(u'The WSJ Americas' , u'http://lanacion.com.ar.feedsportal.com/wallstreetjournalamericas')
|
||||
]
|
||||
|
||||
|
||||
def get_article_url(self, article):
|
||||
link = BasicNewsRecipe.get_article_url(self,article)
|
||||
link = article.get('guid', None)
|
||||
if link.startswith('http://blogs.lanacion') and not link.endswith('/'):
|
||||
return self.browser.open_novisit(link).geturl()
|
||||
if link.rfind('galeria=') > 0:
|
||||
|
@ -1,5 +1,6 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1274742400(BasicNewsRecipe):
|
||||
|
||||
title = u'Las Vegas Review Journal'
|
||||
@ -9,24 +10,24 @@ class AdvancedUserRecipe1274742400(BasicNewsRecipe):
|
||||
oldest_article = 7
|
||||
|
||||
max_articles_per_feed = 100
|
||||
#keep_only_tags = [dict(id='content-main')]
|
||||
#remove_tags = [dict(id=['right-col-content', 'trending-topics']),
|
||||
# keep_only_tags = [dict(id='content-main')]
|
||||
# remove_tags = [dict(id=['right-col-content', 'trending-topics']),
|
||||
#{'class':['ppy-outer']}
|
||||
#]
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
auto_cleanup = True
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'News', u'http://www.lvrj.com/news.rss'),
|
||||
(u'Business', u'http://www.lvrj.com/business.rss'),
|
||||
(u'Living', u'http://www.lvrj.com/living.rss'),
|
||||
(u'Opinion', u'http://www.lvrj.com/opinion.rss'),
|
||||
(u'Neon', u'http://www.lvrj.com/neon.rss'),
|
||||
#(u'Image', u'http://www.lvrj.com/image.rss'),
|
||||
#(u'Home & Garden', u'http://www.lvrj.com/home_and_garden.rss'),
|
||||
#(u'Furniture & Design', u'http://www.lvrj.com/furniture_and_design.rss'),
|
||||
#(u'Drive', u'http://www.lvrj.com/drive.rss'),
|
||||
#(u'Real Estate', u'http://www.lvrj.com/real_estate.rss'),
|
||||
(u'Sports', u'http://www.lvrj.com/sports.rss')]
|
||||
(u'Top Stories', u'http://www.reviewjournal.com/rss.xml'),
|
||||
(u'News', u'http://www.reviewjournal.com/news/feed'),
|
||||
(u'Business', u'http://www.reviewjournal.com/business/feed'),
|
||||
(u'Living', u'http://www.reviewjournal.com/living/feed'),
|
||||
(u'Opinion', u'http://www.reviewjournal.com/opinion/feed'),
|
||||
(u'Neon', u'http://www.reviewjournal.com/neon/feed'),
|
||||
#(u'Image', u'http://www.lvrj.com/image.rss'),
|
||||
#(u'Home & Garden', u'http://www.lvrj.com/home_and_garden.rss'),
|
||||
#(u'Furniture & Design', u'http://www.lvrj.com/furniture_and_design.rss'),
|
||||
#(u'Drive', u'http://www.lvrj.com/drive.rss'),
|
||||
#(u'Real Estate', u'http://www.lvrj.com/real_estate.rss'),
|
||||
(u'Sports', u'http://www.reviewjournal.com/sports/feed')]
|
||||
|
@ -39,6 +39,8 @@ from BeautifulSoup import BeautifulSoup
|
||||
Version 1.9.4 19-04-2013
|
||||
Added regex filter for mailto
|
||||
Updated for new layout of metro-site
|
||||
Version 1.9.5 28-05-2013
|
||||
Added some extra id's and classes to remove
|
||||
'''
|
||||
|
||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
@ -46,7 +48,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
oldest_article = 1.2
|
||||
max_articles_per_feed = 25
|
||||
__author__ = u'DrMerry'
|
||||
description = u'Metro Nederland v1.9.4 2013-04-19'
|
||||
description = u'Metro Nederland v1.9.5 2013-05-28, Download nieuws van de Nederlandse editie van de krant Metro'
|
||||
language = u'nl'
|
||||
simultaneous_downloads = 5
|
||||
masthead_url = 'http://blog.metronieuws.nl/wp-content/themes/metro/images/header.gif'
|
||||
@ -70,7 +72,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
#(re.compile(r'<(a |/a)[^>]*>', re.DOTALL|re.IGNORECASE),lambda match:'')
|
||||
#(re.compile('(</?)h2', re.DOTALL|re.IGNORECASE),lambda match:'\1em')
|
||||
]
|
||||
|
||||
|
||||
remove_tags_before= dict(id='subwrapper')
|
||||
remove_tags_after = dict(name='div', attrs={'class':['body-area','article-main-area']})
|
||||
#name='div', attrs={'class':['subwrapper']})]
|
||||
@ -80,13 +82,13 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['iframe','script','noscript','style']),
|
||||
dict(name='div', attrs={'class':['aside clearfix','aside clearfix middle-col-line','comments','share-tools','article-right-column','column-4-5','column-1-5','ad-msg','col-179 ','col-373 ','clear','ad','navigation',re.compile('share-tools(-top)?'),'tools','metroCommentFormWrap','article-tools-below-title','related-links','padding-top-15',re.compile('^promo.*?$'),'teaser-component',re.compile('fb(-comments|_iframe_widget)'),'promos','header-links','promo-2']}),
|
||||
dict(id=['article-2','googleads','column-1-5-bottom','column-4-5',re.compile('^ad(\d+|adcomp.*?)?$'),'adadcomp-4','margin-5','sidebar',re.compile('^article-\d'),'comments','gallery-1','sharez_container','ts-container','topshares','ts-title']),
|
||||
dict(name='div', attrs={'class':['fact-related-box','aside clearfix','aside clearfix middle-col-line','comments','share-tools','article-right-column','column-4-5','column-1-5','ad-msg','col-179 ','col-373 ','clear','ad','navigation',re.compile('share-tools(-top)?'),'tools','metroCommentFormWrap','article-tools-below-title','related-links','padding-top-15',re.compile('^promo.*?$'),'teaser-component',re.compile('fb(-comments|_iframe_widget)'),'promos','header-links','promo-2']}),
|
||||
dict(id=['super-carousel','article-2','googleads','column-1-5-bottom','column-4-5',re.compile('^ad(\d+|adcomp.*?)?$'),'adadcomp-4','margin-5','sidebar',re.compile('^article-\d'),'comments','gallery-1','sharez_container','ts-container','topshares','ts-title']),
|
||||
dict(name='a', attrs={'name':'comments'}),
|
||||
#dict(name='div', attrs={'data-href'}),
|
||||
dict(name='img', attrs={'class':'top-line','title':'volledig scherm'}),
|
||||
dict(attrs={'style':re.compile('^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$'),'title':'volledig scherm'})]
|
||||
|
||||
|
||||
'''removed by before/after:
|
||||
id:
|
||||
column-1-5-top,'hidden_div','footer',
|
||||
@ -182,7 +184,7 @@ class MerryProcess(BeautifulSoup):
|
||||
except:
|
||||
pass
|
||||
return soup
|
||||
|
||||
|
||||
def moveTitleAndAuthor(self, soup):
|
||||
moveitem = soup.h1
|
||||
pubdate = soup.find(id="date")
|
||||
@ -218,4 +220,4 @@ class MerryProcess(BeautifulSoup):
|
||||
self.removeArrayOfTags(emptytags)
|
||||
#recursive in case removing empty tag creates new empty tag
|
||||
self.removeEmptyTags(soup, run=run)
|
||||
return soup
|
||||
return soup
|
||||
|
49
recipes/neu_osnabrucker_zeitung.recipe
Normal file
49
recipes/neu_osnabrucker_zeitung.recipe
Normal file
@ -0,0 +1,49 @@
|
||||
# vim:fileencoding=utf-8
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1344926684(BasicNewsRecipe):
    """Recipe for the German daily Neue Osnabrücker Zeitung (noz.de).

    Articles come from the site's per-section RSS feeds; the page content is
    trimmed with explicit keep/remove tag lists.
    """

    title = u'Neue Osnabrücker Zeitung'
    __author__ = 'Krittika Goyal'
    language = 'de'

    oldest_article = 7
    max_articles_per_feed = 100

    # auto_cleanup is left switched off; the tag lists below do the cleaning.
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False

    # Only these containers are kept from each article page.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'article'}},
        {'name': 'span', 'attrs': {'id': 'articletext'}},
    ]

    # Elements stripped from the kept containers. Class values are matched
    # exactly as written, including their leading/trailing spaces.
    remove_tags = [
        {'name': 'div', 'attrs': {'id': 'retresco-title'}},
        {'name': 'div', 'attrs': {'class': 'retresco-item s1 relative'}},
        {'name': 'a', 'attrs': {'class': 'medium2 largeSpaceTop icon'}},
        {'name': 'div', 'attrs': {'class': 'articleFunctions inlineTeaserRight'}},
        {'name': 'div', 'attrs': {'class': 'imageContainer '}},
        {'name': 'div', 'attrs': {'class': 'imageContainer centerContainer'}},
        {'name': 'div', 'attrs': {'class': 'grid singleCol articleTeaser'}},
        {'name': 'h3', 'attrs': {'class': 'teaserRow'}},
        {'name': 'div', 'attrs': {'class': 'related-comments'}},
        {'name': 'a', 'attrs': {'class': ' icon'}},
        {'name': 'a', 'attrs': {'class': 'right small'}},
        {'name': 'span', 'attrs': {'class': 'small block spaceBottom rectangleAd'}},
        {'name': 'div', 'attrs': {'class': 'furtherGalleries largeSpaceTop'}},
    ]

    # (section title, RSS URL) pairs, one per section of the paper.
    feeds = [
        (u'Lokales', u'http://www.noz.de/rss/Lokales'),
        (u'Vermischtes', u'http://www.noz.de/rss/Vermischtes'),
        (u'Politik', u'http://www.noz.de/rss/Politik'),
        (u'Wirtschaft', u'http://www.noz.de/rss/Wirtschaft'),
        (u'Kultur', u'http://www.noz.de/rss/Kultur'),
        (u'Medien', u'http://www.noz.de/rss/Medien'),
        (u'Wissenschaft', u'http://www.noz.de/rss/wissenschaft'),
        (u'Sport', u'http://www.noz.de/rss/Sport'),
        (u'Computer', u'http://www.noz.de/rss/Computer'),
        (u'Musik', u'http://www.noz.de/rss/Musik'),
        (u'Szene', u'http://www.noz.de/rss/Szene'),
        (u'Niedersachsen', u'http://www.noz.de/rss/Niedersachsen'),
        (u'Kino', u'http://www.noz.de/rss/Kino'),
    ]
|
||||
|
@ -29,24 +29,22 @@ class NYTimesSports(BasicNewsRecipe):
|
||||
category = 'Sports'
|
||||
oldest_article = 3
|
||||
max_articles_per_feed = 25
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'en'
|
||||
#cover_url ='http://bit.ly/h8F4DO'
|
||||
auto_cleanup = True
|
||||
auto_cleanup_keep = '//div[@class="articleSpanImage"]'
|
||||
feeds = [
|
||||
(u'The Fifth Down', u'http://fifthdown.blogs.nytimes.com/feed/'),
|
||||
(u'Off The Dribble', u'http://offthedribble.blogs.nytimes.com/feed/'),
|
||||
(u'The Quad', u'http://thequad.blogs.nytimes.com/feed/'),
|
||||
(u'Slap Shot', u'http://slapshot.blogs.nytimes.com/feed/'),
|
||||
(u'Goal', u'http://goal.blogs.nytimes.com/feed/'),
|
||||
(u'Bats', u'http://bats.blogs.nytimes.com/feed/'),
|
||||
(u'Straight Sets', u'http://straightsets.blogs.nytimes.com/feed/'),
|
||||
(u'Formula One', u'http://formulaone.blogs.nytimes.com/feed/'),
|
||||
(u'On Par', u'http://onpar.blogs.nytimes.com/feed/'),
|
||||
]
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'header'}),
|
||||
dict(name='h1'),
|
||||
dict(name='h2'),
|
||||
dict(name='div', attrs={'class':'entry-content'})]
|
||||
(u'The Fifth Down', u'http://fifthdown.blogs.nytimes.com/feed/'),
|
||||
(u'Off The Dribble', u'http://offthedribble.blogs.nytimes.com/feed/'),
|
||||
(u'The Quad', u'http://thequad.blogs.nytimes.com/feed/'),
|
||||
(u'Slap Shot', u'http://slapshot.blogs.nytimes.com/feed/'),
|
||||
(u'Goal', u'http://goal.blogs.nytimes.com/feed/'),
|
||||
(u'Bats', u'http://bats.blogs.nytimes.com/feed/'),
|
||||
(u'Straight Sets', u'http://straightsets.blogs.nytimes.com/feed/'),
|
||||
(u'Formula One', u'http://formulaone.blogs.nytimes.com/feed/'),
|
||||
(u'On Par', u'http://onpar.blogs.nytimes.com/feed/'),
|
||||
]
|
||||
extra_css = '''
|
||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
|
35
recipes/seventh_guard.recipe
Normal file
35
recipes/seventh_guard.recipe
Normal file
@ -0,0 +1,35 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
class SeventhGuard(BasicNewsRecipe):
    """Recipe for 7thGuard.net, a Polish site about open standards and free software.

    Articles come from the site's single RSS feed and are cleaned with
    auto_cleanup plus two regexes that drop <br> tags.
    """

    title = '7th Guard'
    description= u""" 7thGuard.net jest jednym z najstarszych w polskiej części internetu serwisem poświęconym otwartości standardów,
wolności oprogramowania, szeroko pojętych wolności w internecie, walce z praktykami monopolistycznymi w świecie informatyki oraz
– co jest coraz ważniejsze w dzisiejszym świecie – rozwojowi społeczeństwa informacyjnego."""
    __author__ = 'koliberek'
    masthead_url='http://beta.7thguard.net/wp-content/uploads/2013/05/7thguard-bladerunner-logo1.png'
    cover_url = 'http://beta.7thguard.net/wp-content/uploads/2013/05/7thguard-bladerunner-logo1.png'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    timefmt = ' [%A, %d %B %Y]'
    auto_cleanup = True
    remove_javascript=True
    no_stylesheets = True
    remove_empty_feeds = True
    conversion_options = {'smarten_punctuation' : True}
    # The generic family keyword is "sans-serif" (hyphenated) and italics are
    # set with "font-style"; the former "sans serif" / "text-style" spellings
    # are not valid CSS, so those declarations were ignored.
    extra_css="""h2 {font-size:12pt; font-family:Arial,Helvetica,sans-serif;}
        h1 {font-size:14pt; font-family:Arial,Helvetica,sans-serif;}
        p {text-align:justify;}
        .article, .feed, .calibre_feed_description, .article_description {font-family:Arial,Helvetica,sans-serif;}
        .article_description {font-style:italic;}
        """
    remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height', 'font']
    # Drop plain <br/> tags and <br clear=.../> tags. The second pattern uses
    # [^>]* rather than .* so the match stops at the end of the <br> tag and
    # cannot run on to a later "/>" on the same line.
    preprocess_regexps = [
        (re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
        (re.compile(r'<br[ ]*clear[^>]*/>', re.IGNORECASE), lambda m: ''),
    ]

    feeds = [(u'Aktualno\u015bci', u'http://7thguard.net/feed/')]
|
||||
|
||||
|
@ -1,77 +1,67 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
|
||||
'''
|
||||
time.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.web.feeds.jsnews import JavascriptRecipe
|
||||
from lxml import html
|
||||
|
||||
class Time(BasicNewsRecipe):
|
||||
def wait_for_load(browser):
    """Block until the time.com page shown in *browser* has loaded.

    Waits (for up to 180 seconds) for an element that is present in the
    black login bar at the top of the page.
    """
    login_bar_selector = '#site-header p.constrain'
    browser.wait_for_element(login_bar_selector, timeout=180)
|
||||
|
||||
# Keep the login method as standalone, so it can be easily tested
|
||||
def do_login(browser, username, password):
    """Log in to time.com through the magazine page's sign-up form.

    Visits the magazine page, fills the username and password fields of the
    '#magazine-signup' form, submits it, and then waits for the page to load.

    Raises:
        ValueError: if waiting for the page times out after submitting.
    """
    from calibre.web.jsbrowser.browser import Timeout

    browser.visit('http://www.time.com/time/magazine')
    signup_form = browser.select_form('#magazine-signup')
    # Fill the fields in the same order as before: username, then password.
    for field, value in (('username', username), ('password', password)):
        signup_form[field] = value
    browser.submit('#paid-wall-submit')
    try:
        wait_for_load(browser)
    except Timeout:
        raise ValueError('Failed to login to time.com, check your username and password and try again in a little while.')
|
||||
|
||||
|
||||
class Time(JavascriptRecipe):
|
||||
title = u'Time'
|
||||
__author__ = 'Kovid Goyal, Rick Shang'
|
||||
description = ('Weekly US magazine.')
|
||||
encoding = 'utf-8'
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = 'Weekly US magazine.'
|
||||
language = 'en'
|
||||
needs_subscription = True
|
||||
requires_version = (0, 9, 35)
|
||||
|
||||
no_stylesheets = True
|
||||
language = 'en'
|
||||
remove_javascript = True
|
||||
needs_subscription = True
|
||||
keep_only_tags = ['article.post']
|
||||
remove_tags = ['meta', '.entry-sharing', '.entry-footer', '.wp-paginate',
|
||||
'.post-rail', '.entry-comments', '.entry-tools',
|
||||
'#paid-wall-cm-ad']
|
||||
|
||||
keep_only_tags = [
|
||||
{
|
||||
'class':['primary-col', 'tout1']
|
||||
},
|
||||
]
|
||||
remove_tags = [
|
||||
{'class':['button', 'entry-sharing group', 'wp-paginate',
|
||||
'moving-markup', 'entry-comments']},
|
||||
recursions = 1
|
||||
links_from_selectors = ['.wp-paginate a.page[href]']
|
||||
|
||||
]
|
||||
extra_css = '.entry-date { padding-left: 2ex }'
|
||||
|
||||
preprocess_regexps = [(re.compile(
|
||||
r'<meta .+/>'), lambda m:'')]
|
||||
def do_login(self, browser, username, password):
|
||||
do_login(browser, username, password)
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
# This site uses javascript in its login process
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://www.time.com/time/magazine')
|
||||
br.select_form(predicate=lambda f: 'action' in f.attrs and f.attrs['action'] == 'https://auth.time.com/login.php')
|
||||
br['username'] = self.username
|
||||
br['password'] = self.password
|
||||
# br['magcode'] = ['TD']
|
||||
br.find_control('turl').readonly = False
|
||||
br['turl'] = 'http://www.time.com/time/magazine'
|
||||
br.find_control('rurl').readonly = False
|
||||
br['rurl'] = 'http://www.time.com/time/magazine'
|
||||
br['remember'] = False
|
||||
raw = br.submit().read()
|
||||
if False and '>Log Out<' not in raw:
|
||||
# This check is disabled as it does not work (there is probably
|
||||
# some cookie missing) however, the login is "sufficient" for
|
||||
# the actual article downloads to work.
|
||||
raise ValueError('Failed to login to time.com, check'
|
||||
' your username and password')
|
||||
return br
|
||||
|
||||
def parse_index(self):
|
||||
raw = self.index_to_soup('http://www.time.com/time/magazine', raw=True)
|
||||
def get_publication_data(self, browser):
|
||||
selector = 'section.sec-mag-showcase ul.ul-mag-showcase img[src]'
|
||||
cover = browser.css_select(selector)
|
||||
# URL for large cover
|
||||
cover_url = unicode(cover.evaluateJavaScript('this.src').toString()).replace('_400.', '_600.')
|
||||
raw = browser.html
|
||||
ans = {'cover': browser.get_resource(cover_url)}
|
||||
# We are already at the magazine page thanks to the do_login() method
|
||||
root = html.fromstring(raw)
|
||||
img = root.xpath('//a[.="View Large Cover" and @href]')
|
||||
if img:
|
||||
cover_url = 'http://www.time.com' + img[0].get('href')
|
||||
try:
|
||||
nsoup = self.index_to_soup(cover_url)
|
||||
img = nsoup.find('img', src=re.compile('archive/covers'))
|
||||
if img is not None:
|
||||
self.cover_url = img['src']
|
||||
except:
|
||||
self.log.exception('Failed to fetch cover')
|
||||
|
||||
dates = ''.join(root.xpath('//time[@class="updated"]/text()'))
|
||||
if dates:
|
||||
@ -90,27 +80,22 @@ class Time(BasicNewsRecipe):
|
||||
if articles:
|
||||
feeds.append((section, articles))
|
||||
|
||||
return feeds
|
||||
ans['index'] = feeds
|
||||
return ans
|
||||
|
||||
def find_articles(self, sec):
|
||||
|
||||
for article in sec.xpath('./article'):
|
||||
h2 = article.xpath('./*[@class="entry-title"]')
|
||||
if not h2: continue
|
||||
if not h2:
|
||||
continue
|
||||
a = h2[0].xpath('./a[@href]')
|
||||
if not a: continue
|
||||
if not a:
|
||||
continue
|
||||
title = html.tostring(a[0], encoding=unicode,
|
||||
method='text').strip()
|
||||
if not title: continue
|
||||
if not title:
|
||||
continue
|
||||
url = a[0].get('href')
|
||||
if url.startswith('/'):
|
||||
url = 'http://www.time.com'+url
|
||||
if '/article/0,' in url:
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.find('a', href=lambda x:x and '/printout/' in x)
|
||||
url = a['href'].replace('/printout', '/subscriber/printout')
|
||||
else:
|
||||
url += 'print/' if url.endswith('/') else '/print/'
|
||||
if url.startswith('/'):
|
||||
url = 'http://www.time.com'+url
|
||||
desc = ''
|
||||
@ -126,10 +111,35 @@ class Time(BasicNewsRecipe):
|
||||
'description' : desc
|
||||
}
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for fig in soup.findAll('figure'):
|
||||
img = fig.find('img')
|
||||
if img is not None:
|
||||
fig.replaceWith(img)
|
||||
return soup
|
||||
def load_complete(self, browser, url, recursion_level):
|
||||
# This is needed as without it, subscriber content is blank. time.com
|
||||
# appears to be using some crazy iframe+js callback for loading content
|
||||
wait_for_load(browser)
|
||||
return True
|
||||
|
||||
def postprocess_html(self, article, root, url, recursion_level):
|
||||
# Remove the header and page n of m messages from pages after the first
|
||||
# page
|
||||
if recursion_level > 0:
|
||||
for h in root.xpath('//header[@class="entry-header"]|//span[@class="page"]'):
|
||||
h.getparent().remove(h)
|
||||
# Unfloat the article images and also remove them from pages after the
|
||||
# first page as they are repeated on every page.
|
||||
for fig in root.xpath('//figure'):
|
||||
parent = fig.getparent()
|
||||
if recursion_level > 0:
|
||||
parent.remove(fig)
|
||||
else:
|
||||
idx = parent.index(fig)
|
||||
for img in reversed(fig.xpath('descendant::img')):
|
||||
parent.insert(idx, img)
|
||||
parent.remove(fig)
|
||||
return root
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Test the login
|
||||
import sys
|
||||
from calibre import jsbrowser
|
||||
br = jsbrowser(default_timeout=120)
|
||||
do_login(br, sys.argv[-2], sys.argv[-1])
|
||||
br.show_browser()
|
||||
|
@ -1,74 +1,58 @@
|
||||
import re, urllib
|
||||
# vim:fileencoding=utf-8
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from lxml import html
|
||||
|
||||
allowed_sections = {'Top Headlines', 'Opinion', 'Science', 'Education', 'US', 'Pakistan', 'India Business', 'Tech News', 'Cricket', 'Bollywood'}
|
||||
|
||||
class TimesOfIndia(BasicNewsRecipe):
|
||||
title = u'Times of India'
|
||||
language = 'en_IN'
|
||||
title = u'Times of India Headlines'
|
||||
language = 'en'
|
||||
description = 'Headline news from the Indian daily Times of India'
|
||||
__author__ = 'Kovid Goyal'
|
||||
oldest_article = 1 #days
|
||||
max_articles_per_feed = 25
|
||||
|
||||
no_stylesheets = True
|
||||
remove_attributes = ['style']
|
||||
keep_only_tags = [
|
||||
{'class':re.compile(r'maintable12|prttabl')},
|
||||
{'id':['mod-article-header',
|
||||
'mod-a-body-after-first-para', 'mod-a-body-first-para']},
|
||||
]
|
||||
no_javascript = True
|
||||
keep_only_tags = [dict(name='h1'), dict(id=['storydiv', 'contentarea'])]
|
||||
remove_tags = [
|
||||
{'class':re.compile('tabsintbgshow|prvnxtbg')},
|
||||
{'id':['fbrecommend', 'relmaindiv', 'shretxt', 'fbrecos', 'twtdiv',
|
||||
'gpls', 'auim']},
|
||||
{'class':['twitter-share-button', 'cmtmn']},
|
||||
]
|
||||
dict(name='div', attrs={'class':['video_list', 'rightpart', 'clearfix mTop15', 'footer_slider', 'read_more', 'flR', 'hide_new']}),
|
||||
dict(name='div', attrs={'id':[
|
||||
'most_pop', 'relartstory', 'slidebox', 'tmpFbokk', 'twittersource',
|
||||
'reportAbuseDiv', 'result', 'yahoobuzzsyn', 'fb-root']}),
|
||||
dict(style='float:right;margin-left:5px;'),
|
||||
]
|
||||
|
||||
feeds = [
|
||||
('Top Stories',
|
||||
'http://timesofindia.indiatimes.com/rssfeedstopstories.cms'),
|
||||
('India',
|
||||
'http://timesofindia.indiatimes.com/rssfeeds/-2128936835.cms'),
|
||||
('World',
|
||||
'http://timesofindia.indiatimes.com/rssfeeds/296589292.cms'),
|
||||
('Mumbai',
|
||||
'http://timesofindia.indiatimes.com/rssfeeds/-2128838597.cms'),
|
||||
('Entertainment',
|
||||
'http://timesofindia.indiatimes.com/rssfeeds/1081479906.cms'),
|
||||
('Cricket',
|
||||
'http://timesofindia.indiatimes.com/rssfeeds/4719161.cms'),
|
||||
('Sunday TOI',
|
||||
'http://timesofindia.indiatimes.com/rssfeeds/1945062111.cms'),
|
||||
('Life and Style',
|
||||
'http://timesofindia.indiatimes.com/rssfeeds/2886704.cms'),
|
||||
('Business',
|
||||
'http://timesofindia.indiatimes.com/rssfeeds/1898055.cms'),
|
||||
('Mad Mad World',
|
||||
'http://timesofindia.indiatimes.com/rssfeeds/2178430.cms'),
|
||||
('Most Read',
|
||||
'http://timesofindia.indiatimes.com/rssfeedmostread.cms')
|
||||
]
|
||||
def parse_index(self):
|
||||
index = 'http://timesofindia.indiatimes.com/home/headlines'
|
||||
raw = self.index_to_soup(index, raw=True)
|
||||
root = html.fromstring(raw)
|
||||
|
||||
feeds = []
|
||||
current_section = None
|
||||
current_articles = []
|
||||
|
||||
toc = root.xpath('//div[@align="center"]/descendant::table[@class="cnt"]')[0]
|
||||
|
||||
for x in toc.xpath('descendant::*[name()="h3" or (name()="ul" and @class="content")]'):
|
||||
if x.tag == 'h3':
|
||||
if current_articles and current_section in allowed_sections:
|
||||
feeds.append((current_section, current_articles))
|
||||
current_section = html.tostring(x, method='text', encoding=unicode).strip()
|
||||
current_articles = []
|
||||
self.log(current_section)
|
||||
else:
|
||||
for a in x.xpath('descendant::li/descendant::a[@href]'):
|
||||
title = html.tostring(a, method='text', encoding=unicode).strip()
|
||||
url = a.get('href')
|
||||
if url.startswith('/'):
|
||||
url = 'http://timesofindia.indiatimes.com' + url
|
||||
self.log(' ', title)
|
||||
current_articles.append({'title':title, 'url':url})
|
||||
self.log('')
|
||||
|
||||
if current_articles and current_section in allowed_sections:
|
||||
feeds.append((current_section, current_articles))
|
||||
|
||||
return feeds
|
||||
|
||||
def get_article_url(self, article):
|
||||
try:
|
||||
s = article.summary
|
||||
return urllib.unquote(
|
||||
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
|
||||
except:
|
||||
pass
|
||||
link = article.get('link', None)
|
||||
if link and link.split('/')[-1]=="story01.htm":
|
||||
link=link.split('/')[-2]
|
||||
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
|
||||
'0D': '?', '0E': '-', '0N': '.com', '0L': 'http://'}
|
||||
for k, v in encoding.iteritems():
|
||||
link = link.replace(k, v)
|
||||
return link
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?prtpage=1'
|
||||
|
||||
def preprocess_html(self, soup, *args):
|
||||
byl = soup.find(attrs={'class':'byline'})
|
||||
if byl is not None:
|
||||
for l in byl.findAll('label'):
|
||||
l.extract()
|
||||
return soup
|
||||
|
@ -1,3 +1,6 @@
|
||||
" Scan the following dirs (recursively) for tags
|
||||
let g:project_tags_dirs = ['src/calibre']
|
||||
|
||||
" Include directories for C++ modules
|
||||
let g:syntastic_cpp_include_dirs = [
|
||||
\'/usr/include/python2.7',
|
||||
@ -27,7 +30,7 @@ fun! CalibreLog()
|
||||
hi def link au Keyword
|
||||
syntax match au /^.*:::$/
|
||||
nnoremap <silent> <buffer> n :call cursor(1+search('\V:::\$', 'n'), 0)<CR>
|
||||
nnoremap <silent> <buffer> yb vt#t<Space>y
|
||||
nnoremap <silent> <buffer> yb v/#<CR>t<Space>y:nohl<CR>
|
||||
normal! gg2j
|
||||
edit Changelog.yaml
|
||||
edit src/calibre/constants.py
|
||||
|
@ -63,7 +63,7 @@ def upload_signatures():
|
||||
shell=True)
|
||||
shutil.rmtree(tdir)
|
||||
|
||||
class ReUpload(Command): # {{{
|
||||
class ReUpload(Command): # {{{
|
||||
|
||||
description = 'Re-uplaod any installers present in dist/'
|
||||
|
||||
@ -118,7 +118,7 @@ def run_remote_upload(args):
|
||||
|
||||
# }}}
|
||||
|
||||
class UploadInstallers(Command): # {{{
|
||||
class UploadInstallers(Command): # {{{
|
||||
|
||||
def add_options(self, parser):
|
||||
parser.add_option('--replace', default=False, action='store_true', help=
|
||||
@ -172,7 +172,7 @@ class UploadInstallers(Command): # {{{
|
||||
run_remote_upload(args)
|
||||
# }}}
|
||||
|
||||
class UploadUserManual(Command): # {{{
|
||||
class UploadUserManual(Command): # {{{
|
||||
description = 'Build and upload the User Manual'
|
||||
sub_commands = ['manual']
|
||||
|
||||
@ -184,7 +184,8 @@ class UploadUserManual(Command): # {{{
|
||||
with CurrentDir(path):
|
||||
with ZipFile(f, 'w') as zf:
|
||||
for x in os.listdir('.'):
|
||||
if x.endswith('.swp'): continue
|
||||
if x.endswith('.swp'):
|
||||
continue
|
||||
zf.write(x)
|
||||
if os.path.isdir(x):
|
||||
for y in os.listdir(x):
|
||||
@ -203,7 +204,7 @@ class UploadUserManual(Command): # {{{
|
||||
'bugs:%s'%USER_MANUAL]), shell=True)
|
||||
# }}}
|
||||
|
||||
class UploadDemo(Command): # {{{
|
||||
class UploadDemo(Command): # {{{
|
||||
|
||||
description = 'Rebuild and upload various demos'
|
||||
|
||||
@ -223,20 +224,20 @@ class UploadDemo(Command): # {{{
|
||||
check_call('scp /tmp/html-demo.zip divok:%s/'%(DOWNLOADS,), shell=True)
|
||||
# }}}
|
||||
|
||||
class UploadToServer(Command): # {{{
|
||||
class UploadToServer(Command): # {{{
|
||||
|
||||
description = 'Upload miscellaneous data to calibre server'
|
||||
|
||||
def run(self, opts):
|
||||
check_call('ssh divok rm -f %s/calibre-\*.tar.xz'%DOWNLOADS, shell=True)
|
||||
#check_call('scp dist/calibre-*.tar.xz divok:%s/'%DOWNLOADS, shell=True)
|
||||
# check_call('scp dist/calibre-*.tar.xz divok:%s/'%DOWNLOADS, shell=True)
|
||||
check_call('gpg --armor --detach-sign dist/calibre-*.tar.xz',
|
||||
shell=True)
|
||||
check_call('scp dist/calibre-*.tar.xz.asc divok:%s/signatures/'%DOWNLOADS,
|
||||
shell=True)
|
||||
check_call('ssh divok /usr/local/bin/update-calibre',
|
||||
shell=True)
|
||||
check_call('''ssh divok echo %s \\> %s/latest_version'''\
|
||||
check_call('''ssh divok echo %s \\> %s/latest_version'''
|
||||
%(__version__, DOWNLOADS), shell=True)
|
||||
check_call('ssh divok /etc/init.d/apache2 graceful',
|
||||
shell=True)
|
||||
|
@ -408,6 +408,10 @@ def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None,
|
||||
|
||||
return opener
|
||||
|
||||
def jsbrowser(*args, **kwargs):
    '''
    Construct and return a :class:`calibre.web.jsbrowser.browser.Browser`,
    forwarding all positional and keyword arguments to its constructor.
    '''
    # The import is done at call time, inside the function, exactly as before
    from calibre.web.jsbrowser.browser import Browser as JSBrowser
    instance = JSBrowser(*args, **kwargs)
    return instance
|
||||
|
||||
def fit_image(width, height, pwidth, pheight):
|
||||
'''
|
||||
Fit image in box of width pwidth and height pheight.
|
||||
|
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = u'calibre'
|
||||
numeric_version = (0, 9, 33)
|
||||
numeric_version = (0, 9, 34)
|
||||
__version__ = u'.'.join(map(unicode, numeric_version))
|
||||
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
|
||||
|
@ -554,6 +554,7 @@ from calibre.ebooks.conversion.plugins.txt_input import TXTInput
|
||||
from calibre.ebooks.conversion.plugins.lrf_input import LRFInput
|
||||
from calibre.ebooks.conversion.plugins.chm_input import CHMInput
|
||||
from calibre.ebooks.conversion.plugins.snb_input import SNBInput
|
||||
from calibre.ebooks.conversion.plugins.docx_input import DOCXInput
|
||||
|
||||
from calibre.ebooks.conversion.plugins.epub_output import EPUBOutput
|
||||
from calibre.ebooks.conversion.plugins.fb2_output import FB2Output
|
||||
@ -595,6 +596,7 @@ plugins += [
|
||||
LRFInput,
|
||||
CHMInput,
|
||||
SNBInput,
|
||||
DOCXInput,
|
||||
]
|
||||
plugins += [
|
||||
EPUBOutput,
|
||||
|
@ -985,11 +985,19 @@ class DB(object):
|
||||
else:
|
||||
if callable(getattr(data, 'read', None)):
|
||||
data = data.read()
|
||||
try:
|
||||
save_cover_data_to(data, path)
|
||||
except (IOError, OSError):
|
||||
time.sleep(0.2)
|
||||
save_cover_data_to(data, path)
|
||||
if data is None:
|
||||
if os.path.exists(path):
|
||||
try:
|
||||
os.remove(path)
|
||||
except (IOError, OSError):
|
||||
time.sleep(0.2)
|
||||
os.remove(path)
|
||||
else:
|
||||
try:
|
||||
save_cover_data_to(data, path)
|
||||
except (IOError, OSError):
|
||||
time.sleep(0.2)
|
||||
save_cover_data_to(data, path)
|
||||
|
||||
def copy_format_to(self, book_id, fmt, fname, path, dest,
|
||||
windows_atomic_move=None, use_hardlink=False):
|
||||
|
@ -826,7 +826,8 @@ class Cache(object):
|
||||
@write_api
|
||||
def set_cover(self, book_id_data_map):
|
||||
''' Set the cover for this book. data can be either a QImage,
|
||||
QPixmap, file object or bytestring '''
|
||||
QPixmap, file object or bytestring. It can also be None, in which
|
||||
case any existing cover is removed. '''
|
||||
|
||||
for book_id, data in book_id_data_map.iteritems():
|
||||
try:
|
||||
@ -836,7 +837,8 @@ class Cache(object):
|
||||
path = self._field_for('path', book_id).replace('/', os.sep)
|
||||
|
||||
self.backend.set_cover(book_id, path, data)
|
||||
self._set_field('cover', {book_id:1 for book_id in book_id_data_map})
|
||||
return self._set_field('cover', {
|
||||
book_id:(0 if data is None else 1) for book_id, data in book_id_data_map.iteritems()})
|
||||
|
||||
@write_api
|
||||
def set_metadata(self, book_id, mi, ignore_errors=False, force_changes=False,
|
||||
|
@ -24,16 +24,23 @@ if __name__ == '__main__':
|
||||
args = parser.parse_args()
|
||||
if args.name and args.name.startswith('.'):
|
||||
tests = find_tests()
|
||||
q = args.name[1:]
|
||||
if not q.startswith('test_'):
|
||||
q = 'test_' + q
|
||||
ans = None
|
||||
try:
|
||||
for suite in tests:
|
||||
for test in suite._tests:
|
||||
for s in test:
|
||||
if s._testMethodName == args.name[1:]:
|
||||
tests = s
|
||||
if s._testMethodName == q:
|
||||
ans = s
|
||||
raise StopIteration()
|
||||
except StopIteration:
|
||||
pass
|
||||
if ans is None:
|
||||
print ('No test named %s found' % args.name)
|
||||
raise SystemExit(1)
|
||||
tests = ans
|
||||
else:
|
||||
tests = unittest.defaultTestLoader.loadTestsFromName(args.name) if args.name else find_tests()
|
||||
unittest.TextTestRunner(verbosity=4).run(tests)
|
||||
|
@ -355,7 +355,28 @@ class WritingTest(BaseTest):
|
||||
ae(opf.authors, ['author1', 'author2'])
|
||||
# }}}
|
||||
|
||||
def test_set_cover(self):
|
||||
def test_set_cover(self): # {{{
|
||||
' Test setting of cover '
|
||||
self.assertTrue(False, 'TODO: test set_cover() and set_metadata()')
|
||||
cache = self.init_cache()
|
||||
ae = self.assertEqual
|
||||
|
||||
# Test removing a cover
|
||||
ae(cache.field_for('cover', 1), 1)
|
||||
ae(cache.set_cover({1:None}), set([1]))
|
||||
ae(cache.field_for('cover', 1), 0)
|
||||
|
||||
img = b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00`\x00`\x00\x00\xff\xe1\x00\x16Exif\x00\x00II*\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\xff\xdb\x00C\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\xff\xc0\x00\x11\x08\x00\x01\x00\x01\x03\x01"\x00\x02\x11\x01\x03\x11\x01\xff\xc4\x00\x15\x00\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n\xff\xc4\x00\x14\x10\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xc4\x00\x14\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xc4\x00\x14\x11\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xda\x00\x0c\x03\x01\x00\x02\x11\x03\x11\x00?\x00\xbf\x80\x01\xff\xd9' # noqa {{{ }}}
|
||||
# Test setting a cover
|
||||
ae(cache.set_cover({bid:img for bid in (1, 2, 3)}), {1, 2, 3})
|
||||
old = self.init_old()
|
||||
for book_id in (1, 2, 3):
|
||||
ae(cache.cover(book_id), img, 'Cover was not set correctly for book %d' % book_id)
|
||||
ae(cache.field_for('cover', book_id), 1)
|
||||
ae(old.cover(book_id, index_is_id=True), img, 'Cover was not set correctly for book %d' % book_id)
|
||||
self.assertTrue(old.has_cover(book_id))
|
||||
# }}}
|
||||
|
||||
def test_set_metadata(self):
|
||||
' Test setting of metadata '
|
||||
self.assertTrue(False, 'TODO: test set_metadata()')
|
||||
|
||||
|
@ -461,7 +461,7 @@ class Writer(object):
|
||||
dt = field.metadata['datatype']
|
||||
self.accept_vals = lambda x: True
|
||||
if dt == 'composite' or field.name in {
|
||||
'id', 'cover', 'size', 'path', 'formats', 'news'}:
|
||||
'id', 'size', 'path', 'formats', 'news'}:
|
||||
self.set_books_func = dummy
|
||||
elif self.name[0] == '#' and self.name.endswith('_index'):
|
||||
self.set_books_func = custom_series_index
|
||||
|
@ -152,7 +152,8 @@ def add_simple_plugin(path_to_plugin):
|
||||
shutil.rmtree(tdir)
|
||||
|
||||
def print_basic_debug_info(out=None):
|
||||
if out is None: out = sys.stdout
|
||||
if out is None:
|
||||
out = sys.stdout
|
||||
out = functools.partial(prints, file=out)
|
||||
import platform
|
||||
from calibre.constants import (__appname__, get_version, isportable, isosx,
|
||||
@ -175,7 +176,7 @@ def print_basic_debug_info(out=None):
|
||||
|
||||
def run_debug_gui(logpath):
|
||||
import time
|
||||
time.sleep(3) # Give previous GUI time to shutdown fully and release locks
|
||||
time.sleep(3) # Give previous GUI time to shutdown fully and release locks
|
||||
from calibre.constants import __appname__
|
||||
prints(__appname__, _('Debug log'))
|
||||
print_basic_debug_info()
|
||||
@ -197,6 +198,12 @@ def run_script(path, args):
|
||||
g['__file__'] = ef
|
||||
execfile(ef, g)
|
||||
|
||||
def inspect_mobi(path):
    '''
    Announce ``path`` via prints() and run the MOBI inspector from
    calibre.ebooks.mobi.debug.main on it, followed by a bare ``print``.
    '''
    # Imported under an alias so the local name does not reuse this
    # function's own name
    from calibre.ebooks.mobi.debug.main import inspect_mobi as run_inspection
    prints('Inspecting:', path)
    run_inspection(path)
    # Bare print statement (Python 2): emits an empty line after the report
    print
|
||||
|
||||
def main(args=sys.argv):
|
||||
from calibre.constants import debug
|
||||
debug()
|
||||
@ -231,7 +238,7 @@ def main(args=sys.argv):
|
||||
main()
|
||||
elif opts.command:
|
||||
sys.argv = args
|
||||
exec opts.command
|
||||
exec(opts.command)
|
||||
elif opts.debug_device_driver:
|
||||
debug_device_driver()
|
||||
elif opts.add_simple_plugin is not None:
|
||||
@ -246,11 +253,8 @@ def main(args=sys.argv):
|
||||
sql_dump = args[-1]
|
||||
reinit_db(opts.reinitialize_db, sql_dump=sql_dump)
|
||||
elif opts.inspect_mobi:
|
||||
from calibre.ebooks.mobi.debug.main import inspect_mobi
|
||||
for path in args[1:]:
|
||||
prints('Inspecting:', path)
|
||||
inspect_mobi(path)
|
||||
print
|
||||
elif opts.tweak_book:
|
||||
from calibre.ebooks.tweak import tweak
|
||||
tweak(opts.tweak_book)
|
||||
@ -274,6 +278,16 @@ def main(args=sys.argv):
|
||||
plugin.cli_main([plugin.name] + args[1:])
|
||||
elif len(args) >= 2 and args[1].rpartition('.')[-1] in {'py', 'recipe'}:
|
||||
run_script(args[1], args[2:])
|
||||
elif len(args) >= 2 and args[1].rpartition('.')[-1] in {'mobi', 'azw', 'azw3', 'docx'}:
|
||||
for path in args[1:]:
|
||||
ext = path.rpartition('.')[-1]
|
||||
if ext == 'docx':
|
||||
from calibre.ebooks.docx.dump import dump
|
||||
dump(path)
|
||||
elif ext in {'mobi', 'azw', 'azw3'}:
|
||||
inspect_mobi(path)
|
||||
else:
|
||||
print ('Cannot dump unknown filetype: %s' % path)
|
||||
else:
|
||||
from calibre import ipython
|
||||
ipython()
|
||||
@ -282,3 +296,4 @@ def main(args=sys.argv):
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
||||
|
@ -1174,10 +1174,10 @@ class libiMobileDevice():
|
||||
self.plist_lib.plist_free(plist)
|
||||
|
||||
# To determine success, we need to inspect the returned plist
|
||||
if hasattr(result, 'Status'):
|
||||
if 'Status' in result:
|
||||
if self.verbose:
|
||||
self.log(" STATUS: %s" % result['Status'])
|
||||
elif hasattr(result, 'Error'):
|
||||
elif 'Error' in result:
|
||||
if self.verbose:
|
||||
self.log(" ERROR: %s" % result['Error'])
|
||||
raise libiMobileDeviceException(result['Error'])
|
||||
@ -1293,7 +1293,9 @@ class libiMobileDevice():
|
||||
else:
|
||||
index = 0
|
||||
while devices[index]:
|
||||
device_list.append(devices[index].contents.value)
|
||||
# Filter out redundant entries
|
||||
if devices[index].contents.value not in device_list:
|
||||
device_list.append(devices[index].contents.value)
|
||||
index += 1
|
||||
if self.verbose:
|
||||
self.log(" %s" % repr(device_list))
|
||||
|
@ -35,7 +35,7 @@ class KOBO(USBMS):
|
||||
gui_name = 'Kobo Reader'
|
||||
description = _('Communicate with the Kobo Reader')
|
||||
author = 'Timothy Legge and David Forrester'
|
||||
version = (2, 0, 11)
|
||||
version = (2, 0, 12)
|
||||
|
||||
dbversion = 0
|
||||
fwversion = 0
|
||||
@ -1218,7 +1218,7 @@ class KOBOTOUCH(KOBO):
|
||||
min_dbversion_images_on_sdcard = 77
|
||||
min_dbversion_activiy = 77
|
||||
|
||||
max_supported_fwversion = (2,5,3)
|
||||
max_supported_fwversion = (2,6,1)
|
||||
min_fwversion_images_on_sdcard = (2,4,1)
|
||||
|
||||
has_kepubs = True
|
||||
@ -2381,9 +2381,17 @@ class KOBOTOUCH(KOBO):
|
||||
"WHERE Shelf.Name = C.ShelfName "
|
||||
"AND c._IsDeleted <> 'true')")
|
||||
|
||||
delete_activity_query = ("DELETE FROM Activity "
|
||||
"WHERE Type = 'Shelf' "
|
||||
"AND NOT EXISTS "
|
||||
"(SELECT 1 FROM Shelf "
|
||||
"WHERE Shelf.Name = Activity.Id)"
|
||||
)
|
||||
|
||||
cursor = connection.cursor()
|
||||
cursor.execute(delete_query)
|
||||
cursor.execute(update_query)
|
||||
cursor.execute(delete_activity_query)
|
||||
connection.commit()
|
||||
cursor.close()
|
||||
|
||||
|
22
src/calibre/ebooks/conversion/plugins/docx_input.py
Normal file
22
src/calibre/ebooks/conversion/plugins/docx_input.py
Normal file
@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
|
||||
class DOCXInput(InputFormatPlugin):
    # Conversion input plugin registered for the 'docx' file type. The actual
    # work is delegated to calibre.ebooks.docx.to_html.Convert.
    name = 'DOCX Input'
    author = 'Kovid Goyal'
    description = 'Convert DOCX files (.docx) to HTML'
    file_types = {'docx'}

    # Recommend a page_breaks_before value of '/' at MED strength
    recommendations = {('page_breaks_before', '/', OptionRecommendation.MED)}

    def convert(self, stream, options, file_ext, log, accelerators):
        # Only ``stream`` and ``log`` are used here; the remaining arguments
        # are accepted but ignored by this method.
        from calibre.ebooks.docx.to_html import Convert
        converter = Convert(stream, log=log)
        return converter()
|
||||
|
@ -87,7 +87,7 @@ class HTMLInput(InputFormatPlugin):
|
||||
return self._is_case_sensitive
|
||||
if not path or not os.path.exists(path):
|
||||
return islinux or isbsd
|
||||
self._is_case_sensitive = not (os.path.exists(path.lower()) \
|
||||
self._is_case_sensitive = not (os.path.exists(path.lower())
|
||||
and os.path.exists(path.upper()))
|
||||
return self._is_case_sensitive
|
||||
|
||||
@ -101,6 +101,8 @@ class HTMLInput(InputFormatPlugin):
|
||||
from calibre.ebooks.oeb.transforms.metadata import \
|
||||
meta_info_to_oeb_metadata
|
||||
from calibre.ebooks.html.input import get_filelist
|
||||
from calibre.ebooks.metadata import string_to_authors
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
import cssutils, logging
|
||||
cssutils.log.setLevel(logging.WARN)
|
||||
self.OEB_STYLES = OEB_STYLES
|
||||
@ -111,11 +113,20 @@ class HTMLInput(InputFormatPlugin):
|
||||
metadata = oeb.metadata
|
||||
meta_info_to_oeb_metadata(mi, metadata, log)
|
||||
if not metadata.language:
|
||||
oeb.logger.warn(u'Language not specified')
|
||||
metadata.add('language', get_lang().replace('_', '-'))
|
||||
l = canonicalize_lang(getattr(opts, 'language', None))
|
||||
if not l:
|
||||
oeb.logger.warn(u'Language not specified')
|
||||
l = get_lang().replace('_', '-')
|
||||
metadata.add('language', l)
|
||||
if not metadata.creator:
|
||||
oeb.logger.warn('Creator not specified')
|
||||
metadata.add('creator', self.oeb.translate(__('Unknown')))
|
||||
a = getattr(opts, 'authors', None)
|
||||
if a:
|
||||
a = string_to_authors(a)
|
||||
if not a:
|
||||
oeb.logger.warn('Creator not specified')
|
||||
a = [self.oeb.translate(__('Unknown'))]
|
||||
for aut in a:
|
||||
metadata.add('creator', aut)
|
||||
if not metadata.title:
|
||||
oeb.logger.warn('Title not specified')
|
||||
metadata.add('title', self.oeb.translate(__('Unknown')))
|
||||
@ -175,7 +186,8 @@ class HTMLInput(InputFormatPlugin):
|
||||
titles = []
|
||||
headers = []
|
||||
for item in self.oeb.spine:
|
||||
if not item.linear: continue
|
||||
if not item.linear:
|
||||
continue
|
||||
html = item.data
|
||||
title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
|
||||
title = re.sub(r'\s+', ' ', title.strip())
|
||||
@ -193,7 +205,8 @@ class HTMLInput(InputFormatPlugin):
|
||||
if len(titles) > len(set(titles)):
|
||||
use = headers
|
||||
for title, item in izip(use, self.oeb.spine):
|
||||
if not item.linear: continue
|
||||
if not item.linear:
|
||||
continue
|
||||
toc.add(title, item.href)
|
||||
|
||||
oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
|
||||
@ -291,3 +304,4 @@ class HTMLInput(InputFormatPlugin):
|
||||
self.log.exception('Failed to read CSS file: %r'%link)
|
||||
return (None, None)
|
||||
return (None, raw)
|
||||
|
||||
|
@ -87,9 +87,12 @@ def read_single_border(parent, edge):
|
||||
if sz is not None:
|
||||
# we don't care about art borders (they are only used for page borders)
|
||||
try:
|
||||
width = min(96, max(2, float(sz))) / 8
|
||||
# WebKit needs at least 1pt to render borders
|
||||
width = min(96, max(8, float(sz))) / 8
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
if style == 'double' and width is not None and 0 < width < 3:
|
||||
width = 3 # WebKit needs 3pts to render double borders
|
||||
return {p:v for p, v in zip(border_props, (padding, width, style, color))}
|
||||
|
||||
def read_border(parent, dest, border_edges=('left', 'top', 'right', 'bottom'), name='pBdr'):
|
||||
@ -297,7 +300,7 @@ class ParagraphStyle(object):
|
||||
|
||||
# Misc.
|
||||
'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
|
||||
'numbering', 'font_family', 'font_size', 'frame',
|
||||
'numbering', 'font_family', 'font_size', 'color', 'frame',
|
||||
)
|
||||
|
||||
def __init__(self, pPr=None):
|
||||
@ -321,7 +324,7 @@ class ParagraphStyle(object):
|
||||
for s in XPath('./w:pStyle[@w:val]')(pPr):
|
||||
self.linked_style = get(s, 'w:val')
|
||||
|
||||
self.font_family = self.font_size = inherit
|
||||
self.font_family = self.font_size = self.color = inherit
|
||||
|
||||
self._css = None
|
||||
|
||||
@ -365,7 +368,7 @@ class ParagraphStyle(object):
|
||||
if self.line_height not in {inherit, '1'}:
|
||||
c['line-height'] = self.line_height
|
||||
|
||||
for x in ('text_indent', 'text_align', 'background_color', 'font_family', 'font_size'):
|
||||
for x in ('text_indent', 'text_align', 'background_color', 'font_family', 'font_size', 'color'):
|
||||
val = getattr(self, x)
|
||||
if val is not inherit:
|
||||
if x == 'font_size':
|
||||
|
@ -36,7 +36,8 @@ def read_text_border(parent, dest):
|
||||
if sz is not None:
|
||||
# we don't care about art borders (they are only used for page borders)
|
||||
try:
|
||||
border_width = min(96, max(2, float(sz))) / 8
|
||||
# A border of less than 1pt is not rendered by WebKit
|
||||
border_width = min(96, max(8, float(sz))) / 8
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
@ -103,7 +104,7 @@ def read_underline(parent, dest):
|
||||
for col in XPath('./w:u[@w:val]')(parent):
|
||||
val = get(col, 'w:val')
|
||||
if val:
|
||||
ans = 'underline'
|
||||
ans = val if val == 'none' else 'underline'
|
||||
setattr(dest, 'text_decoration', ans)
|
||||
|
||||
def read_vert_align(parent, dest):
|
||||
@ -116,8 +117,12 @@ def read_vert_align(parent, dest):
|
||||
|
||||
def read_font_family(parent, dest):
|
||||
ans = inherit
|
||||
for col in XPath('./w:rFonts[@w:ascii]')(parent):
|
||||
val = get(col, 'w:ascii')
|
||||
for col in XPath('./w:rFonts')(parent):
|
||||
val = get(col, 'w:asciiTheme')
|
||||
if val:
|
||||
val = '|%s|' % val
|
||||
else:
|
||||
val = get(col, 'w:ascii')
|
||||
if val:
|
||||
ans = val
|
||||
setattr(dest, 'font_family', ans)
|
||||
@ -234,16 +239,5 @@ class RunStyle(object):
|
||||
return self._css
|
||||
|
||||
def same_border(self, other):
|
||||
for x in (self, other):
|
||||
has_border = False
|
||||
for y in ('color', 'style', 'width'):
|
||||
if ('border-%s' % y) in x.css:
|
||||
has_border = True
|
||||
break
|
||||
if not has_border:
|
||||
return False
|
||||
|
||||
s = tuple(self.css.get('border-%s' % y, None) for y in ('color', 'style', 'width'))
|
||||
o = tuple(other.css.get('border-%s' % y, None) for y in ('color', 'style', 'width'))
|
||||
return s == o
|
||||
return self.get_border_css({}) == other.get_border_css({})
|
||||
|
||||
|
136
src/calibre/ebooks/docx/cleanup.py
Normal file
136
src/calibre/ebooks/docx/cleanup.py
Normal file
@ -0,0 +1,136 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
|
||||
def mergeable(previous, current):
    '''
    Return True if ``current`` can be folded into ``previous``.

    That requires all of the following: neither element has tail text, both
    have the same ``class`` attribute (or both lack one), ``current`` has no
    ``id`` attribute, and ``current`` is the first sibling that follows
    ``previous``.
    '''
    if previous.tail or current.tail:
        return False
    differing_class = previous.get('class', None) != current.get('class', None)
    if differing_class or current.get('id', False):
        return False
    # Only the first following sibling matters: it must be ``current`` itself
    for following in previous.itersiblings():
        return following is current
    return False
|
||||
|
||||
|
||||
def append_text(parent, text):
    '''
    Append ``text`` at the very end of the content of ``parent``: onto the
    tail of its last child when it has children, otherwise onto its own text.
    '''
    if len(parent) == 0:
        parent.text = (parent.text or '') + text
        return
    last_child = parent[-1]
    last_child.tail = (last_child.tail or '') + text
|
||||
|
||||
|
||||
def merge(parent, span):
    '''
    Move the content of ``span`` (its text, its children, then its tail) to
    the end of ``parent`` and remove ``span`` from its own parent element.
    '''
    leading = span.text
    if leading:
        append_text(parent, leading)
    for node in span:
        parent.append(node)
    trailing = span.tail
    if trailing:
        append_text(parent, trailing)
    owner = span.getparent()
    owner.remove(span)
|
||||
|
||||
|
||||
def merge_run(run):
    '''
    Collapse a run of spans into its first member by merging every later
    member into it, in order. ``run`` must be non-empty.
    '''
    target, followers = run[0], run[1:]
    for span in followers:
        merge(target, span)
|
||||
|
||||
|
||||
def liftable(css):
    '''
    Return True if every property in ``css`` can be moved to the parent.

    A <span> is liftable if all its styling would work just as well if it is
    specified on the parent element. That is decided by the part of each
    property name before the first hyphen, which must be one of ``text``,
    ``font``, ``letter``, ``color`` or ``background``. An empty ``css`` is
    liftable.

    :param css: mapping of CSS property name to value; only the keys are read.
    '''
    allowed = {'text', 'font', 'letter', 'color', 'background'}
    # Iterate the mapping directly instead of calling iterkeys(), which only
    # exists on Python 2 dicts
    return all(prop.partition('-')[0] in allowed for prop in css)
|
||||
|
||||
|
||||
def add_text(elem, attr, text):
    '''
    Append ``text`` to the attribute named ``attr`` of ``elem``, treating a
    missing or empty current value as the empty string.
    '''
    existing = getattr(elem, attr)
    combined = (existing if existing else '') + text
    setattr(elem, attr, combined)
|
||||
|
||||
|
||||
def lift(span):
    '''
    Replace an element by its content (text, children and tail), keeping the
    content at the position the element occupied in its parent.
    '''
    container = span.getparent()
    pos = container.index(span)
    last_child = span[-1] if len(span) > 0 else None

    def attach_before_pos(text):
        # Text that lands just before index ``pos`` belongs to the text of
        # the container when pos is 0, else to the tail of the preceding node
        if pos == 0:
            add_text(container, 'text', text)
        else:
            add_text(container[pos - 1], 'tail', text)

    if span.text:
        attach_before_pos(span.text)

    # Inserting in reverse at a fixed index preserves the children's order
    for child in reversed(span):
        container.insert(pos, child)
    container.remove(span)

    trailing = span.tail
    if trailing:
        if last_child is None:
            # No children were moved, so the tail follows whatever precedes pos
            attach_before_pos(trailing)
        else:
            add_text(last_child, 'tail', trailing)
|
||||
|
||||
|
||||
def cleanup_markup(root, styles):
    """Simplify the <span> markup in the tree rooted at ``root``.

    Four passes are made, in order: consecutive mergeable spans are merged,
    a span that is the entire content of a block element is folded into that
    block, spans whose only styling is bold or italic become <b>/<i> and
    finally spans with neither a class nor an id are replaced by their
    content.

    :param root: Root element, must support ``xpath()``
    :param styles: Object whose ``classes`` attribute is consumed via
        ``dict(styles.classes.itervalues())`` to map class names to CSS dicts
    """
    # Merge consecutive spans that have the same styling
    current_run = []
    for span in root.xpath('//span'):
        if current_run and mergeable(current_run[-1], span):
            current_run.append(span)
            continue
        if len(current_run) > 1:
            merge_run(current_run)
        current_run = [span]
    # A run is only merged above when a later, non-mergeable span terminates
    # it, so the run that is still open at the end must be flushed explicitly
    if len(current_run) > 1:
        merge_run(current_run)

    # Remove unnecessary span tags that are the only child of a parent block
    # element
    class_map = dict(styles.classes.itervalues())
    parents = ('p', 'div') + tuple('h%d' % i for i in xrange(1, 7))
    for parent in root.xpath('//*[(%s) and count(span)=1]' % ' or '.join('name()="%s"' % t for t in parents)):
        if len(parent) == 1 and not parent.text and not parent[0].tail and not parent[0].get('id', None):
            # We have a block whose contents are entirely enclosed in a <span>
            span = parent[0]
            span_class = span.get('class', None)
            span_css = class_map.get(span_class, {})
            if liftable(span_css):
                pclass = parent.get('class', None)
                if span_class:
                    pclass = (pclass + ' ' + span_class) if pclass else span_class
                    parent.set('class', pclass)
                parent.text = span.text
                parent.remove(span)
                for child in span:
                    parent.append(child)

    # Make spans whose only styling is bold or italic into <b> and <i> tags
    for span in root.xpath('//span[@class]'):
        css = class_map.get(span.get('class', None), {})
        if len(css) == 1:
            if css == {'font-style':'italic'}:
                span.tag = 'i'
                del span.attrib['class']
            elif css == {'font-weight':'bold'}:
                span.tag = 'b'
                del span.attrib['class']

    # Get rid of <span>s that have no styling
    for span in root.xpath('//span[not(@class) and not(@id)]'):
        lift(span)
|
||||
|
@ -39,7 +39,7 @@ def read_doc_props(raw, mi):
|
||||
for keywords in XPath('//cp:keywords')(root):
|
||||
if keywords.text and keywords.text.strip():
|
||||
for x in keywords.text.split():
|
||||
tags.extend(y.strip() for y in x.split(','))
|
||||
tags.extend(y.strip() for y in x.split(',') if y.strip())
|
||||
if tags:
|
||||
mi.tags = tags
|
||||
authors = XPath('//dc:creator')(root)
|
||||
|
@ -15,7 +15,7 @@ from calibre.utils.zipfile import ZipFile
|
||||
|
||||
def dump(path):
|
||||
dest = os.path.splitext(os.path.basename(path))[0]
|
||||
dest += '_extracted'
|
||||
dest += '-dumped'
|
||||
if os.path.exists(dest):
|
||||
shutil.rmtree(dest)
|
||||
with ZipFile(path) as zf:
|
||||
|
@ -104,9 +104,12 @@ class Images(object):
|
||||
if rid in self.used:
|
||||
return self.used[rid]
|
||||
raw = self.docx.read(self.rid_map[rid])
|
||||
base = base or ascii_filename(self.rid_map[rid].rpartition('/')[-1]).replace(' ', '_')
|
||||
base = base or ascii_filename(self.rid_map[rid].rpartition('/')[-1]).replace(' ', '_') or 'image'
|
||||
ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg'
|
||||
base = base.rpartition('.')[0] + '.' + ext
|
||||
base = base.rpartition('.')[0]
|
||||
if not base:
|
||||
base = 'image'
|
||||
base += '.' + ext
|
||||
exists = frozenset(self.used.itervalues())
|
||||
c = 1
|
||||
while base in exists:
|
||||
@ -132,7 +135,7 @@ class Images(object):
|
||||
src = self.generate_filename(rid, name)
|
||||
img = IMG(src='images/%s' % src)
|
||||
if alt:
|
||||
img(alt=alt)
|
||||
img.set('alt', alt)
|
||||
return img
|
||||
|
||||
def drawing_to_html(self, drawing, page):
|
||||
@ -157,6 +160,17 @@ class Images(object):
|
||||
ans.set('style', '; '.join('%s: %s' % (k, v) for k, v in style.iteritems()))
|
||||
yield ans
|
||||
|
||||
def pict_to_html(self, pict, page):
    """Yield an <img> element for every v:imagedata inside ``pict`` whose
    r:id is present in ``self.rid_map``. ``page`` is not used here."""
    for imagedata in XPath('descendant::v:imagedata[@r:id]')(pict):
        rid = get(imagedata, 'r:id')
        if rid not in self.rid_map:
            continue
        fname = self.generate_filename(rid)
        img = IMG(src='images/%s' % fname, style="display:block")
        # The o:title attribute, when present and non-empty, becomes the alt text
        title = get(imagedata, 'o:title')
        if title:
            img.set('alt', title)
        yield img
|
||||
|
||||
def get_float_properties(self, anchor, style, page):
|
||||
if 'display' not in style:
|
||||
style['display'] = 'block'
|
||||
@ -200,6 +214,8 @@ class Images(object):
|
||||
if elem.tag.endswith('}drawing'):
|
||||
for tag in self.drawing_to_html(elem, page):
|
||||
yield tag
|
||||
# TODO: Handle w:pict
|
||||
else:
|
||||
for tag in self.pict_to_html(elem, page):
|
||||
yield tag
|
||||
|
||||
|
||||
|
@ -7,7 +7,6 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import re
|
||||
from future_builtins import map
|
||||
|
||||
from lxml.etree import XPath as X
|
||||
|
||||
@ -23,6 +22,7 @@ IMAGES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships
|
||||
LINKS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink'
|
||||
FOOTNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes'
|
||||
ENDNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes'
|
||||
THEMES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme'
|
||||
|
||||
namespaces = {
|
||||
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
||||
@ -84,11 +84,10 @@ def get(x, attr, default=None):
|
||||
return x.attrib.get(expand(attr), default)
|
||||
|
||||
def ancestor(elem, name):
|
||||
tag = expand(name)
|
||||
while elem is not None:
|
||||
elem = elem.getparent()
|
||||
if getattr(elem, 'tag', None) == tag:
|
||||
return elem
|
||||
try:
|
||||
return XPath('ancestor::%s[1]' % name)(elem)[0]
|
||||
except IndexError:
|
||||
return None
|
||||
|
||||
def generate_anchor(name, existing):
|
||||
x = y = 'id_' + re.sub(r'[^0-9a-zA-Z_]', '', ascii_text(name)).lstrip('_')
|
||||
@ -99,7 +98,7 @@ def generate_anchor(name, existing):
|
||||
return y
|
||||
|
||||
def children(elem, *args):
|
||||
return elem.iterchildren(*map(expand, args))
|
||||
return XPath('|'.join('child::%s' % a for a in args))(elem)
|
||||
|
||||
def descendants(elem, *args):
|
||||
return elem.iterdescendants(*map(expand, args))
|
||||
return XPath('|'.join('descendant::%s' % a for a in args))(elem)
|
||||
|
@ -142,8 +142,8 @@ class Styles(object):
|
||||
def get(self, key, default=None):
|
||||
return self.id_map.get(key, default)
|
||||
|
||||
def __call__(self, root, fonts):
|
||||
self.fonts = fonts
|
||||
def __call__(self, root, fonts, theme):
|
||||
self.fonts, self.theme = fonts, theme
|
||||
for s in XPath('//w:style')(root):
|
||||
s = Style(s)
|
||||
if s.style_id:
|
||||
@ -265,7 +265,8 @@ class Styles(object):
|
||||
def resolve_run(self, r):
|
||||
ans = self.run_cache.get(r, None)
|
||||
if ans is None:
|
||||
p = r.getparent()
|
||||
p = XPath('ancestor::w:p[1]')(r)
|
||||
p = p[0] if p else None
|
||||
ans = self.run_cache[r] = RunStyle()
|
||||
direct_formatting = None
|
||||
for rPr in XPath('./w:rPr')(r):
|
||||
@ -282,12 +283,16 @@ class Styles(object):
|
||||
default_char = self.default_styles.get('character', None)
|
||||
if self.default_character_style is not None:
|
||||
parent_styles.append(self.default_character_style)
|
||||
ts = self.tables.run_style(p)
|
||||
if ts is not None:
|
||||
parent_styles.append(ts)
|
||||
pstyle = self.para_char_cache.get(p, None)
|
||||
if pstyle is not None:
|
||||
parent_styles.append(pstyle)
|
||||
# As best as I can understand the spec, table overrides should be
|
||||
# applied before paragraph overrides, but word does it
|
||||
# this way, see the December 2007 table header in the demo
|
||||
# document.
|
||||
ts = self.tables.run_style(p)
|
||||
if ts is not None:
|
||||
parent_styles.append(ts)
|
||||
if direct_formatting.linked_style is not None:
|
||||
ls = self.get(direct_formatting.linked_style).character_style
|
||||
if ls is not None:
|
||||
@ -299,7 +304,8 @@ class Styles(object):
|
||||
setattr(ans, attr, self.run_val(parent_styles, direct_formatting, attr))
|
||||
|
||||
if ans.font_family is not inherit:
|
||||
ans.font_family = self.fonts.family_for(ans.font_family, ans.b, ans.i)
|
||||
ff = self.theme.resolve_font_family(ans.font_family)
|
||||
ans.font_family = self.fonts.family_for(ff, ans.b, ans.i)
|
||||
|
||||
return ans
|
||||
|
||||
@ -312,51 +318,63 @@ class Styles(object):
|
||||
def cascade(self, layers):
|
||||
self.body_font_family = 'serif'
|
||||
self.body_font_size = '10pt'
|
||||
self.body_color = 'black'
|
||||
|
||||
def promote_property(char_styles, block_style, prop):
|
||||
vals = {getattr(s, prop) for s in char_styles}
|
||||
if len(vals) == 1:
|
||||
# All the character styles have the same value
|
||||
for s in char_styles:
|
||||
setattr(s, prop, inherit)
|
||||
setattr(block_style, prop, next(iter(vals)))
|
||||
|
||||
for p, runs in layers.iteritems():
|
||||
has_links = '1' in {r.get('is-link', None) for r in runs}
|
||||
char_styles = [self.resolve_run(r) for r in runs]
|
||||
block_style = self.resolve_paragraph(p)
|
||||
c = Counter()
|
||||
for prop in ('font_family', 'font_size', 'color'):
|
||||
if has_links and prop == 'color':
|
||||
# We cannot promote color as browser rendering engines will
|
||||
# override the link color setting it to blue, unless the
|
||||
# color is specified on the link element itself
|
||||
continue
|
||||
promote_property(char_styles, block_style, prop)
|
||||
for s in char_styles:
|
||||
if s.font_family is not inherit:
|
||||
c[s.font_family] += 1
|
||||
if s.text_decoration == 'none':
|
||||
# The default text decoration is 'none'
|
||||
s.text_decoration = inherit
|
||||
|
||||
def promote_most_common(block_styles, prop, default):
|
||||
c = Counter()
|
||||
for s in block_styles:
|
||||
val = getattr(s, prop)
|
||||
if val is not inherit:
|
||||
c[val] += 1
|
||||
val = None
|
||||
if c:
|
||||
family = c.most_common(1)[0][0]
|
||||
block_style.font_family = family
|
||||
for s in char_styles:
|
||||
if s.font_family == family:
|
||||
s.font_family = inherit
|
||||
val = c.most_common(1)[0][0]
|
||||
for s in block_styles:
|
||||
oval = getattr(s, prop)
|
||||
if oval is inherit:
|
||||
if default != val:
|
||||
setattr(s, prop, default)
|
||||
elif oval == val:
|
||||
setattr(s, prop, inherit)
|
||||
return val
|
||||
|
||||
sizes = [s.font_size for s in char_styles if s.font_size is not inherit]
|
||||
if sizes:
|
||||
sz = block_style.font_size = sizes[0]
|
||||
for s in char_styles:
|
||||
if s.font_size == sz:
|
||||
s.font_size = inherit
|
||||
block_styles = tuple(self.resolve_paragraph(p) for p in layers)
|
||||
|
||||
block_styles = [self.resolve_paragraph(p) for p in layers]
|
||||
c = Counter()
|
||||
for s in block_styles:
|
||||
if s.font_family is not inherit:
|
||||
c[s.font_family] += 1
|
||||
ff = promote_most_common(block_styles, 'font_family', self.body_font_family)
|
||||
if ff is not None:
|
||||
self.body_font_family = ff
|
||||
|
||||
if c:
|
||||
self.body_font_family = family = c.most_common(1)[0][0]
|
||||
for s in block_styles:
|
||||
if s.font_family == family:
|
||||
s.font_family = inherit
|
||||
fs = promote_most_common(block_styles, 'font_size', int(self.body_font_size[:2]))
|
||||
if fs is not None:
|
||||
self.body_font_size = '%.3gpt' % fs
|
||||
|
||||
c = Counter()
|
||||
for s in block_styles:
|
||||
if s.font_size is not inherit:
|
||||
c[s.font_size] += 1
|
||||
|
||||
if c:
|
||||
sz = c.most_common(1)[0][0]
|
||||
for s in block_styles:
|
||||
if s.font_size == sz:
|
||||
s.font_size = inherit
|
||||
self.body_font_size = '%.3gpt' % sz
|
||||
color = promote_most_common(block_styles, 'color', self.body_color)
|
||||
if color is not None:
|
||||
self.body_color = color
|
||||
|
||||
def resolve_numbering(self, numbering):
|
||||
# When a numPr element appears inside a paragraph style, the lvl info
|
||||
@ -398,9 +416,7 @@ class Styles(object):
|
||||
ef = self.fonts.embed_fonts(dest_dir, docx)
|
||||
prefix = textwrap.dedent(
|
||||
'''\
|
||||
body { font-family: %s; font-size: %s }
|
||||
|
||||
p { text-indent: 1.5em }
|
||||
body { font-family: %s; font-size: %s; color: %s }
|
||||
|
||||
ul, ol, p { margin: 0; padding: 0 }
|
||||
|
||||
@ -416,7 +432,7 @@ class Styles(object):
|
||||
|
||||
dl.notes dd:last-of-type { page-break-after: avoid }
|
||||
|
||||
''') % (self.body_font_family, self.body_font_size)
|
||||
''') % (self.body_font_family, self.body_font_size, self.body_color)
|
||||
if ef:
|
||||
prefix = ef + '\n' + prefix
|
||||
|
||||
@ -427,3 +443,4 @@ class Styles(object):
|
||||
ans.append('.%s {\n%s\n}\n' % (cls, b.rstrip(';')))
|
||||
return prefix + '\n' + '\n'.join(ans)
|
||||
|
||||
|
||||
|
@ -8,11 +8,14 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from lxml.html.builder import TABLE, TR, TD
|
||||
|
||||
from calibre.ebooks.docx.block_styles import inherit, read_shd, read_border, binary_property, border_props, ParagraphStyle # noqa
|
||||
from calibre.ebooks.docx.block_styles import inherit, read_shd as rs, read_border, binary_property, border_props, ParagraphStyle
|
||||
from calibre.ebooks.docx.char_styles import RunStyle
|
||||
from calibre.ebooks.docx.names import XPath, get, is_tag
|
||||
|
||||
# Read from XML {{{
|
||||
read_shd = rs
|
||||
edges = ('left', 'top', 'right', 'bottom')
|
||||
|
||||
def _read_width(elem):
|
||||
ans = inherit
|
||||
try:
|
||||
@ -44,13 +47,13 @@ def read_cell_width(parent, dest):
|
||||
|
||||
def read_padding(parent, dest):
|
||||
name = 'tblCellMar' if parent.tag.endswith('}tblPr') else 'tcMar'
|
||||
left = top = bottom = right = inherit
|
||||
ans = {x:inherit for x in edges}
|
||||
for mar in XPath('./w:%s' % name)(parent):
|
||||
for x in ('left', 'top', 'right', 'bottom'):
|
||||
for x in edges:
|
||||
for edge in XPath('./w:%s' % x)(mar):
|
||||
locals()[x] = _read_width(edge)
|
||||
for x in ('left', 'top', 'right', 'bottom'):
|
||||
setattr(dest, 'cell_padding_%s' % x, locals()[x])
|
||||
ans[x] = _read_width(edge)
|
||||
for x in edges:
|
||||
setattr(dest, 'cell_padding_%s' % x, ans[x])
|
||||
|
||||
def read_justification(parent, dest):
|
||||
left = right = inherit
|
||||
@ -73,6 +76,12 @@ def read_spacing(parent, dest):
|
||||
ans = _read_width(cs)
|
||||
setattr(dest, 'spacing', ans)
|
||||
|
||||
def read_float(parent, dest):
    """Store the attributes of the w:tblpPr child of ``parent`` on
    ``dest.float`` as a dict keyed by attribute name without its namespace,
    or ``inherit`` when there is no such child. If several children match,
    the last one wins."""
    props = inherit
    for tblpPr in XPath('./w:tblpPr')(parent):
        props = {}
        for qname, value in tblpPr.attrib.iteritems():
            # Drop the {namespace} prefix from the attribute name
            props[qname.rpartition('}')[-1]] = value
    dest.float = props
|
||||
|
||||
def read_indent(parent, dest):
|
||||
ans = inherit
|
||||
for cs in XPath('./w:tblInd')(parent):
|
||||
@ -139,40 +148,124 @@ def read_look(parent, dest):
|
||||
# }}}
|
||||
|
||||
def clone(style):
|
||||
ans = type(style)()
|
||||
try:
|
||||
ans = type(style)()
|
||||
except TypeError:
|
||||
return None
|
||||
ans.update(style)
|
||||
return ans
|
||||
|
||||
class RowStyle(object):
|
||||
class Style(object):
    """Base class for RowStyle, CellStyle and TableStyle. Subclasses must
    define ``all_properties``."""

    def update(self, other):
        """Copy every property of ``other`` that is not ``inherit`` onto self."""
        for name in self.all_properties:
            value = getattr(other, name)
            if value is inherit:
                continue
            setattr(self, name, value)

    def convert_spacing(self):
        """Return the CSS border-collapse/border-spacing rules for
        ``self.spacing``, or an empty dict when spacing is ``inherit``."""
        if self.spacing is inherit:
            return {}
        if self.spacing in {'auto', '0'}:
            return {'border-collapse': 'collapse'}
        return {'border-collapse': 'separate', 'border-spacing': self.spacing}

    def convert_border(self):
        """Return a dict of CSS border properties for all four edges, leaving
        out the ones whose value is ``inherit``."""
        css = {}
        for edge in edges:
            for template in border_props:
                name = template % edge
                if not name.startswith('border'):
                    continue
                val = getattr(self, name)
                if val is inherit:
                    continue
                # Numeric values are written out in points
                if isinstance(val, (int, float)):
                    val = '%.3gpt' % val
                css[name.replace('_', '-')] = val
        return css
|
||||
|
||||
class RowStyle(Style):
|
||||
|
||||
all_properties = ('height', 'cantSplit', 'hidden', 'spacing',)
|
||||
|
||||
def __init__(self, tcPr=None):
|
||||
if tcPr is None:
|
||||
for p in self.all_properties:
|
||||
setattr(self, p, inherit)
|
||||
else:
|
||||
pass
|
||||
|
||||
class CellStyle(object):
|
||||
|
||||
all_properties = ('background_color', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top',
|
||||
'cell_padding_bottom', 'width', 'vertical_align', 'col_span', 'vMerge', 'hMerge',
|
||||
) + tuple(k % edge for edge in border_edges for k in border_props)
|
||||
|
||||
def __init__(self, trPr=None):
|
||||
if trPr is None:
|
||||
for p in self.all_properties:
|
||||
setattr(self, p, inherit)
|
||||
else:
|
||||
for p in ('hidden', 'cantSplit'):
|
||||
setattr(self, p, binary_property(trPr, p))
|
||||
for p in ('spacing', 'height'):
|
||||
f = globals()['read_%s' % p]
|
||||
f(trPr, self)
|
||||
self._css = None
|
||||
|
||||
@property
|
||||
def css(self):
|
||||
if self._css is None:
|
||||
c = self._css = {}
|
||||
if self.hidden is True:
|
||||
c['display'] = 'none'
|
||||
if self.cantSplit is True:
|
||||
c['page-break-inside'] = 'avoid'
|
||||
if self.height is not inherit:
|
||||
rule, val = self.height
|
||||
if rule != 'auto':
|
||||
try:
|
||||
c['min-height' if rule == 'atLeast' else 'height'] = '%.3gpt' % (int(val)/20)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
c.update(self.convert_spacing())
|
||||
return self._css
|
||||
|
||||
class CellStyle(Style):
|
||||
|
||||
all_properties = ('background_color', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top',
|
||||
'cell_padding_bottom', 'width', 'vertical_align', 'col_span', 'vMerge', 'hMerge', 'row_span',
|
||||
) + tuple(k % edge for edge in border_edges for k in border_props)
|
||||
|
||||
def __init__(self, tcPr=None):
|
||||
if tcPr is None:
|
||||
for p in self.all_properties:
|
||||
setattr(self, p, inherit)
|
||||
else:
|
||||
for x in ('borders', 'shd', 'padding', 'cell_width', 'vertical_align', 'col_span', 'merge'):
|
||||
f = globals()['read_%s' % x]
|
||||
f(trPr, self)
|
||||
f(tcPr, self)
|
||||
self.row_span = inherit
|
||||
self._css = None
|
||||
|
||||
class TableStyle(object):
|
||||
@property
|
||||
def css(self):
|
||||
if self._css is None:
|
||||
self._css = c = {}
|
||||
if self.background_color is not inherit:
|
||||
c['background-color'] = self.background_color
|
||||
if self.width not in (inherit, 'auto'):
|
||||
c['width'] = self.width
|
||||
c['vertical-align'] = 'top' if self.vertical_align is inherit else self.vertical_align
|
||||
for x in edges:
|
||||
val = getattr(self, 'cell_padding_%s' % x)
|
||||
if val not in (inherit, 'auto'):
|
||||
c['padding-%s' % x] = val
|
||||
elif val is inherit and x in {'left', 'right'}:
|
||||
c['padding-%s' % x] = '%.3gpt' % (115/20)
|
||||
# In Word, tables are apparently rendered with some default top and
|
||||
# bottom padding irrespective of the cellMargin values. Simulate
|
||||
# that here.
|
||||
for x in ('top', 'bottom'):
|
||||
if c.get('padding-%s' % x, '0pt') == '0pt':
|
||||
c['padding-%s' % x] = '0.5ex'
|
||||
c.update(self.convert_border())
|
||||
|
||||
return self._css
|
||||
|
||||
class TableStyle(Style):
|
||||
|
||||
all_properties = (
|
||||
'width', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top',
|
||||
'width', 'float', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top',
|
||||
'cell_padding_bottom', 'margin_left', 'margin_right', 'background_color',
|
||||
'spacing', 'indent', 'overrides', 'col_band_size', 'row_band_size', 'look',
|
||||
) + tuple(k % edge for edge in border_edges for k in border_props)
|
||||
@ -183,7 +276,7 @@ class TableStyle(object):
|
||||
setattr(self, p, inherit)
|
||||
else:
|
||||
self.overrides = inherit
|
||||
for x in ('width', 'padding', 'shd', 'justification', 'spacing', 'indent', 'borders', 'band_size', 'look'):
|
||||
for x in ('width', 'float', 'padding', 'shd', 'justification', 'spacing', 'indent', 'borders', 'band_size', 'look'):
|
||||
f = globals()['read_%s' % x]
|
||||
f(tblPr, self)
|
||||
parent = tblPr.getparent()
|
||||
@ -197,17 +290,12 @@ class TableStyle(object):
|
||||
for trPr in XPath('./w:trPr')(tblStylePr):
|
||||
orides['row'] = RowStyle(trPr)
|
||||
for tcPr in XPath('./w:tcPr')(tblStylePr):
|
||||
orides['cell'] = tcPr
|
||||
orides['cell'] = CellStyle(tcPr)
|
||||
for pPr in XPath('./w:pPr')(tblStylePr):
|
||||
orides['para'] = ParagraphStyle(pPr)
|
||||
for rPr in XPath('./w:rPr')(tblStylePr):
|
||||
orides['run'] = RunStyle(rPr)
|
||||
|
||||
def update(self, other):
|
||||
for prop in self.all_properties:
|
||||
nval = getattr(other, prop)
|
||||
if nval is not inherit:
|
||||
setattr(self, prop, nval)
|
||||
self._css = None
|
||||
|
||||
def resolve_based_on(self, parent):
|
||||
for p in self.all_properties:
|
||||
@ -215,11 +303,50 @@ class TableStyle(object):
|
||||
if val is inherit:
|
||||
setattr(self, p, getattr(parent, p))
|
||||
|
||||
@property
def css(self):
    """The CSS for this table style as a dict of property name to value.

    The dict is built on first access and cached in ``self._css``. Note that
    the floating branch reads ``self.page``, which is not set by anything in
    this method and therefore must be assigned before the first access.
    """
    if self._css is None:
        c = self._css = {}
        if self.width not in (inherit, 'auto'):
            c['width'] = self.width
        for x in ('background_color', 'margin_left', 'margin_right'):
            val = getattr(self, x)
            if val is not inherit:
                c[x.replace('_', '-')] = val
        # An explicit indent replaces margin-left, unless margin_left is 'auto'
        if self.indent not in (inherit, 'auto') and self.margin_left != 'auto':
            c['margin-left'] = self.indent
        if self.float is not inherit:
            # Every margin is overwritten for a floating table; missing or
            # unparseable *FromText values become a zero margin.
            for x in ('left', 'top', 'right', 'bottom'):
                val = self.float.get('%sFromText' % x, 0)
                try:
                    # NOTE(review): presumably the value is in twentieths of
                    # a point -- confirm against the OOXML spec
                    val = '%.3gpt' % (int(val) / 20)
                except (ValueError, TypeError):
                    val = '0'
                c['margin-%s' % x] = val
            if 'tblpXSpec' in self.float:
                c['float'] = 'right' if self.float['tblpXSpec'] in {'right', 'outside'} else 'left'
            else:
                # No explicit side given: pick one from the horizontal
                # position relative to the width between the page margins
                page = self.page
                page_width = page.width - page.margin_left - page.margin_right
                try:
                    x = int(self.float['tblpX']) / 20
                except (KeyError, ValueError, TypeError):
                    x = 0
                c['float'] = 'left' if (x/page_width) < 0.65 else 'right'
        c.update(self.convert_spacing())
        # Collapse borders unless the spacing rules said otherwise
        if 'border-collapse' not in c:
            c['border-collapse'] = 'collapse'
        c.update(self.convert_border())

    return self._css
|
||||
|
||||
|
||||
class Table(object):
|
||||
|
||||
def __init__(self, tbl, styles, para_map):
|
||||
def __init__(self, tbl, styles, para_map, is_sub_table=False):
|
||||
self.tbl = tbl
|
||||
self.styles = styles
|
||||
self.is_sub_table = is_sub_table
|
||||
|
||||
# Read Table Style
|
||||
style = {'table':TableStyle()}
|
||||
@ -243,21 +370,33 @@ class Table(object):
|
||||
style['table'].update(TableStyle(tblPr))
|
||||
self.table_style, self.paragraph_style = style['table'], style.get('paragraph', None)
|
||||
self.run_style = style.get('run', None)
|
||||
self.overrides = self.table_style.overrides
|
||||
if self.overrides is inherit:
|
||||
self.overrides = {}
|
||||
if 'wholeTable' in self.overrides and 'table' in self.overrides['wholeTable']:
|
||||
self.table_style.update(self.overrides['wholeTable']['table'])
|
||||
|
||||
self.style_map = {}
|
||||
self.paragraphs = []
|
||||
self.cell_map = []
|
||||
|
||||
rows = XPath('./w:tr')(tbl)
|
||||
for r, tr in enumerate(rows):
|
||||
overrides = self.get_overrides(r, None, len(rows), None)
|
||||
self.resolve_row_style(tr, overrides)
|
||||
cells = XPath('./w:tc')(tr)
|
||||
self.cell_map.append([])
|
||||
for c, tc in enumerate(cells):
|
||||
overrides = self.get_overrides(r, c, len(rows), len(cells))
|
||||
self.resolve_cell_style(tc, overrides, r, c, len(rows), len(cells))
|
||||
self.cell_map[-1].append(tc)
|
||||
for p in XPath('./w:p')(tc):
|
||||
para_map[p] = self
|
||||
self.paragraphs.append(p)
|
||||
self.resolve_para_style(p, overrides)
|
||||
|
||||
self.sub_tables = {x:Table(x, styles, para_map) for x in XPath('./w:tr/w:tc/w:tbl')(tbl)}
|
||||
self.handle_merged_cells()
|
||||
self.sub_tables = {x:Table(x, styles, para_map, is_sub_table=True) for x in XPath('./w:tr/w:tc/w:tbl')(tbl)}
|
||||
|
||||
def override_allowed(self, name):
|
||||
'Check if the named override is allowed by the tblLook element'
|
||||
@ -279,37 +418,102 @@ class Table(object):
|
||||
overrides = ['wholeTable']
|
||||
def divisor(m, n):
|
||||
return (m - (m % n)) // n
|
||||
odd_column_band = (divisor(c, self.table_style.col_band_size) % 2) == 0
|
||||
overrides.append('band%dVert' % (1 if odd_column_band else 2))
|
||||
odd_row_band = (divisor(r, self.table_style.row_band_size) % 2) == 0
|
||||
if c is not None:
|
||||
odd_column_band = (divisor(c, self.table_style.col_band_size) % 2) == 1
|
||||
overrides.append('band%dVert' % (1 if odd_column_band else 2))
|
||||
odd_row_band = (divisor(r, self.table_style.row_band_size) % 2) == 1
|
||||
overrides.append('band%dHorz' % (1 if odd_row_band else 2))
|
||||
|
||||
# According to the OOXML spec columns should have higher override
|
||||
# priority than rows, but Word seems to do it the other way around.
|
||||
if c is not None:
|
||||
if c == 0:
|
||||
overrides.append('firstCol')
|
||||
if c >= num_of_cols_in_row - 1:
|
||||
overrides.append('lastCol')
|
||||
if r == 0:
|
||||
overrides.append('firstRow')
|
||||
if r >= num_of_rows - 1:
|
||||
overrides.append('lastRow')
|
||||
if c == 0:
|
||||
overrides.append('firstCol')
|
||||
if c >= num_of_cols_in_row - 1:
|
||||
overrides.append('lastCol')
|
||||
if r == 0:
|
||||
if c == 0:
|
||||
overrides.append('nwCell')
|
||||
if c == num_of_cols_in_row - 1:
|
||||
overrides.append('neCell')
|
||||
if r == num_of_rows - 1:
|
||||
if c == 0:
|
||||
overrides.append('swCell')
|
||||
if c == num_of_cols_in_row - 1:
|
||||
overrides.append('seCell')
|
||||
if c is not None:
|
||||
if r == 0:
|
||||
if c == 0:
|
||||
overrides.append('nwCell')
|
||||
if c == num_of_cols_in_row - 1:
|
||||
overrides.append('neCell')
|
||||
if r == num_of_rows - 1:
|
||||
if c == 0:
|
||||
overrides.append('swCell')
|
||||
if c == num_of_cols_in_row - 1:
|
||||
overrides.append('seCell')
|
||||
return tuple(filter(self.override_allowed, overrides))
|
||||
|
||||
def resolve_row_style(self, tr, overrides):
    """Compute the RowStyle for the row ``tr`` and record it in
    ``self.style_map``.

    The 'row' part of each applicable override is applied in the order given
    by ``overrides``, followed by the row's own w:trPr properties.
    """
    style = RowStyle()
    for name in overrides:
        if name not in self.overrides:
            continue
        row_override = self.overrides[name].get('row', None)
        if row_override is not None:
            style.update(row_override)

    for trPr in XPath('./w:trPr')(tr):
        style.update(RowStyle(trPr))
    self.style_map[tr] = style
|
||||
|
||||
def resolve_cell_style(self, tc, overrides, row, col, rows, cols_in_row):
    """Compute the CellStyle for the cell ``tc`` and record it in
    ``self.style_map``.

    :param tc: The cell element
    :param overrides: Names of the table style overrides to apply, in order
    :param row: Index of the row containing the cell
    :param col: Index of the cell in its row
    :param rows: Number of rows in the table
    :param cols_in_row: Number of cells in the row containing the cell
    """
    cs = CellStyle()
    # from lxml.etree import tostring
    # txt = tostring(tc, method='text', encoding=unicode)
    for o in overrides:
        if o in self.overrides:
            ovr = self.overrides[o]
            ors = ovr.get('cell', None)
            if ors is not None:
                cs.update(ors)

    # The cell's own properties take precedence over the overrides
    for tcPr in XPath('./w:tcPr')(tc):
        cs.update(CellStyle(tcPr))

    for x in edges:
        # Unspecified padding falls back to the table wide cell padding
        p = 'cell_padding_%s' % x
        val = getattr(cs, p)
        if val is inherit:
            setattr(cs, p, getattr(self.table_style, p))

        # An edge is "inside" when another cell lies beyond it
        is_inside_edge = (
            (x == 'left' and col > 0) or
            (x == 'top' and row > 0) or
            (x == 'right' and col < cols_in_row - 1) or
            (x == 'bottom' and row < rows -1)
        )
        inside_edge = ('insideH' if x in {'top', 'bottom'} else 'insideV') if is_inside_edge else None
        for prop in border_props:
            if not prop.startswith('border'):
                continue
            eprop = prop % x
            iprop = (prop % inside_edge) if inside_edge else None
            val = getattr(cs, eprop)
            if val is inherit and iprop is not None:
                # Use the insideX borders if the main cell borders are not
                # specified
                val = getattr(cs, iprop)
                if val is inherit:
                    val = getattr(self.table_style, iprop)
            if not is_inside_edge and val == 'none':
                # Cell borders must override table borders even when the
                # table border is not null and the cell border is null.
                val = 'hidden'
            setattr(cs, eprop, val)

    self.style_map[tc] = cs
|
||||
|
||||
def resolve_para_style(self, p, overrides):
|
||||
text_styles = [None if self.paragraph_style is None else clone(self.paragraph_style),
|
||||
None if self.run_style is None else clone(self.run_style)]
|
||||
text_styles = [clone(self.paragraph_style), clone(self.run_style)]
|
||||
|
||||
for o in overrides:
|
||||
if o in self.table_style.overrides:
|
||||
ovr = self.table_style.overrides[o]
|
||||
if o in self.overrides:
|
||||
ovr = self.overrides[o]
|
||||
for i, name in enumerate(('para', 'run')):
|
||||
ops = ovr.get(name, None)
|
||||
if ops is not None:
|
||||
@ -319,6 +523,55 @@ class Table(object):
|
||||
text_styles[i].update(ops)
|
||||
self.style_map[p] = text_styles
|
||||
|
||||
def handle_merged_cells(self):
    """Turn runs of vMerge/hMerge cells into row_span/col_span values.

    For every run of more than one cell, the span is recorded on the style
    of the first cell of the run and the remaining cells of the run are
    removed from their parent elements.
    """
    if not self.cell_map:
        return
    # Handle vMerge
    max_col_num = max(len(r) for r in self.cell_map)
    for c in xrange(max_col_num):
        # The cells in column c, with None for rows that are too short
        cells = [row[c] if c < len(row) else None for row in self.cell_map]
        runs = [[]]
        for cell in cells:
            try:
                s = self.style_map[cell]
            except KeyError:  # cell is None
                s = CellStyle()
            if s.vMerge == 'restart':
                runs.append([cell])
            elif s.vMerge == 'continue':
                runs[-1].append(cell)
            else:
                # Any other value ends the current run
                runs.append([])
        for run in runs:
            if len(run) > 1:
                self.style_map[run[0]].row_span = len(run)
                for tc in run[1:]:
                    tc.getparent().remove(tc)

    # Handle hMerge
    for cells in self.cell_map:
        runs = [[]]
        for cell in cells:
            try:
                s = self.style_map[cell]
            except KeyError:  # cell is None
                s = CellStyle()
            # A cell with an explicit col_span never takes part in a run
            if s.col_span is not inherit:
                runs.append([])
                continue
            if s.hMerge == 'restart':
                runs.append([cell])
            elif s.hMerge == 'continue':
                runs[-1].append(cell)
            else:
                runs.append([])

        for run in runs:
            if len(run) > 1:
                self.style_map[run[0]].col_span = len(run)
                for tc in run[1:]:
                    tc.getparent().remove(tc)
|
||||
|
||||
def __iter__(self):
|
||||
for p in self.paragraphs:
|
||||
yield p
|
||||
@ -326,8 +579,10 @@ class Table(object):
|
||||
for p in t:
|
||||
yield p
|
||||
|
||||
def apply_markup(self, rmap, parent=None):
|
||||
def apply_markup(self, rmap, page, parent=None):
|
||||
table = TABLE('\n\t\t')
|
||||
self.table_style.page = page
|
||||
style_map = {}
|
||||
if parent is None:
|
||||
try:
|
||||
first_para = rmap[next(iter(self))]
|
||||
@ -340,36 +595,53 @@ class Table(object):
|
||||
parent.append(table)
|
||||
for row in XPath('./w:tr')(self.tbl):
|
||||
tr = TR('\n\t\t\t')
|
||||
style_map[tr] = self.style_map[row]
|
||||
tr.tail = '\n\t\t'
|
||||
table.append(tr)
|
||||
for tc in XPath('./w:tc')(row):
|
||||
td = TD()
|
||||
style_map[td] = s = self.style_map[tc]
|
||||
if s.col_span is not inherit:
|
||||
td.set('colspan', type('')(s.col_span))
|
||||
if s.row_span is not inherit:
|
||||
td.set('rowspan', type('')(s.row_span))
|
||||
td.tail = '\n\t\t\t'
|
||||
tr.append(td)
|
||||
for x in XPath('./w:p|./w:tbl')(tc):
|
||||
if x.tag.endswith('}p'):
|
||||
td.append(rmap[x])
|
||||
else:
|
||||
self.sub_tables[x].apply_markup(rmap, parent=td)
|
||||
self.sub_tables[x].apply_markup(rmap, page, parent=td)
|
||||
if len(tr):
|
||||
tr[-1].tail = '\n\t\t'
|
||||
if len(table):
|
||||
table[-1].tail = '\n\t'
|
||||
|
||||
table_style = self.table_style.css
|
||||
if table_style:
|
||||
table.set('class', self.styles.register(table_style, 'table'))
|
||||
for elem, style in style_map.iteritems():
|
||||
css = style.css
|
||||
if css:
|
||||
elem.set('class', self.styles.register(css, elem.tag))
|
||||
|
||||
class Tables(object):
|
||||
|
||||
def __init__(self):
|
||||
self.tables = []
|
||||
self.para_map = {}
|
||||
self.sub_tables = set()
|
||||
|
||||
def register(self, tbl, styles):
|
||||
if tbl in self.sub_tables:
|
||||
return
|
||||
self.tables.append(Table(tbl, styles, self.para_map))
|
||||
self.sub_tables |= set(self.tables[-1].sub_tables)
|
||||
|
||||
def apply_markup(self, object_map):
|
||||
def apply_markup(self, object_map, page_map):
|
||||
rmap = {v:k for k, v in object_map.iteritems()}
|
||||
for table in self.tables:
|
||||
table.apply_markup(rmap)
|
||||
table.apply_markup(rmap, page_map[table.tbl])
|
||||
|
||||
def para_style(self, p):
|
||||
table = self.para_map.get(p, None)
|
||||
|
31
src/calibre/ebooks/docx/theme.py
Normal file
31
src/calibre/ebooks/docx/theme.py
Normal file
@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from calibre.ebooks.docx.names import XPath
|
||||
|
||||
|
||||
class Theme(object):
    """Latin font names declared by a DOCX theme part.

    An instance starts out with built-in fallback fonts and is then called
    with the parsed theme XML root to pick up the typefaces the document
    actually declares. ``resolve_font_family`` maps theme font references
    onto those typefaces.
    """

    def __init__(self):
        # Fallbacks used when no theme is read, or the theme declares no
        # Latin typeface for the corresponding font.
        self.major_latin_font = 'Cambria'
        self.minor_latin_font = 'Calibri'

    def __call__(self, root):
        """Read the major/minor Latin typefaces from the theme tree *root*.

        Every ``a:fontScheme`` element is inspected; if several declare a
        typeface, the last one found wins. Only ``a:latin`` elements that
        carry a ``typeface`` attribute are considered.
        """
        for fs in XPath('//a:fontScheme')(root):
            for mj in XPath('./a:majorFont')(fs):
                for l in XPath('./a:latin[@typeface]')(mj):
                    self.major_latin_font = l.get('typeface')
            for mn in XPath('./a:minorFont')(fs):
                for l in XPath('./a:latin[@typeface]')(mn):
                    self.minor_latin_font = l.get('typeface')

    def resolve_font_family(self, ff):
        """Return the concrete font family for *ff*.

        A value starting with ``|`` is treated as a theme reference: its
        first and last characters are stripped, and the remainder selects
        the major font if it starts with ``major``, otherwise the minor
        font. Any other value is returned unchanged.
        """
        if ff.startswith('|'):
            # NOTE(review): presumably the caller wraps theme font names as
            # '|name|' -- the producer of that form is not visible here.
            ff = ff[1:-1]
            ff = self.major_latin_font if ff.startswith('major') else self.minor_latin_font
        return ff
|
@ -16,13 +16,15 @@ from lxml.html.builder import (
|
||||
from calibre.ebooks.docx.container import DOCX, fromstring
|
||||
from calibre.ebooks.docx.names import (
|
||||
XPath, is_tag, XML, STYLES, NUMBERING, FONTS, get, generate_anchor,
|
||||
descendants, ancestor, FOOTNOTES, ENDNOTES)
|
||||
descendants, FOOTNOTES, ENDNOTES, children, THEMES)
|
||||
from calibre.ebooks.docx.styles import Styles, inherit, PageProperties
|
||||
from calibre.ebooks.docx.numbering import Numbering
|
||||
from calibre.ebooks.docx.fonts import Fonts
|
||||
from calibre.ebooks.docx.images import Images
|
||||
from calibre.ebooks.docx.tables import Tables
|
||||
from calibre.ebooks.docx.footnotes import Footnotes
|
||||
from calibre.ebooks.docx.cleanup import cleanup_markup
|
||||
from calibre.ebooks.docx.theme import Theme
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
|
||||
@ -41,11 +43,14 @@ class Convert(object):
|
||||
|
||||
def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
|
||||
self.docx = DOCX(path_or_stream, log=log)
|
||||
self.ms_pat = re.compile(r'\s{2,}')
|
||||
self.ws_pat = re.compile(r'[\n\r\t]')
|
||||
self.log = self.docx.log
|
||||
self.notes_text = notes_text or _('Notes')
|
||||
self.dest_dir = dest_dir or os.getcwdu()
|
||||
self.mi = self.docx.metadata
|
||||
self.body = BODY()
|
||||
self.theme = Theme()
|
||||
self.tables = Tables()
|
||||
self.styles = Styles(self.tables)
|
||||
self.images = Images()
|
||||
@ -82,11 +87,13 @@ class Convert(object):
|
||||
self.anchor_map = {}
|
||||
self.link_map = defaultdict(list)
|
||||
|
||||
self.log.debug('Converting Word markup to HTML')
|
||||
self.read_page_properties(doc)
|
||||
for wp, page_properties in self.page_map.iteritems():
|
||||
self.current_page = page_properties
|
||||
p = self.convert_p(wp)
|
||||
self.body.append(p)
|
||||
if wp.tag.endswith('}p'):
|
||||
p = self.convert_p(wp)
|
||||
self.body.append(p)
|
||||
|
||||
notes_header = None
|
||||
if self.footnotes.has_notes:
|
||||
@ -103,6 +110,7 @@ class Convert(object):
|
||||
for wp in note:
|
||||
if wp.tag.endswith('}tbl'):
|
||||
self.tables.register(wp, self.styles)
|
||||
self.page_map[wp] = self.current_page
|
||||
p = self.convert_p(wp)
|
||||
dl[-1].append(p)
|
||||
|
||||
@ -110,7 +118,7 @@ class Convert(object):
|
||||
|
||||
self.styles.cascade(self.layers)
|
||||
|
||||
self.tables.apply_markup(self.object_map)
|
||||
self.tables.apply_markup(self.object_map, self.page_map)
|
||||
|
||||
numbered = []
|
||||
for html_obj, obj in self.object_map.iteritems():
|
||||
@ -131,6 +139,7 @@ class Convert(object):
|
||||
child.tail = '\n\t'
|
||||
self.body[-1].tail = '\n'
|
||||
|
||||
self.log.debug('Converting styles to CSS')
|
||||
self.styles.generate_classes()
|
||||
for html_obj, obj in self.object_map.iteritems():
|
||||
style = self.styles.resolve(obj)
|
||||
@ -146,13 +155,16 @@ class Convert(object):
|
||||
html_obj.set('class', cls)
|
||||
|
||||
if notes_header is not None:
|
||||
for h in self.body.iterchildren('h1', 'h2', 'h3'):
|
||||
for h in children(self.body, 'h1', 'h2', 'h3'):
|
||||
notes_header.tag = h.tag
|
||||
cls = h.get('class', None)
|
||||
if cls and cls != 'notes-header':
|
||||
notes_header.set('class', '%s notes-header' % cls)
|
||||
break
|
||||
|
||||
self.log.debug('Cleaning up redundant markup generated by Word')
|
||||
cleanup_markup(self.html, self.styles)
|
||||
|
||||
return self.write()
|
||||
|
||||
def read_page_properties(self, doc):
|
||||
@ -162,6 +174,7 @@ class Convert(object):
|
||||
for p in descendants(doc, 'w:p', 'w:tbl'):
|
||||
if p.tag.endswith('}tbl'):
|
||||
self.tables.register(p, self.styles)
|
||||
current.append(p)
|
||||
continue
|
||||
sect = tuple(descendants(p, 'w:sectPr'))
|
||||
if sect:
|
||||
@ -192,6 +205,7 @@ class Convert(object):
|
||||
nname = get_name(NUMBERING, 'numbering.xml')
|
||||
sname = get_name(STYLES, 'styles.xml')
|
||||
fname = get_name(FONTS, 'fontTable.xml')
|
||||
tname = get_name(THEMES, 'theme1.xml')
|
||||
foname = get_name(FOOTNOTES, 'footnotes.xml')
|
||||
enname = get_name(ENDNOTES, 'endnotes.xml')
|
||||
numbering = self.numbering = Numbering()
|
||||
@ -220,13 +234,21 @@ class Convert(object):
|
||||
else:
|
||||
fonts(fromstring(raw), embed_relationships, self.docx, self.dest_dir)
|
||||
|
||||
if tname is not None:
|
||||
try:
|
||||
raw = self.docx.read(tname)
|
||||
except KeyError:
|
||||
# The part that failed to load here is the theme (tname), not the styles part.
self.log.warn('Theme %s does not exist' % tname)
|
||||
else:
|
||||
self.theme(fromstring(raw))
|
||||
|
||||
if sname is not None:
|
||||
try:
|
||||
raw = self.docx.read(sname)
|
||||
except KeyError:
|
||||
self.log.warn('Styles %s do not exist' % sname)
|
||||
else:
|
||||
self.styles(fromstring(raw), fonts)
|
||||
self.styles(fromstring(raw), fonts, self.theme)
|
||||
|
||||
if nname is not None:
|
||||
try:
|
||||
@ -259,7 +281,7 @@ class Convert(object):
|
||||
elem.set('id', ans)
|
||||
return ans
|
||||
|
||||
for item in root.iterdescendants(*headings):
|
||||
for item in descendants(root, *headings):
|
||||
lvl = plvl = item_level_map.get(item, None)
|
||||
if lvl is None:
|
||||
continue
|
||||
@ -305,6 +327,7 @@ class Convert(object):
|
||||
|
||||
current_anchor = None
|
||||
current_hyperlink = None
|
||||
hl_xpath = XPath('ancestor::w:hyperlink[1]')
|
||||
|
||||
for x in descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink'):
|
||||
if x.tag.endswith('}r'):
|
||||
@ -313,10 +336,11 @@ class Convert(object):
|
||||
(dest if len(dest) == 0 else span).set('id', current_anchor)
|
||||
current_anchor = None
|
||||
if current_hyperlink is not None:
|
||||
hl = ancestor(x, 'w:hyperlink')
|
||||
if hl is not None:
|
||||
try:
|
||||
hl = hl_xpath(x)[0]
|
||||
self.link_map[hl].append(span)
|
||||
else:
|
||||
x.set('is-link', '1')
|
||||
except IndexError:
|
||||
current_hyperlink = None
|
||||
dest.append(span)
|
||||
self.layers[p].append(x)
|
||||
@ -359,6 +383,10 @@ class Convert(object):
|
||||
wrapper = self.wrap_elems(spans, SPAN())
|
||||
wrapper.set('class', cls)
|
||||
|
||||
if not dest.text and len(dest) == 0:
|
||||
# Empty paragraph: add a non-breaking space so that it is rendered
|
||||
# by WebKit
|
||||
dest.text = '\xa0'
|
||||
return dest
|
||||
|
||||
def wrap_elems(self, elems, wrapper):
|
||||
@ -406,8 +434,15 @@ class Convert(object):
|
||||
if not child.text:
|
||||
continue
|
||||
space = child.get(XML('space'), None)
|
||||
preserve = False
|
||||
if space == 'preserve':
|
||||
text.add_elem(SPAN(child.text, style="whitespace:pre-wrap"))
|
||||
# Only use a <span> with white-space:pre-wrap if this element
|
||||
# actually needs it, i.e. if it has more than one
|
||||
# consecutive space or it has newlines or tabs.
|
||||
multi_spaces = self.ms_pat.search(child.text) is not None
|
||||
preserve = multi_spaces or self.ws_pat.search(child.text) is not None
|
||||
if preserve:
|
||||
text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
|
||||
ans.append(text.elem)
|
||||
else:
|
||||
text.buf.append(child.text)
|
||||
@ -415,7 +450,7 @@ class Convert(object):
|
||||
text.add_elem(BR())
|
||||
ans.append(text.elem)
|
||||
elif is_tag(child, 'w:br'):
|
||||
typ = child.get('type', None)
|
||||
typ = get(child, 'w:type')
|
||||
if typ in {'column', 'page'}:
|
||||
br = BR(style='page-break-after:always')
|
||||
else:
|
||||
@ -437,6 +472,8 @@ class Convert(object):
|
||||
l.set('class', 'noteref')
|
||||
text.add_elem(l)
|
||||
ans.append(text.elem)
|
||||
elif is_tag(child, 'w:fldChar') and get(child, 'w:fldCharType') == 'separate':
|
||||
text.buf.append('\xa0')
|
||||
if text.buf:
|
||||
setattr(text.elem, text.attr, ''.join(text.buf))
|
||||
|
||||
|
@ -27,7 +27,7 @@ def get_metadata(stream):
|
||||
width, height, fmt = identify_data(raw)
|
||||
except:
|
||||
continue
|
||||
if 0.8 <= height/width <= 1.8 and height*width >= 12000:
|
||||
if 0.8 <= height/width <= 1.8 and height*width >= 160000:
|
||||
cdata = (fmt, raw)
|
||||
if cdata is not None:
|
||||
mi.cover_data = cdata
|
||||
|
@ -489,7 +489,7 @@ class MobiMLizer(object):
|
||||
if elem.text:
|
||||
if istate.preserve:
|
||||
text = elem.text
|
||||
elif (len(elem) > 0 and isspace(elem.text) and elem[0].tag and
|
||||
elif (len(elem) > 0 and isspace(elem.text) and hasattr(elem[0].tag, 'rpartition') and
|
||||
elem[0].tag.rpartition('}')[-1] not in INLINE_TAGS):
|
||||
text = None
|
||||
else:
|
||||
|
@ -36,7 +36,8 @@ class Header(OrderedDict):
|
||||
|
||||
for line in self.DEFINITION.splitlines():
|
||||
line = line.strip()
|
||||
if not line or line.startswith('#'): continue
|
||||
if not line or line.startswith('#'):
|
||||
continue
|
||||
name, val = [x.strip() for x in line.partition('=')[0::2]]
|
||||
if val:
|
||||
val = eval(val, {'zeroes':zeroes, 'NULL':NULL, 'DYN':None,
|
||||
@ -66,7 +67,7 @@ class Header(OrderedDict):
|
||||
if val is None:
|
||||
raise ValueError('Dynamic field %r not set'%name)
|
||||
if isinstance(val, (int, long)):
|
||||
fmt = 'H' if name in self.SHORT_FIELDS else 'I'
|
||||
fmt = b'H' if name in self.SHORT_FIELDS else b'I'
|
||||
val = pack(b'>'+fmt, val)
|
||||
buf.write(val)
|
||||
|
||||
@ -79,8 +80,8 @@ class Header(OrderedDict):
|
||||
ans = align_block(ans)
|
||||
return ans
|
||||
|
||||
|
||||
def format_value(self, name, val):
|
||||
return val
|
||||
|
||||
|
||||
|
||||
|
@ -125,7 +125,7 @@ class EbookIterator(BookmarksMixin):
|
||||
[i for i in self.opf.spine if not i.is_linear]
|
||||
self.spine = []
|
||||
Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
|
||||
run_char_count=run_char_count)
|
||||
run_char_count=run_char_count, from_epub=self.book_format == 'EPUB')
|
||||
is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
|
||||
for i in ordered:
|
||||
spath = i.path
|
||||
|
@ -36,14 +36,30 @@ def anchor_map(html):
|
||||
class SpineItem(unicode):
|
||||
|
||||
def __new__(cls, path, mime_type=None, read_anchor_map=True,
|
||||
run_char_count=True):
|
||||
run_char_count=True, from_epub=False):
|
||||
ppath = path.partition('#')[0]
|
||||
if not os.path.exists(path) and os.path.exists(ppath):
|
||||
path = ppath
|
||||
obj = super(SpineItem, cls).__new__(cls, path)
|
||||
with open(path, 'rb') as f:
|
||||
raw = f.read()
|
||||
raw, obj.encoding = xml_to_unicode(raw)
|
||||
if from_epub:
|
||||
# According to the spec, HTML in EPUB must be encoded in utf-8 or
|
||||
# utf-16. Furthermore, there exist epub files produced by the usual
|
||||
# incompetents that have utf-8 encoded HTML files that contain
|
||||
# incorrect encoding declarations. See
|
||||
# http://www.idpf.org/epub/20/spec/OPS_2.0.1_draft.htm#Section1.4.1.2
|
||||
# http://www.idpf.org/epub/30/spec/epub30-publications.html#confreq-xml-enc
|
||||
# https://bugs.launchpad.net/bugs/1188843
|
||||
# So we first decode with utf-8 and only if that fails we try xml_to_unicode. This
|
||||
# is the same algorithm as that used by the conversion pipeline (modulo
|
||||
# some BOM based detection). Sigh.
|
||||
try:
|
||||
raw, obj.encoding = raw.decode('utf-8'), 'utf-8'
|
||||
except UnicodeDecodeError:
|
||||
raw, obj.encoding = xml_to_unicode(raw)
|
||||
else:
|
||||
raw, obj.encoding = xml_to_unicode(raw)
|
||||
obj.character_count = character_count(raw) if run_char_count else 10000
|
||||
obj.anchor_map = anchor_map(raw) if read_anchor_map else {}
|
||||
obj.start_page = -1
|
||||
@ -100,22 +116,24 @@ class IndexEntry(object):
|
||||
self.end_anchor = None
|
||||
|
||||
def create_indexing_data(spine, toc):
|
||||
if not toc: return
|
||||
if not toc:
|
||||
return
|
||||
f = partial(IndexEntry, spine)
|
||||
index_entries = list(map(f,
|
||||
(t for t in toc.flat() if t is not toc),
|
||||
(i-1 for i, t in enumerate(toc.flat()) if t is not toc)
|
||||
))
|
||||
index_entries.sort(key=attrgetter('sort_key'))
|
||||
[ i.find_end(index_entries) for i in index_entries ]
|
||||
[i.find_end(index_entries) for i in index_entries]
|
||||
|
||||
ie = namedtuple('IndexEntry', 'entry start_anchor end_anchor')
|
||||
|
||||
for spine_pos, spine_item in enumerate(spine):
|
||||
for i in index_entries:
|
||||
if i.end_spine_pos < spine_pos or i.spine_pos > spine_pos:
|
||||
continue # Does not touch this file
|
||||
continue # Does not touch this file
|
||||
start = i.anchor if i.spine_pos == spine_pos else None
|
||||
end = i.end_anchor if i.spine_pos == spine_pos else None
|
||||
spine_item.index_entries.append(ie(i, start, end))
|
||||
|
||||
|
||||
|
@ -353,12 +353,19 @@ class FlowSplitter(object):
|
||||
nix_element(elem)
|
||||
|
||||
# Tree 2
|
||||
ancestors = frozenset(XPath('ancestor::*')(split_point2))
|
||||
for elem in tuple(body2.iterdescendants()):
|
||||
if elem is split_point2:
|
||||
if not before:
|
||||
nix_element(elem)
|
||||
break
|
||||
nix_element(elem, top=False)
|
||||
if elem in ancestors:
|
||||
# We have to preserve the ancestors as they could have CSS
|
||||
# styles that are inherited/applicable, like font or
|
||||
# width. So we only remove the text, if any.
|
||||
elem.text = '\n'
|
||||
else:
|
||||
nix_element(elem, top=False)
|
||||
|
||||
body2.text = '\n'
|
||||
|
||||
|
@ -27,7 +27,13 @@ class ProfileModel(QAbstractListModel):
|
||||
if role == Qt.DisplayRole:
|
||||
return QVariant(profile.name)
|
||||
if role in (Qt.ToolTipRole, Qt.StatusTipRole, Qt.WhatsThisRole):
|
||||
return QVariant(profile.description)
|
||||
w, h = profile.screen_size
|
||||
if w >= 10000:
|
||||
ss = _('unlimited')
|
||||
else:
|
||||
ss = _('%d x %d pixels') % (w, h)
|
||||
ss = _('Screen size: %s') % ss
|
||||
return QVariant('%s [%s]' % (profile.description, ss))
|
||||
return NONE
|
||||
|
||||
class PageSetupWidget(Widget, Ui_Form):
|
||||
|
@ -212,7 +212,7 @@ class StatusBar(QStatusBar): # {{{
|
||||
if self.library_total != self.total:
|
||||
base = _('{0}, {1} total').format(base, self.library_total)
|
||||
|
||||
self.defmsg.setText('%s [%s]' % (msg, base))
|
||||
self.defmsg.setText(u'%s\xa0\xa0\xa0\xa0[%s]' % (msg, base))
|
||||
self.clearMessage()
|
||||
|
||||
def device_disconnected(self):
|
||||
|
@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
|
||||
Job management.
|
||||
'''
|
||||
|
||||
import re
|
||||
import re, time
|
||||
from Queue import Empty, Queue
|
||||
|
||||
from PyQt4.Qt import (QAbstractTableModel, QVariant, QModelIndex, Qt,
|
||||
@ -29,7 +29,7 @@ from calibre.gui2.threaded_jobs import ThreadedJobServer, ThreadedJob
|
||||
from calibre.utils.search_query_parser import SearchQueryParser, ParseException
|
||||
from calibre.utils.icu import lower
|
||||
|
||||
class JobManager(QAbstractTableModel, SearchQueryParser): # {{{
|
||||
class JobManager(QAbstractTableModel, SearchQueryParser): # {{{
|
||||
|
||||
job_added = pyqtSignal(int)
|
||||
job_done = pyqtSignal(int)
|
||||
@ -55,7 +55,7 @@ class JobManager(QAbstractTableModel, SearchQueryParser): # {{{
|
||||
self.timer.start(1000)
|
||||
|
||||
def columnCount(self, parent=QModelIndex()):
|
||||
return 4
|
||||
return 5
|
||||
|
||||
def rowCount(self, parent=QModelIndex()):
|
||||
return len(self.jobs)
|
||||
@ -64,11 +64,13 @@ class JobManager(QAbstractTableModel, SearchQueryParser): # {{{
|
||||
if role != Qt.DisplayRole:
|
||||
return NONE
|
||||
if orientation == Qt.Horizontal:
|
||||
if section == 0: text = _('Job')
|
||||
elif section == 1: text = _('Status')
|
||||
elif section == 2: text = _('Progress')
|
||||
elif section == 3: text = _('Running time')
|
||||
return QVariant(text)
|
||||
return QVariant({
|
||||
0: _('Job'),
|
||||
1: _('Status'),
|
||||
2: _('Progress'),
|
||||
3: _('Running time'),
|
||||
4: _('Start time'),
|
||||
}.get(section, ''))
|
||||
else:
|
||||
return QVariant(section+1)
|
||||
|
||||
@ -117,6 +119,8 @@ class JobManager(QAbstractTableModel, SearchQueryParser): # {{{
|
||||
if rtime is None:
|
||||
return NONE
|
||||
return QVariant('%dm %ds'%(int(rtime)//60, int(rtime)%60))
|
||||
if col == 4 and job.start_time is not None:
|
||||
return QVariant(time.strftime('%H:%M -- %d %b', time.localtime(job.start_time)))
|
||||
if role == Qt.DecorationRole and col == 0:
|
||||
state = job.run_state
|
||||
if state == job.WAITING:
|
||||
@ -220,7 +224,7 @@ class JobManager(QAbstractTableModel, SearchQueryParser): # {{{
|
||||
def has_device_jobs(self, queued_also=False):
|
||||
for job in self.jobs:
|
||||
if isinstance(job, DeviceJob):
|
||||
if job.duration is None: # Running or waiting
|
||||
if job.duration is None: # Running or waiting
|
||||
if (job.is_running or queued_also):
|
||||
return True
|
||||
return False
|
||||
@ -341,7 +345,7 @@ class JobManager(QAbstractTableModel, SearchQueryParser): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
class FilterModel(QSortFilterProxyModel): # {{{
|
||||
class FilterModel(QSortFilterProxyModel): # {{{
|
||||
|
||||
search_done = pyqtSignal(object)
|
||||
|
||||
@ -376,7 +380,7 @@ class FilterModel(QSortFilterProxyModel): # {{{
|
||||
|
||||
# Jobs UI {{{
|
||||
|
||||
class ProgressBarDelegate(QAbstractItemDelegate): # {{{
|
||||
class ProgressBarDelegate(QAbstractItemDelegate): # {{{
|
||||
|
||||
def sizeHint(self, option, index):
|
||||
return QSize(120, 30)
|
||||
@ -395,7 +399,7 @@ class ProgressBarDelegate(QAbstractItemDelegate): # {{{
|
||||
QApplication.style().drawControl(QStyle.CE_ProgressBar, opts, painter)
|
||||
# }}}
|
||||
|
||||
class DetailView(QDialog, Ui_Dialog): # {{{
|
||||
class DetailView(QDialog, Ui_Dialog): # {{{
|
||||
|
||||
def __init__(self, parent, job):
|
||||
QDialog.__init__(self, parent)
|
||||
@ -432,7 +436,7 @@ class DetailView(QDialog, Ui_Dialog): # {{{
|
||||
self.log.appendPlainText(more.decode('utf-8', 'replace'))
|
||||
# }}}
|
||||
|
||||
class JobsButton(QFrame): # {{{
|
||||
class JobsButton(QFrame): # {{{
|
||||
|
||||
def __init__(self, horizontal=False, size=48, parent=None):
|
||||
QFrame.__init__(self, parent)
|
||||
@ -471,7 +475,6 @@ class JobsButton(QFrame): # {{{
|
||||
job_manager.job_done.connect(self.job_done)
|
||||
self.jobs_dialog.addAction(self.action_toggle)
|
||||
|
||||
|
||||
def mouseReleaseEvent(self, event):
|
||||
self.toggle()
|
||||
|
||||
@ -554,7 +557,7 @@ class JobsDialog(QDialog, Ui_JobsDialog):
|
||||
try:
|
||||
geom = gprefs.get('jobs_dialog_geometry', bytearray(''))
|
||||
self.restoreGeometry(QByteArray(geom))
|
||||
state = gprefs.get('jobs view column layout', bytearray(''))
|
||||
state = gprefs.get('jobs view column layout2', bytearray(''))
|
||||
self.jobs_view.horizontalHeader().restoreState(QByteArray(state))
|
||||
except:
|
||||
pass
|
||||
@ -566,7 +569,7 @@ class JobsDialog(QDialog, Ui_JobsDialog):
|
||||
def save_state(self):
|
||||
try:
|
||||
state = bytearray(self.jobs_view.horizontalHeader().saveState())
|
||||
gprefs['jobs view column layout'] = state
|
||||
gprefs['jobs view column layout2'] = state
|
||||
geom = bytearray(self.saveGeometry())
|
||||
gprefs['jobs_dialog_geometry'] = geom
|
||||
except:
|
||||
@ -640,8 +643,13 @@ class JobsDialog(QDialog, Ui_JobsDialog):
|
||||
self.save_state()
|
||||
return QDialog.hide(self, *args)
|
||||
|
||||
def reject(self):
    """Save the dialog state, then reject the dialog via QDialog.

    Mirrors what ``hide`` does just above it: ``save_state`` is called
    before control is handed to the base-class implementation.
    """
    self.save_state()
    QDialog.reject(self)
|
||||
|
||||
def find(self, query):
|
||||
self.proxy_model.find(query)
|
||||
|
||||
# }}}
|
||||
|
||||
|
||||
|
@ -22,7 +22,7 @@ from calibre.customize.ui import preferences_plugins
|
||||
|
||||
ICON_SIZE = 32
|
||||
|
||||
class StatusBar(QStatusBar): # {{{
|
||||
class StatusBar(QStatusBar): # {{{
|
||||
|
||||
def __init__(self, parent=None):
|
||||
QStatusBar.__init__(self, parent)
|
||||
@ -39,7 +39,7 @@ class StatusBar(QStatusBar): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
class BarTitle(QWidget): # {{{
|
||||
class BarTitle(QWidget): # {{{
|
||||
|
||||
def __init__(self, parent=None):
|
||||
QWidget.__init__(self, parent)
|
||||
@ -67,7 +67,7 @@ class BarTitle(QWidget): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
class Category(QWidget): # {{{
|
||||
class Category(QWidget): # {{{
|
||||
|
||||
plugin_activated = pyqtSignal(object)
|
||||
|
||||
@ -112,7 +112,7 @@ class Category(QWidget): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
class Browser(QScrollArea): # {{{
|
||||
class Browser(QScrollArea): # {{{
|
||||
|
||||
show_plugin = pyqtSignal(object)
|
||||
|
||||
@ -221,6 +221,7 @@ class Preferences(QMainWindow):
|
||||
self.stack.addWidget(self.scroll_area)
|
||||
self.scroll_area.setWidgetResizable(True)
|
||||
|
||||
self.setContextMenuPolicy(Qt.NoContextMenu)
|
||||
self.bar = QToolBar(self)
|
||||
self.addToolBar(self.bar)
|
||||
self.bar.setVisible(False)
|
||||
@ -304,7 +305,6 @@ class Preferences(QMainWindow):
|
||||
self.bar.setVisible(True)
|
||||
self.bb.setVisible(False)
|
||||
|
||||
|
||||
def hide_plugin(self):
|
||||
self.showing_widget = QWidget(self.scroll_area)
|
||||
self.scroll_area.setWidget(self.showing_widget)
|
||||
@ -355,7 +355,6 @@ class Preferences(QMainWindow):
|
||||
if do_restart:
|
||||
self.gui.quit(restart=True)
|
||||
|
||||
|
||||
def cancel(self, *args):
|
||||
if self.close_after_initial:
|
||||
self.close()
|
||||
@ -389,3 +388,4 @@ if __name__ == '__main__':
|
||||
p.show()
|
||||
app.exec_()
|
||||
gui.shutdown()
|
||||
|
||||
|
@ -26,6 +26,9 @@ from calibre.utils.filenames import ascii_filename
|
||||
|
||||
class SearchDialog(QDialog, Ui_Dialog):
|
||||
|
||||
SEARCH_TEXT = _('&Search')
|
||||
STOP_TEXT = _('&Stop')
|
||||
|
||||
def __init__(self, gui, parent=None, query=''):
|
||||
QDialog.__init__(self, parent)
|
||||
self.setupUi(self)
|
||||
@ -89,7 +92,7 @@ class SearchDialog(QDialog, Ui_Dialog):
|
||||
self.configure.setIcon(QIcon(I('config.png')))
|
||||
|
||||
self.adv_search_button.clicked.connect(self.build_adv_search)
|
||||
self.search.clicked.connect(self.do_search)
|
||||
self.search.clicked.connect(self.toggle_search)
|
||||
self.checker.timeout.connect(self.get_results)
|
||||
self.progress_checker.timeout.connect(self.check_progress)
|
||||
self.results_view.activated.connect(self.result_item_activated)
|
||||
@ -101,6 +104,7 @@ class SearchDialog(QDialog, Ui_Dialog):
|
||||
self.select_none_stores.clicked.connect(self.stores_select_none)
|
||||
self.configure.clicked.connect(self.do_config)
|
||||
self.finished.connect(self.dialog_closed)
|
||||
self.searching = False
|
||||
|
||||
self.progress_checker.start(100)
|
||||
|
||||
@ -161,6 +165,18 @@ class SearchDialog(QDialog, Ui_Dialog):
|
||||
# Affiliate
|
||||
self.results_view.setColumnWidth(6, 20)
|
||||
|
||||
def toggle_search(self):
    """Start a search, or stop the one in progress.

    While ``self.searching`` is true this aborts the search pool and the
    results model's details and cover pools, restores the button label to
    ``SEARCH_TEXT``, stops the results checker and clears the flag.
    Otherwise it simply delegates to ``do_search``.
    """
    if self.searching:
        self.search_pool.abort()
        m = self.results_view.model()
        m.details_pool.abort()
        m.cover_pool.abort()
        self.search.setText(self.SEARCH_TEXT)
        self.checker.stop()
        self.searching = False
    else:
        self.do_search()
|
||||
|
||||
def do_search(self):
|
||||
# Stop all running threads.
|
||||
self.checker.stop()
|
||||
@ -182,6 +198,8 @@ class SearchDialog(QDialog, Ui_Dialog):
|
||||
_('You must enter a title, author or keyword to'
|
||||
' search for.'), show=True)
|
||||
return
|
||||
self.searching = True
|
||||
self.search.setText(self.STOP_TEXT)
|
||||
# Give the query to the results model so it can do
|
||||
# further filtering.
|
||||
self.results_view.model().set_query(query)
|
||||
@ -198,7 +216,7 @@ class SearchDialog(QDialog, Ui_Dialog):
|
||||
query = self.clean_query(query)
|
||||
shuffle(store_names)
|
||||
# Add plugins that the user has checked to the search pool's work queue.
|
||||
self.gui.istores.join(4.0) # Wait for updated plugins to load
|
||||
self.gui.istores.join(4.0) # Wait for updated plugins to load
|
||||
for n in store_names:
|
||||
if self.store_checks[n].isChecked():
|
||||
self.search_pool.add_task(query, n, self.gui.istores[n], self.max_results, self.timeout)
|
||||
@ -387,9 +405,15 @@ class SearchDialog(QDialog, Ui_Dialog):
|
||||
self.gui.istores[result.store_name].open(self, result.detail_item, self.open_external.isChecked())
|
||||
|
||||
def check_progress(self):
|
||||
if not self.search_pool.threads_running() and not self.results_view.model().cover_pool.threads_running() and not self.results_view.model().details_pool.threads_running():
|
||||
m = self.results_view.model()
|
||||
if not self.search_pool.threads_running() and not m.cover_pool.threads_running() and not m.details_pool.threads_running():
|
||||
self.pi.stopAnimation()
|
||||
self.search.setText(self.SEARCH_TEXT)
|
||||
self.searching = False
|
||||
else:
|
||||
self.searching = True
|
||||
if unicode(self.search.text()) != self.STOP_TEXT:
|
||||
self.search.setText(self.STOP_TEXT)
|
||||
if not self.pi.isAnimated():
|
||||
self.pi.startAnimation()
|
||||
|
||||
@ -427,3 +451,4 @@ if __name__ == '__main__':
|
||||
s = SearchDialog(gui, query=' '.join(sys.argv[1:]))
|
||||
s.exec_()
|
||||
|
||||
|
||||
|
@ -15,6 +15,7 @@ from PyQt4.Qt import (QFont, QVariant, QDialog, Qt, QColor, QColorDialog,
|
||||
|
||||
from calibre.constants import iswindows, isxp
|
||||
from calibre.utils.config import Config, StringConfig, JSONConfig
|
||||
from calibre.gui2 import min_available_height
|
||||
from calibre.gui2.shortcuts import ShortcutConfig
|
||||
from calibre.gui2.viewer.config_ui import Ui_Dialog
|
||||
from calibre.utils.localization import get_language
|
||||
@ -140,6 +141,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
|
||||
self.init_load_themes()
|
||||
|
||||
self.clear_search_history_button.clicked.connect(self.clear_search_history)
|
||||
self.resize(self.width(), min(self.height(), max(575, min_available_height()-25)))
|
||||
|
||||
def clear_search_history(self):
|
||||
from calibre.gui2 import config
|
||||
|
@ -29,7 +29,7 @@ from calibre.ebooks.oeb.display.webview import load_html
|
||||
from calibre.constants import isxp, iswindows
|
||||
# }}}
|
||||
|
||||
class Document(QWebPage): # {{{
|
||||
class Document(QWebPage): # {{{
|
||||
|
||||
page_turn = pyqtSignal(object)
|
||||
mark_element = pyqtSignal(QWebElement)
|
||||
@ -356,7 +356,8 @@ class Document(QWebPage): # {{{
|
||||
self.mainFrame().setScrollPosition(QPoint(x, y))
|
||||
|
||||
def jump_to_anchor(self, anchor):
|
||||
if not self.loaded_javascript: return
|
||||
if not self.loaded_javascript:
|
||||
return
|
||||
self.javascript('window.paged_display.jump_to_anchor("%s")'%anchor)
|
||||
|
||||
def element_ypos(self, elem):
|
||||
@ -447,7 +448,7 @@ class Document(QWebPage): # {{{
|
||||
|
||||
@property
|
||||
def width(self):
|
||||
return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results
|
||||
return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results
|
||||
|
||||
def set_bottom_padding(self, amount):
|
||||
s = QSize(-1, -1) if amount == 0 else QSize(self.viewportSize().width(),
|
||||
@ -460,7 +461,7 @@ class Document(QWebPage): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
class DocumentView(QWebView): # {{{
|
||||
class DocumentView(QWebView): # {{{
|
||||
|
||||
magnification_changed = pyqtSignal(object)
|
||||
DISABLED_BRUSH = QBrush(Qt.lightGray, Qt.Dense5Pattern)
|
||||
@ -766,8 +767,10 @@ class DocumentView(QWebView): # {{{
|
||||
|
||||
@dynamic_property
|
||||
def current_language(self):
|
||||
def fget(self): return self.document.current_language
|
||||
def fset(self, val): self.document.current_language = val
|
||||
def fget(self):
|
||||
return self.document.current_language
|
||||
def fset(self, val):
|
||||
self.document.current_language = val
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
def search(self, text, backwards=False):
|
||||
@ -816,7 +819,6 @@ class DocumentView(QWebView): # {{{
|
||||
self.scrollbar.blockSignals(False)
|
||||
self._ignore_scrollbar_signals = False
|
||||
|
||||
|
||||
def load_finished(self, ok):
|
||||
if self.loading_url is None:
|
||||
# An <iframe> finished loading
|
||||
@ -960,8 +962,8 @@ class DocumentView(QWebView): # {{{
|
||||
window_height = self.document.window_height
|
||||
document_height = self.document.height
|
||||
ddelta = document_height - window_height
|
||||
#print '\nWindow height:', window_height
|
||||
#print 'Document height:', self.document.height
|
||||
# print '\nWindow height:', window_height
|
||||
# print 'Document height:', self.document.height
|
||||
|
||||
delta_y = window_height - 25
|
||||
if self.document.at_bottom or ddelta <= 0:
|
||||
@ -974,19 +976,19 @@ class DocumentView(QWebView): # {{{
|
||||
return
|
||||
else:
|
||||
oopos = self.document.ypos
|
||||
#print 'Original position:', oopos
|
||||
# print 'Original position:', oopos
|
||||
self.document.set_bottom_padding(0)
|
||||
opos = self.document.ypos
|
||||
#print 'After set padding=0:', self.document.ypos
|
||||
# print 'After set padding=0:', self.document.ypos
|
||||
if opos < oopos:
|
||||
if self.manager is not None:
|
||||
if epf:
|
||||
self.flipper.initialize(self.current_page_image())
|
||||
self.manager.next_document()
|
||||
return
|
||||
#oheight = self.document.height
|
||||
lower_limit = opos + delta_y # Max value of top y co-ord after scrolling
|
||||
max_y = self.document.height - window_height # The maximum possible top y co-ord
|
||||
# oheight = self.document.height
|
||||
lower_limit = opos + delta_y # Max value of top y co-ord after scrolling
|
||||
max_y = self.document.height - window_height # The maximum possible top y co-ord
|
||||
if max_y < lower_limit:
|
||||
padding = lower_limit - max_y
|
||||
if padding == window_height:
|
||||
@ -995,28 +997,28 @@ class DocumentView(QWebView): # {{{
|
||||
self.flipper.initialize(self.current_page_image())
|
||||
self.manager.next_document()
|
||||
return
|
||||
#print 'Setting padding to:', lower_limit - max_y
|
||||
# print 'Setting padding to:', lower_limit - max_y
|
||||
self.document.set_bottom_padding(lower_limit - max_y)
|
||||
if epf:
|
||||
self.flipper.initialize(self.current_page_image())
|
||||
#print 'Document height:', self.document.height
|
||||
#print 'Height change:', (self.document.height - oheight)
|
||||
# print 'Document height:', self.document.height
|
||||
# print 'Height change:', (self.document.height - oheight)
|
||||
max_y = self.document.height - window_height
|
||||
lower_limit = min(max_y, lower_limit)
|
||||
#print 'Scroll to:', lower_limit
|
||||
# print 'Scroll to:', lower_limit
|
||||
if lower_limit > opos:
|
||||
self.document.scroll_to(self.document.xpos, lower_limit)
|
||||
actually_scrolled = self.document.ypos - opos
|
||||
#print 'After scroll pos:', self.document.ypos
|
||||
#print 'Scrolled by:', self.document.ypos - opos
|
||||
# print 'After scroll pos:', self.document.ypos
|
||||
# print 'Scrolled by:', self.document.ypos - opos
|
||||
self.find_next_blank_line(window_height - actually_scrolled)
|
||||
#print 'After blank line pos:', self.document.ypos
|
||||
# print 'After blank line pos:', self.document.ypos
|
||||
if epf:
|
||||
self.flipper(self.current_page_image(),
|
||||
duration=self.document.page_flip_duration)
|
||||
if self.manager is not None:
|
||||
self.manager.scrolled(self.scroll_fraction)
|
||||
#print 'After all:', self.document.ypos
|
||||
# print 'After all:', self.document.ypos
|
||||
|
||||
def page_turn_requested(self, backwards):
|
||||
if backwards:
|
||||
@ -1110,7 +1112,8 @@ class DocumentView(QWebView): # {{{
|
||||
return
|
||||
|
||||
if self.document.in_paged_mode:
|
||||
if abs(event.delta()) < 15: return
|
||||
if abs(event.delta()) < 15:
|
||||
return
|
||||
typ = 'screen' if self.document.wheel_flips_pages else 'col'
|
||||
direction = 'next' if event.delta() < 0 else 'previous'
|
||||
loc = self.document.javascript('paged_display.%s_%s_location()'%(
|
||||
@ -1134,7 +1137,7 @@ class DocumentView(QWebView): # {{{
|
||||
event.accept()
|
||||
return
|
||||
if self.document.at_bottom:
|
||||
self.scroll_by(y=15) # at_bottom can lie on windows
|
||||
self.scroll_by(y=15) # at_bottom can lie on windows
|
||||
if self.manager is not None:
|
||||
self.manager.next_document()
|
||||
event.accept()
|
||||
@ -1218,6 +1221,12 @@ class DocumentView(QWebView): # {{{
|
||||
self.paged_col_scroll()
|
||||
else:
|
||||
self.scroll_by(x=15)
|
||||
elif key == 'Back':
|
||||
if self.manager is not None:
|
||||
self.manager.back(None)
|
||||
elif key == 'Forward':
|
||||
if self.manager is not None:
|
||||
self.manager.forward(None)
|
||||
else:
|
||||
handled = False
|
||||
return handled
|
||||
@ -1256,3 +1265,4 @@ class DocumentView(QWebView): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
|
||||
|
@ -44,4 +44,10 @@ SHORTCUTS = {
|
||||
'Right' : (['L', 'Right'],
|
||||
_('Scroll right')),
|
||||
|
||||
'Back': (['Alt+Left'],
|
||||
_('Back')),
|
||||
|
||||
'Forward': (['Alt+Right'],
|
||||
_('Forward')),
|
||||
|
||||
}
|
||||
|
@ -706,6 +706,8 @@ class CatalogBuilder(object):
|
||||
if last_c in exceptions.keys():
|
||||
last_c = exceptions[unicode(last_c)]
|
||||
last_ordnum = ordnum
|
||||
else:
|
||||
last_c = cl_list[idx-1]
|
||||
cl_list[idx] = last_c
|
||||
|
||||
if self.DEBUG and self.opts.verbose:
|
||||
|
@ -7,7 +7,7 @@ import sys, os, cPickle, textwrap, stat
|
||||
from subprocess import check_call
|
||||
from functools import partial
|
||||
|
||||
from calibre import __appname__, prints, guess_type
|
||||
from calibre import __appname__, prints, guess_type
|
||||
from calibre.constants import islinux, isnetbsd, isbsd
|
||||
from calibre.customize.ui import all_input_formats
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
@ -15,7 +15,7 @@ from calibre import CurrentDir
|
||||
|
||||
|
||||
entry_points = {
|
||||
'console_scripts': [ \
|
||||
'console_scripts': [
|
||||
'ebook-device = calibre.devices.cli:main',
|
||||
'ebook-meta = calibre.ebooks.metadata.cli:main',
|
||||
'ebook-convert = calibre.ebooks.conversion.cli:main',
|
||||
@ -123,7 +123,7 @@ os.remove(os.path.abspath(__file__))
|
||||
|
||||
# }}}
|
||||
|
||||
class ZshCompleter(object): # {{{
|
||||
class ZshCompleter(object): # {{{
|
||||
|
||||
def __init__(self, opts):
|
||||
self.opts = opts
|
||||
@ -196,7 +196,8 @@ class ZshCompleter(object): # {{{
|
||||
|
||||
def opts_and_exts(self, name, op, exts, cover_opts=('--cover',),
|
||||
opf_opts=('--opf',), file_map={}):
|
||||
if not self.dest: return
|
||||
if not self.dest:
|
||||
return
|
||||
exts = set(exts).union(x.upper() for x in exts)
|
||||
pats = ('*.%s'%x for x in exts)
|
||||
extra = ("'*:filename:_files -g \"%s\"' "%' '.join(pats),)
|
||||
@ -206,7 +207,8 @@ class ZshCompleter(object): # {{{
|
||||
self.commands[name] = txt
|
||||
|
||||
def opts_and_words(self, name, op, words, takes_files=False):
|
||||
if not self.dest: return
|
||||
if not self.dest:
|
||||
return
|
||||
extra = ("'*:filename:_files' ",) if takes_files else ()
|
||||
opts = '\\\n '.join(tuple(self.get_options(op())) + extra)
|
||||
txt = '_arguments -s \\\n ' + opts
|
||||
@ -273,7 +275,8 @@ class ZshCompleter(object): # {{{
|
||||
):
|
||||
for fmt in fmts:
|
||||
is_input = group_title == input_group
|
||||
if is_input and fmt in {'rar', 'zip', 'oebzip'}: continue
|
||||
if is_input and fmt in {'rar', 'zip', 'oebzip'}:
|
||||
continue
|
||||
p = (get_parser(input_fmt=fmt) if is_input
|
||||
else get_parser(output_fmt=fmt))
|
||||
opts = None
|
||||
@ -282,7 +285,8 @@ class ZshCompleter(object): # {{{
|
||||
opts = [o for o in group.option_list if
|
||||
'--pretty-print' not in o._long_opts and
|
||||
'--input-encoding' not in o._long_opts]
|
||||
if not opts: continue
|
||||
if not opts:
|
||||
continue
|
||||
opts = '\\\n '.join(tuple(self.get_options(opts)))
|
||||
w('\n%s() {'%(func%fmt))
|
||||
w('\n _arguments -s \\\n ' + opts)
|
||||
@ -407,7 +411,6 @@ class PostInstall:
|
||||
self.warnings.append((args, kwargs))
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
def __init__(self, opts, info=prints, warn=None, manifest=None):
|
||||
self.opts = opts
|
||||
self.info = info
|
||||
@ -482,8 +485,7 @@ class PostInstall:
|
||||
raise
|
||||
self.task_failed('Creating uninstaller failed')
|
||||
|
||||
|
||||
def setup_completion(self): # {{{
|
||||
def setup_completion(self): # {{{
|
||||
try:
|
||||
self.info('Setting up command-line completion...')
|
||||
from calibre.ebooks.metadata.cli import option_parser as metaop, filetypes as meta_filetypes
|
||||
@ -542,7 +544,7 @@ class PostInstall:
|
||||
o_and_w('fetch-ebook-metadata', fem_op, [])
|
||||
o_and_w('calibre-smtp', smtp_op, [])
|
||||
o_and_w('calibre-server', serv_op, [])
|
||||
o_and_e('calibre-debug', debug_op, ['py', 'recipe'], file_map={
|
||||
o_and_e('calibre-debug', debug_op, ['py', 'recipe', 'mobi', 'azw', 'azw3', 'docx'], file_map={
|
||||
'--tweak-book':['epub', 'azw3', 'mobi'],
|
||||
'--subset-font':['ttf', 'otf'],
|
||||
'--exec-file':['py', 'recipe'],
|
||||
@ -636,7 +638,7 @@ class PostInstall:
|
||||
self.task_failed('Setting up completion failed')
|
||||
# }}}
|
||||
|
||||
def setup_desktop_integration(self): # {{{
|
||||
def setup_desktop_integration(self): # {{{
|
||||
try:
|
||||
self.info('Setting up desktop integration...')
|
||||
|
||||
@ -745,7 +747,7 @@ def opts_and_words(name, op, words, takes_files=False):
|
||||
opts = '|'.join(options(op))
|
||||
words = '|'.join([w.replace("'", "\\'") for w in words])
|
||||
fname = name.replace('-', '_')
|
||||
return ('_'+fname+'()'+\
|
||||
return ('_'+fname+'()'+
|
||||
'''
|
||||
{
|
||||
local cur opts
|
||||
@ -922,3 +924,4 @@ def main():
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
||||
|
408
src/calibre/web/feeds/jsnews.py
Normal file
408
src/calibre/web/feeds/jsnews.py
Normal file
@ -0,0 +1,408 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os, re
|
||||
from io import BytesIO
|
||||
from functools import partial
|
||||
|
||||
from calibre import force_unicode, walk
|
||||
from calibre.constants import __appname__
|
||||
from calibre.web.feeds import feeds_from_index
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.web.fetch.javascript import fetch_page, AbortFetch, links_from_selectors
|
||||
from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
|
||||
|
||||
def image_data_to_url(data, base='cover'):
    '''
    Wrap raw image data in a file-like object that can be used in place of
    a URL (it has ``read()`` and a ``name``).

    :param data: The raw bytes of the image.
    :param base: The base of the file name given to the returned object.
    :return: A :class:`BytesIO` over ``data`` whose ``name`` attribute is
        ``<base>.<ext>``. The extension is sniffed from the data; data that
        starts with a PDF header gets ``pdf`` and anything else that is not
        recognised gets ``jpg``.
    '''
    from calibre.utils.imghdr import what
    ans = BytesIO(data)
    ext = what(None, data)
    if not ext:
        ext = 'pdf' if data.startswith(b'%PDF-') else 'jpg'
    # Honour the ``base`` argument; previously it was accepted but ignored
    # and the name was always 'cover.<ext>'.
    ans.name = base + '.' + ext
    return ans
|
||||
|
||||
class JavascriptRecipe(BasicNewsRecipe):
|
||||
|
||||
'''
|
||||
|
||||
This recipe class is used to download content from javascript heavy
|
||||
sites. It uses a full WebKit browser to do the downloading, therefore it
|
||||
can support sites that use javascript to dynamically fetch content.
|
||||
|
||||
Most of the parameters from :class:`BasicNewsRecipe` still apply, apart
|
||||
from those noted specifically below. The biggest difference is that you use
|
||||
CSS selectors to specify tags to keep and remove as well as links to
|
||||
follow, instead of the BeautifulSoup selectors used in
|
||||
:class:`BasicNewsRecipe`. Indeed, BeautifulSoup has been completely removed
|
||||
and replaced by lxml, wherever you previously expected BeautifulSoup to
|
||||
represent parsed HTML, you will now get lxml trees. See
|
||||
http://lxml.de/tutorial.html for a tutorial on using lxml.
|
||||
|
||||
The various article pre-processing callbacks such as ``preprocess_html()``
|
||||
and ``skip_ad_pages()`` have all been replaced by just two callbacks,
|
||||
:meth:`preprocess_stage1` and :meth:`preprocess_stage2`. These methods are
|
||||
passed the browser instance, and can thus do anything they like.
|
||||
|
||||
An important method that you will often have to implement is
|
||||
:meth:`load_complete` to tell the download system when a page has finished
|
||||
loading and is ready to be scraped.
|
||||
|
||||
You can use the builtin recipe for time.com as an example of the usage of
|
||||
this class.
|
||||
'''
|
||||
|
||||
#: Minimum calibre version needed to use this recipe
|
||||
requires_version = (0, 9, 35)
|
||||
|
||||
#: List of tags to be removed. Specified tags are removed from downloaded HTML.
|
||||
#: A tag is specified using CSS selectors.
|
||||
#: A common example::
|
||||
#:
|
||||
#: remove_tags = ['div.advert', 'div.tools']
|
||||
#:
|
||||
#: This will remove all `<div class="advert">` and `<div class="tools">` tags and all
|
||||
#: their children from the downloaded :term:`HTML`.
|
||||
remove_tags = ()
|
||||
|
||||
#: Remove all tags that occur after the specified tag.
|
||||
#: A tag is specified using CSS selectors.
|
||||
#: For example::
|
||||
#:
|
||||
#: remove_tags_after = '#content'
|
||||
#:
|
||||
#: will remove all tags after the first element with `id="content"`.
|
||||
remove_tags_after = None
|
||||
|
||||
#: Remove all tags that occur before the specified tag.
|
||||
#: A tag is specified using CSS selectors.
|
||||
#: For example::
|
||||
#:
|
||||
#: remove_tags_before = '#content'
|
||||
#:
|
||||
#: will remove all tags before the first element with `id="content"`.
|
||||
remove_tags_before = None
|
||||
|
||||
#: Keep only the specified tags and their children.
|
||||
#: Uses the CSS selector syntax.
|
||||
#: If this list is not empty, then the `<body>` tag will be emptied and re-filled with
|
||||
#: the tags that match the entries in this list. For example::
|
||||
#:
|
||||
#: keep_only_tags = ['#content', '#heading']
|
||||
#:
|
||||
#: will keep only tags that have an `id` attribute of `"content"` or `"heading"`.
|
||||
keep_only_tags = ()
|
||||
|
||||
#: A list of selectors that match <a href> elements that you want followed.
|
||||
#: For this to work you must also set recursions to at least 1.
|
||||
#: You can get more control by re-implementing :meth:`select_links` in your sub-class.
|
||||
links_from_selectors = ()
|
||||
|
||||
def select_links(self, browser, url, recursion_level):
    '''
    Return the list of URLs to follow from the page currently loaded in
    ``browser``; each of them will be downloaded in turn. Override this
    method in your recipe to implement arbitrary link following logic. The
    default implementation delegates to the module level
    ``links_from_selectors()`` using :attr:`links_from_selectors` and
    ``self.recursions``.
    '''
    selectors, max_depth = self.links_from_selectors, self.recursions
    return links_from_selectors(selectors, max_depth, browser, url, recursion_level)
|
||||
|
||||
def get_jsbrowser(self, *args, **kwargs):
    '''
    Create the browser used for downloading. Override this method in your
    recipe if you want to use a non-standard Browser object.

    Only the ``default_timeout`` keyword argument is looked at; when it is
    absent ``self.timeout`` is used instead.
    '''
    from calibre.web.jsbrowser.browser import Browser
    timeout = kwargs.get('default_timeout', self.timeout)
    return Browser(default_timeout=timeout)
|
||||
|
||||
def do_login(self, browser, username, password):
    '''
    This method is used to login to a website that uses a paywall. Implement it in
    your recipe if the site uses a paywall. An example implementation::

        def do_login(self, browser, username, password):
            browser.visit('http://some-page-that-has-a-login')
            form = browser.select_form(nr=0)  # Select the first form on the page
            form['username'] = username
            form['password'] = password
            browser.submit(timeout=120)  # Submit the form and wait at most two minutes for loading to complete

    Note that you can also select forms with CSS2 selectors, like this::

        browser.select_form('form#login_form')
        browser.select_form('form[name="someform"]')

    The default implementation does nothing.
    '''

    pass
|
||||
|
||||
def get_publication_data(self, browser):
    '''
    Download the cover, the masthead image and the list of sections/articles.
    Should return a dictionary with keys 'index', 'cover' and 'masthead'.
    'cover' and 'masthead' are optional, if not present, they will be auto-generated.
    The index must be in the same format as described in :meth:`parse_index`.

    This base implementation always raises :class:`NotImplementedError`;
    recipes must override it.
    '''
    raise NotImplementedError('You must implement this method in your recipe')
|
||||
|
||||
def load_complete(self, browser, url, recursion_level):
    '''
    This method is called after every page on the website is loaded. To be
    precise, it is called when the DOM is ready. If further checks need to
    be made, they should be made here. For example, if you want to check
    that some element in the DOM is present, you would use::

        def load_complete(self, browser, url, rl):
            browser.wait_for_element('#article-footer')
            return True

    where article-footer is the id of the element you want to wait for.

    The default implementation performs no checks and returns True.
    '''
    return True
|
||||
|
||||
def abort_article(self, msg=None):
    '''
    Call this method in any article processing callback to abort the download of the article.
    For example::

        def postprocess_html(self, article, root, url, recursion_level):
            if '/video/' in url:
                self.abort_article()
            return root

    This will cause this article to be ignored.

    :param msg: Optional message for the raised :class:`AbortFetch`; a
        generic message is used when it is empty or None.
    '''
    raise AbortFetch(msg or 'Article fetch aborted')
|
||||
|
||||
def preprocess_stage1(self, article, browser, url, recursion_level):
    '''
    This method is a callback called for every downloaded page, before any cleanup is done.
    The default implementation does nothing.
    '''
    pass
|
||||
|
||||
def preprocess_stage2(self, article, browser, url, recursion_level):
    '''
    This method is a callback called for every downloaded page, after the cleanup is done.
    The default implementation does nothing.
    '''
    pass
|
||||
|
||||
def postprocess_html(self, article, root, url, recursion_level):
    '''
    This method is called with the downloaded html for every page as an lxml
    tree. It is called after all cleanup and related processing is completed.
    You can use it to perform any extra cleanup, or to abort the article
    download (see :meth:`abort_article`).

    :param article: The Article object, which represents the article being currently downloaded
    :param root: The parsed downloaded HTML, as an lxml tree, see http://lxml.de/tutorial.html
                 for help with using lxml to manipulate HTML.
    :param url: The URL from which this HTML was downloaded
    :param recursion_level: This is zero for the first page in an article and > 0 for subsequent pages.
    :return: The tree to use for the page. The default implementation
             returns ``root`` unchanged.
    '''
    return root
|
||||
|
||||
def index_to_soup(self, url_or_raw, raw=False):
    '''
    Convenience method that takes an URL to the index page and returns
    a parsed lxml tree representation of it. See http://lxml.de/tutorial.html

    `url_or_raw`: Either a URL or the downloaded index page as a string.
    Anything starting with ``scheme://`` is treated as a URL and loaded
    with ``self.jsbrowser``.

    `raw`: If True, return the HTML as unicode text (with encoding
    declarations stripped) instead of parsing it.
    '''
    html = url_or_raw
    if re.match(r'\w+://', url_or_raw):
        self.jsbrowser.start_load(url_or_raw)
        html = self.jsbrowser.html
    if isinstance(html, bytes):
        html = xml_to_unicode(html)[0]
    html = strip_encoding_declarations(html)
    if raw:
        return html
    # Imported lazily, only the parsing path needs it
    import html5lib
    tree = html5lib.parse(html, treebuilder='lxml', namespaceHTMLElements=False)
    return tree.getroot()
|
||||
|
||||
# ***************************** Internal API *****************************
|
||||
|
||||
def _preprocess_browser(self, article, browser, url, stage, recursion_level):
|
||||
func = getattr(self, 'preprocess_stage%d' % stage)
|
||||
return func(article, browser, url, recursion_level)
|
||||
|
||||
def _postprocess_html(self, article, feed_num, art_num, feed_len, root, url, recursion_level):
    '''
    Clean up the parsed page, add the recipe styling and navbar, and then
    hand the tree to the user level :meth:`postprocess_html` callback.

    :param article: The article being downloaded
    :param feed_num: Index of the feed the article belongs to
    :param art_num: Index of the article inside its feed
    :param feed_len: Number of articles in the feed
    :param root: The page as an lxml tree, modified in place
    :param url: The URL the page was downloaded from
    :param recursion_level: Zero for the first page of an article
    :return: Whatever :meth:`postprocess_html` returns, which may be None.
    '''
    from lxml.html.builder import STYLE
    if self.no_stylesheets:
        for link in root.xpath('//link[@href]'):
            # A <link> without a type attribute is treated as CSS. The
            # comparison was previously missing, which made this test
            # always true.
            if (link.get('type', '') or 'text/css') == 'text/css':
                link.getparent().remove(link)
        for style in root.xpath('//style'):
            style.getparent().remove(style)

    # Add recipe specific styling
    head = root.xpath('//head|//body')
    head = head[0] if head else next(root.iterdescendants())
    head.append(STYLE(self.template_css + '\n\n' + (self.extra_css or '') + '\n'))

    # Add the top navbar
    if recursion_level == 0:
        body = root.xpath('//body')
        if body:
            templ = self.navbar.generate(
                False, feed_num, art_num, feed_len, not self.has_single_feed, url,
                __appname__, center=self.center_navbar,
                extra_css=self.extra_css)
            body[0].insert(0, templ.root.xpath('//div')[0])

    # Remove javascript
    remove_attrs = set(self.remove_attributes)
    if self.remove_javascript:
        remove_attrs.add('onload')
        for script in root.xpath('//*[name()="script" or name()="noscript"]'):
            script.getparent().remove(script)

    # Remove specified attributes
    for attr in remove_attrs:
        for tag in root.xpath('//*[@%s]' % attr):
            tag.attrib.pop(attr, None)

    # Remove tags that cause problems on ebook devices
    nuke = ['base', 'iframe', 'canvas', 'embed', 'command', 'datalist', 'video', 'audio', 'form']
    for tag in root.xpath('|'.join('//%s' % tag for tag in nuke)):
        tag.getparent().remove(tag)

    root = self.postprocess_html(article, root, url, recursion_level)

    if root is not None:
        # Nuke HTML5 tags
        tags = ['article', 'aside', 'header', 'footer', 'nav', 'figcaption', 'figure', 'section']
        for tag in root.xpath('|'.join('//%s' % tag for tag in tags)):
            tag.tag = 'div'

        self.populate_article_metadata(article, root, recursion_level == 0)

    return root
|
||||
|
||||
def download(self):
    '''
    Run the whole download: create the browser, login if the recipe needs
    a subscription and credentials were supplied, fetch the publication
    data, set up the cover and masthead and finally build the index.

    :return: The return value of :meth:`build_index`.
    ``self.cleanup()`` is always called at the end, even on failure.
    '''
    self.jsbrowser = self.get_jsbrowser()
    browser = self.jsbrowser
    with browser:
        try:
            wants_login = self.needs_subscription and self.username and self.password
            if wants_login:
                self.do_login(browser, self.username, self.password)
            publication = self.get_publication_data(browser)

            # The cover is optional, download_cover() runs either way
            cover_bytes = publication.get('cover', None)
            if cover_bytes:
                self.cover_url = image_data_to_url(cover_bytes)
            self.download_cover()

            # The masthead is optional, resolve_masthead() runs either way
            masthead_bytes = publication.get('masthead', None)
            if masthead_bytes:
                self.masthead_url = image_data_to_url(masthead_bytes)
            self.resolve_masthead()

            # Finally, the list of sections/articles
            return self.build_index(publication, browser)
        finally:
            self.cleanup()
|
||||
|
||||
def build_index(self, data, browser):
    '''
    Build the feeds from ``data['index']``, download every article with
    ``fetch_page()`` and write the per-feed and top level index files.

    :param data: The dictionary returned by :meth:`get_publication_data`
    :param browser: The browser instance passed on to ``fetch_page()``
    :return: Path to the top level ``index.html`` in ``self.output_dir``
    :raises ValueError: If the index is empty or yields no feeds
    '''
    sections = data.get('index', None)
    if not sections:
        raise ValueError('No articles found, aborting')

    feeds = feeds_from_index(sections, oldest_article=self.oldest_article,
                             max_articles_per_feed=self.max_articles_per_feed,
                             log=self.log)
    if not feeds:
        raise ValueError('No articles found, aborting')
    if self.ignore_duplicate_articles is not None:
        feeds = self.remove_duplicate_articles(feeds)
    if self.test:
        # In test mode only the first two feeds are processed
        feeds = feeds[:2]
    self.has_single_feed = len(feeds) == 1
    index = os.path.join(self.output_dir, 'index.html')

    html = self.feeds2index(feeds)
    with open(index, 'wb') as fi:
        fi.write(html)

    if self.reverse_article_order:
        for feed in feeds:
            if hasattr(feed, 'reverse'):
                feed.reverse()

    self.report_progress(0, _('Got feeds from index page'))
    # Shared by all fetch_page() calls below
    resource_cache = {}

    # Number of articles that will be attempted, used for progress reporting
    total = 0
    for feed in feeds:
        total += min(self.max_articles_per_feed, len(feed))
    num = 0

    for f, feed in enumerate(feeds):
        feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
        if not os.path.isdir(feed_dir):
            os.makedirs(feed_dir)

        for a, article in enumerate(feed):
            if a >= self.max_articles_per_feed:
                break
            num += 1
            art_dir = os.path.join(feed_dir, 'article_%d'%a)
            if not os.path.isdir(art_dir):
                os.makedirs(art_dir)
            try:
                url = self.print_version(article.url)
            except NotImplementedError:
                # No print version, use the article URL as is
                url = article.url
            except:
                self.log.exception('Failed to find print version for: '+article.url)
                url = None
            if not url:
                continue

            self.log.debug('Downloading article:', article.title, 'from', url)
            try:
                pages = fetch_page(
                    url,
                    load_complete=self.load_complete,
                    links=self.select_links,
                    remove=self.remove_tags,
                    keep_only=self.keep_only_tags,
                    preprocess_browser=partial(self._preprocess_browser, article),
                    postprocess_html=partial(self._postprocess_html, article, f, a, len(feed)),
                    remove_before=self.remove_tags_before,
                    remove_after=self.remove_tags_after,
                    remove_javascript=self.remove_javascript,
                    delay=self.delay,
                    resource_cache=resource_cache, output_dir=art_dir, browser=browser)
            except AbortFetch:
                self.log.exception('Fetching of article: %r aborted' % article.title)
                continue
            except Exception:
                # A failed article is logged and skipped, the download goes on
                self.log.exception('Fetching of article: %r failed' % article.title)
                continue
            self.log.debug('Downloaded article:', article.title, 'from', article.url)
            # Remember the original URL and point the article at its local copy
            article.orig_url = article.url
            article.url = 'article_%d/index.html'%a
            article.downloaded = True
            article.sub_pages = pages[1:]
            self.report_progress(float(num)/total,
                                 _(u'Article downloaded: %s')%force_unicode(article.title))

    for f, feed in enumerate(feeds):
        html = self.feed2index(f, feeds)
        feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
        with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
            fi.write(html)
    if self.no_stylesheets:
        # Delete any .css files that were saved under the output directory
        for f in walk(self.output_dir):
            if f.endswith('.css'):
                os.remove(f)
    self.create_opf(feeds)
    self.report_progress(1, _('Download finished'))
    return index
|
||||
|
@ -1160,26 +1160,7 @@ class BasicNewsRecipe(Recipe):
|
||||
self.report_progress(0, _('Trying to download cover...'))
|
||||
self.download_cover()
|
||||
self.report_progress(0, _('Generating masthead...'))
|
||||
self.masthead_path = None
|
||||
|
||||
try:
|
||||
murl = self.get_masthead_url()
|
||||
except:
|
||||
self.log.exception('Failed to get masthead url')
|
||||
murl = None
|
||||
|
||||
if murl is not None:
|
||||
# Try downloading the user-supplied masthead_url
|
||||
# Failure sets self.masthead_path to None
|
||||
self.download_masthead(murl)
|
||||
if self.masthead_path is None:
|
||||
self.log.info("Synthesizing mastheadImage")
|
||||
self.masthead_path = os.path.join(self.output_dir, 'mastheadImage.jpg')
|
||||
try:
|
||||
self.default_masthead_image(self.masthead_path)
|
||||
except:
|
||||
self.log.exception('Failed to generate default masthead image')
|
||||
self.masthead_path = None
|
||||
self.resolve_masthead()
|
||||
|
||||
if self.test:
|
||||
feeds = feeds[:2]
|
||||
@ -1268,7 +1249,10 @@ class BasicNewsRecipe(Recipe):
|
||||
if not cu:
|
||||
return
|
||||
cdata = None
|
||||
if os.access(cu, os.R_OK):
|
||||
if hasattr(cu, 'read'):
|
||||
cdata = cu.read()
|
||||
cu = getattr(cu, 'name', 'cover.jpg')
|
||||
elif os.access(cu, os.R_OK):
|
||||
cdata = open(cu, 'rb').read()
|
||||
else:
|
||||
self.report_progress(1, _('Downloading cover from %s')%cu)
|
||||
@ -1305,13 +1289,19 @@ class BasicNewsRecipe(Recipe):
|
||||
self.cover_path = None
|
||||
|
||||
def _download_masthead(self, mu):
|
||||
ext = mu.rpartition('.')[-1]
|
||||
if '?' in ext:
|
||||
ext = ''
|
||||
if hasattr(mu, 'rpartition'):
|
||||
ext = mu.rpartition('.')[-1]
|
||||
if '?' in ext:
|
||||
ext = ''
|
||||
else:
|
||||
ext = mu.name.rpartition('.')[-1]
|
||||
ext = ext.lower() if ext else 'jpg'
|
||||
mpath = os.path.join(self.output_dir, 'masthead_source.'+ext)
|
||||
outfile = os.path.join(self.output_dir, 'mastheadImage.jpg')
|
||||
if os.access(mu, os.R_OK):
|
||||
if hasattr(mu, 'read'):
|
||||
with open(mpath, 'wb') as mfile:
|
||||
mfile.write(mu.read())
|
||||
elif os.access(mu, os.R_OK):
|
||||
with open(mpath, 'wb') as mfile:
|
||||
mfile.write(open(mu, 'rb').read())
|
||||
else:
|
||||
@ -1329,6 +1319,27 @@ class BasicNewsRecipe(Recipe):
|
||||
except:
|
||||
self.log.exception("Failed to download supplied masthead_url")
|
||||
|
||||
def resolve_masthead(self):
    '''
    Set ``self.masthead_path``.

    First tries to download the masthead from the URL returned by
    :meth:`get_masthead_url`. If there is no URL, or ``masthead_path`` is
    still None afterwards, a default masthead image is generated at
    ``mastheadImage.jpg`` inside ``self.output_dir``. If that also fails,
    ``masthead_path`` is left as None. Failures are logged, not raised.
    '''
    self.masthead_path = None
    try:
        murl = self.get_masthead_url()
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not swallowed
        self.log.exception('Failed to get masthead url')
        murl = None

    if murl is not None:
        # Try downloading the user-supplied masthead_url
        # Failure sets self.masthead_path to None
        self.download_masthead(murl)
    if self.masthead_path is None:
        self.log.info("Synthesizing mastheadImage")
        self.masthead_path = os.path.join(self.output_dir, 'mastheadImage.jpg')
        try:
            self.default_masthead_image(self.masthead_path)
        except Exception:
            self.log.exception('Failed to generate default masthead image')
            self.masthead_path = None
|
||||
|
||||
def default_cover(self, cover_file):
|
||||
'''
|
||||
Create a generic cover for recipes that dont have a cover
|
||||
|
@ -7,11 +7,12 @@ Builtin recipes.
|
||||
import re, time, io
|
||||
from calibre.web.feeds.news import (BasicNewsRecipe, CustomIndexRecipe,
|
||||
AutomaticNewsRecipe, CalibrePeriodical)
|
||||
from calibre.web.feeds.jsnews import JavascriptRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.utils.config import JSONConfig
|
||||
|
||||
basic_recipes = (BasicNewsRecipe, AutomaticNewsRecipe, CustomIndexRecipe,
|
||||
CalibrePeriodical)
|
||||
CalibrePeriodical, JavascriptRecipe)
|
||||
|
||||
custom_recipes = JSONConfig('custom_recipes/index.json')
|
||||
|
||||
|
262
src/calibre/web/fetch/javascript.py
Normal file
262
src/calibre/web/fetch/javascript.py
Normal file
@ -0,0 +1,262 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import time, os, hashlib
|
||||
from operator import attrgetter
|
||||
from collections import defaultdict
|
||||
from functools import partial
|
||||
|
||||
from calibre import jsbrowser
|
||||
from calibre.ebooks.chardet import strip_encoding_declarations
|
||||
from calibre.utils.imghdr import what
|
||||
from calibre.web.jsbrowser.browser import Timeout
|
||||
|
||||
# remove_comments() {{{
|
||||
remove_comments = '''
|
||||
function remove_comments(node) {
|
||||
var nodes = node.childNodes, i=0, t;
|
||||
while((t = nodes.item(i++))) {
|
||||
switch(t.nodeType){
|
||||
case Node.ELEMENT_NODE:
|
||||
remove_comments(t);
|
||||
break;
|
||||
case Node.COMMENT_NODE:
|
||||
node.removeChild(t);
|
||||
i--;
|
||||
}
|
||||
}
|
||||
}
|
||||
remove_comments(document)
|
||||
''' # }}}
|
||||
|
||||
class AbortFetch(ValueError):
    '''
    Raised to abort the fetching of a page. In this module save_html()
    raises it when the postprocess callback returns None.
    '''
    pass
|
||||
|
||||
def children(elem):
    '''
    Generate the direct children of ``elem`` in document order, starting at
    ``firstChild()`` and following ``nextSibling()`` until a null element
    is reached.
    '''
    node = elem.firstChild()
    while True:
        if node.isNull():
            return
        yield node
        node = node.nextSibling()
|
||||
|
||||
def apply_keep_only(browser, keep_only):
    '''
    Reduce the document body to the elements matching the ``keep_only``
    CSS selectors.

    The matching elements are appended to the end of ``<body>`` and then
    the children of ``<body>`` are removed one by one until the first of
    the matched elements is reached. If the document has no body or
    nothing matches, an error is logged and the document is left alone.
    '''
    frame = browser.page.mainFrame()
    body = frame.findFirstElement('body')
    if body.isNull():
        browser.log.error('Document has no body, cannot apply keep_only')
        return
    matched = []
    for selector in keep_only:
        for found in frame.findAllElements(selector):
            matched.append(found)
    if not matched:
        browser.log.error('Failed to find any elements matching the keep_only selectors: %r' % keep_only)
        return
    for kept in matched:
        body.appendInside(kept)
    for child in tuple(children(body)):
        # Stop at the first child that is one of the matched elements
        if any(kept == child for kept in matched):
            break
        child.removeFromDocument()
|
||||
|
||||
def apply_remove(browser, remove):
    '''
    Remove from the document every element that matches any of the CSS
    selectors in ``remove``.
    '''
    frame = browser.page.mainFrame()
    for selector in remove:
        for node in frame.findAllElements(selector):
            if node.isNull():
                continue
            node.removeFromDocument()
|
||||
|
||||
def remove_beyond(browser, selector, before=True):
    '''
    Remove everything that comes before (or after) the first element
    matching ``selector``.

    Starting at the matched element and walking up through its ancestors,
    all previous siblings (``before=True``) or all following siblings
    (``before=False``) of each element are removed. The walk stops when
    ``<body>`` is reached. If nothing matches, a message is logged and the
    document is left alone.
    '''
    mf = browser.page.mainFrame()
    elem = mf.findFirstElement(selector)
    if elem.isNull():
        browser.log('Failed to find any element matching the selector: %s' % selector)
        return
    next_sibling = attrgetter('previousSibling' if before else 'nextSibling')

    # Use is_tag() so the tag name is compared case-insensitively, like
    # everywhere else in this module. A case-sensitive comparison against
    # 'body' fails to stop the walk if tagName() is upper case, and the
    # siblings of <body> would then be removed as well.
    while not elem.isNull() and not is_tag(elem, 'body'):
        remove = []
        after = next_sibling(elem)()
        while not after.isNull():
            remove.append(after)
            after = next_sibling(after)()
        for x in remove:
            x.removeFromDocument()
        elem = elem.parent()
|
||||
|
||||
def is_tag(elem, name):
    ''' Return True if the tag name of ``elem`` equals ``name``, ignoring case. '''
    tag_name = unicode(elem.tagName())
    return tag_name.lower() == name.lower()
|
||||
|
||||
def download_resources(browser, resource_cache, output_dir):
    '''
    Save the images and stylesheets referenced by the current page into
    ``output_dir`` and rewrite the referencing elements to point at the
    saved copies.

    :param browser: The browser holding the loaded page
    :param resource_cache: Maps the SHA1 digest of resource data to the
        path it was saved at; updated in place, so that identical data is
        written only once
    :param output_dir: Directory new resources are written to
    '''
    img_counter = style_counter = 0
    # Maps resource URL to the list of elements that reference it
    resources = defaultdict(list)
    for img in browser.css_select('img[src]', all=True):
        # Using javascript ensures that absolute URLs are returned, direct
        # attribute access does not do that
        src = unicode(img.evaluateJavaScript('this.src').toString()).strip()
        if src:
            resources[src].append(img)
    for link in browser.css_select('link[href]', all=True):
        # Missing type/rel attributes default to a CSS stylesheet
        lt = unicode(link.attribute('type')).strip() or 'text/css'
        rel = unicode(link.attribute('rel')).strip() or 'stylesheet'
        if lt == 'text/css' and rel == 'stylesheet':
            href = unicode(link.evaluateJavaScript('this.href').toString()).strip()
            if href:
                resources[href].append(link)
            else:
                link.removeFromDocument()
        else:
            # Links that are not CSS stylesheets are dropped
            link.removeFromDocument()
    loaded_resources = browser.wait_for_resources(resources)
    for url, raw in loaded_resources.iteritems():
        h = hashlib.sha1(raw).digest()
        if h in resource_cache:
            # Same data was already saved, refer to the existing file
            href = os.path.relpath(resource_cache[h], output_dir).replace(os.sep, '/')
        else:
            elem = resources[url][0]
            if is_tag(elem, 'link'):
                style_counter += 1
                href = 'style_%d.css' % style_counter
            else:
                img_counter += 1
                # Fall back to jpg when the image type is not recognised
                ext = what(None, raw) or 'jpg'
                href = 'img_%d.%s' % (img_counter, ext)
            dest = os.path.join(output_dir, href)
            resource_cache[h] = dest
            with open(dest, 'wb') as f:
                f.write(raw)
        for elem in resources[url]:
            elem.setAttribute('href' if is_tag(elem, 'link') else 'src', href)

    # Elements whose resource was not loaded are removed from the page
    failed = set(resources) - set(loaded_resources)
    for url in failed:
        browser.log.warn('Failed to download resource:', url)
        for elem in resources[url]:
            elem.removeFromDocument()
|
||||
|
||||
def save_html(browser, output_dir, postprocess_html, url, recursion_level):
    '''
    Write the current DOM of browser to index.html inside output_dir and
    return the path of that file.

    The markup has its encoding declarations stripped and is re-parsed with
    html5lib. The resulting root element is passed to
    postprocess_html(root, url, recursion_level); if that returns None,
    AbortFetch is raised and no file is written.
    '''
    markup = strip_encoding_declarations(browser.html)
    import html5lib
    tree = html5lib.parse(markup, treebuilder='lxml', namespaceHTMLElements=False)
    root = postprocess_html(tree.getroot(), url, recursion_level)
    if root is None:
        # user wants this page to be aborted
        raise AbortFetch('%s was aborted during postprocess' % url)
    path = os.path.join(output_dir, 'index.html')
    with open(path, 'wb') as f:
        from lxml.html import tostring
        serialized = tostring(root, include_meta_content_type=True, encoding='utf-8', pretty_print=True)
        f.write(serialized)
    return path
|
||||
|
||||
def links_from_selectors(selectors, recursions, browser, url, recursion_level):
    '''
    Return the list of non-empty href values (as resolved by javascript) of
    all elements matching any of the CSS selectors in selectors.

    An empty list is returned unless recursions is greater than
    recursion_level. url is accepted but not used here.
    '''
    if not recursions > recursion_level:
        return []
    hrefs = [
        unicode(a.evaluateJavaScript('this.href').toString()).strip()
        for selector in selectors
        for a in browser.css_select(selector, all=True)
    ]
    return [href for href in hrefs if href]
|
||||
|
||||
|
||||
def clean_dom(
    browser, url, recursion_level, preprocess_browser, remove_javascript,
    keep_only, remove_after, remove_before, remove):
    '''
    Clean up the DOM currently loaded in browser, in place.

    The steps run in this order: strip comments, call
    preprocess_browser(browser, url, 1, recursion_level), remove <script>
    elements (if remove_javascript), apply keep_only, remove_after,
    remove_before and remove (each only if truthy), and finally call
    preprocess_browser(browser, url, 2, recursion_level).
    '''
    # Remove comments as otherwise we can end up with nested comments, which
    # cause problems later
    main_frame = browser.page.mainFrame()
    main_frame.evaluateJavaScript(remove_comments)

    preprocess_browser(browser, url, 1, recursion_level)

    scripts = browser.css_select('script', all=True) if remove_javascript else ()
    for script in scripts:
        script.removeFromDocument()

    if keep_only:
        apply_keep_only(browser, keep_only)
    # remove_after is handled before remove_before
    for selector, before in ((remove_after, False), (remove_before, True)):
        if selector:
            remove_beyond(browser, selector, before=before)
    if remove:
        apply_remove(browser, remove)

    preprocess_browser(browser, url, 2, recursion_level)
|
||||
|
||||
def fetch_page(
    url=None,
    load_complete=lambda browser, url, recursion_level: True,
    links=lambda browser, url, recursion_level: (),
    keep_only=(),
    remove_after=None,
    remove_before=None,
    remove=(),
    remove_javascript=True,
    delay=0,
    preprocess_browser=lambda browser, url, stage, recursion_level:None,
    postprocess_html=lambda root, url, recursion_level: root,
    resource_cache=None,
    output_dir=None,
    browser=None,
    recursion_level=0
    ):
    '''
    Fetch the page at url, together with the pages it links to, and save
    them as HTML files. Returns a tuple of the paths of all saved pages, the
    first entry being the page for url itself.

    :param url: The URL to load. If None, whatever is currently loaded in
        browser is processed.
    :param load_complete: Called as load_complete(browser, url,
        recursion_level) repeatedly until it returns True. Timeout is raised
        if that takes longer than browser.default_timeout seconds.
    :param links: Called as links(browser, url, recursion_level), returns
        the URLs of the pages to fetch recursively. The n-th linked page is
        saved into the sub-directory linkn of output_dir.
    :param keep_only, remove_after, remove_before, remove, remove_javascript:
        Passed to clean_dom() for this page and for every linked page.
    :param delay: Seconds to sleep before loading each page.
    :param preprocess_browser: Passed to clean_dom().
    :param postprocess_html: Passed to save_html(). If it causes AbortFetch
        to be raised for a linked page, that page is skipped; for the page
        at url itself the exception propagates to the caller.
    :param resource_cache: dict used by download_resources() to avoid saving
        identical resources more than once. If None, a new dict is created
        for this call and shared with all recursive calls.
    :param output_dir: Directory to save into, defaults to the current
        working directory.
    :param browser: The browser to use, a new one is created if None.
    :param recursion_level: Depth of this call, the top level call is 0.
    '''
    if resource_cache is None:
        # A fresh cache per top level call, a shared default dict would keep
        # paths into the output directories of earlier, unrelated calls
        resource_cache = {}

    output_dir = output_dir or os.getcwdu()
    if browser is None:
        browser = jsbrowser()

    if delay:
        time.sleep(delay)

    # Load the DOM
    if url is not None:
        start_time = time.time()
        browser.start_load(url)
        while not load_complete(browser, url, recursion_level):
            browser.run_for_a_time(0.1)
            if time.time() - start_time > browser.default_timeout:
                raise Timeout('Timed out while waiting for %s to load' % url)

    children = links(browser, url, recursion_level)

    # Cleanup the DOM
    clean_dom(
        browser, url, recursion_level, preprocess_browser,
        remove_javascript, keep_only, remove_after, remove_before, remove)

    # Download resources
    download_resources(browser, resource_cache, output_dir)

    # Get HTML from the DOM
    pages = [save_html(browser, output_dir, postprocess_html, url, recursion_level)]

    # Fetch the linked pages
    for i, curl in enumerate(children):
        odir = os.path.join(output_dir, 'link%d' % (i + 1))
        if not os.path.exists(odir):
            os.mkdir(odir)
        try:
            # Every option is forwarded, including remove_javascript, so
            # that linked pages are cleaned the same way as this one
            pages.extend(fetch_page(
                curl, load_complete=load_complete, links=links, keep_only=keep_only,
                remove_after=remove_after, remove_before=remove_before, remove=remove,
                remove_javascript=remove_javascript,
                preprocess_browser=preprocess_browser, postprocess_html=postprocess_html,
                resource_cache=resource_cache, output_dir=odir, browser=browser, delay=delay,
                recursion_level=recursion_level+1))
        except AbortFetch:
            continue
    return tuple(pages)
|
||||
|
||||
if __name__ == '__main__':
    # Manual test: fetch one paginated article, following its page links
    browser = jsbrowser()
    article_url = 'http://www.time.com/time/magazine/article/0,9171,2145057,00.html'
    pagination_links = partial(links_from_selectors, ('.wp-paginate a.page[href]',), 1)
    unwanted = ('.entry-sharing', '.entry-footer', '.wp-paginate', '.post-rail')
    fetch_page(
        article_url, browser=browser, links=pagination_links,
        keep_only=('article.post',), remove=unwanted)
|
||||
|
||||
|
||||
|
||||
|
@ -7,26 +7,30 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, pprint, time
|
||||
import os, pprint, time, uuid
|
||||
from cookielib import Cookie
|
||||
from threading import current_thread
|
||||
|
||||
from PyQt4.Qt import (QObject, QNetworkAccessManager, QNetworkDiskCache,
|
||||
QNetworkProxy, QNetworkProxyFactory, QEventLoop, QUrl, pyqtSignal,
|
||||
QDialog, QVBoxLayout, QSize, QNetworkCookieJar, Qt, pyqtSlot)
|
||||
QDialog, QVBoxLayout, QSize, QNetworkCookieJar, Qt, pyqtSlot, QPixmap)
|
||||
from PyQt4.QtWebKit import QWebPage, QWebSettings, QWebView, QWebElement
|
||||
|
||||
from calibre import USER_AGENT, prints, get_proxies, get_proxy_info
|
||||
from calibre import USER_AGENT, prints, get_proxies, get_proxy_info, prepare_string_for_xml
|
||||
from calibre.constants import ispy3, cache_dir
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.utils.logging import ThreadSafeLog
|
||||
from calibre.gui2 import must_use_qt
|
||||
from calibre.web.jsbrowser.forms import FormsMixin
|
||||
from calibre.web.jsbrowser.forms import FormsMixin, default_timeout
|
||||
|
||||
class Timeout(Exception): pass
|
||||
class Timeout(Exception):
|
||||
pass
|
||||
|
||||
class LoadError(Exception): pass
|
||||
class LoadError(Exception):
|
||||
pass
|
||||
|
||||
class WebPage(QWebPage): # {{{
|
||||
|
||||
class WebPage(QWebPage): # {{{
|
||||
|
||||
def __init__(self, log,
|
||||
confirm_callback=None,
|
||||
@ -48,6 +52,26 @@ class WebPage(QWebPage): # {{{
|
||||
QWebSettings.enablePersistentStorage(os.path.join(cache_dir(),
|
||||
'webkit-persistence'))
|
||||
QWebSettings.setMaximumPagesInCache(0)
|
||||
self.bridge_name = 'b' + uuid.uuid4().get_hex()
|
||||
self.mainFrame().javaScriptWindowObjectCleared.connect(
|
||||
self.add_window_objects)
|
||||
self.dom_loaded = False
|
||||
|
||||
def add_window_objects(self):
    '''
    Reset dom_loaded, expose this object to javascript under
    self.bridge_name and register its content_loaded slot as a listener for
    the DOMContentLoaded event of the document.
    '''
    self.dom_loaded = False
    frame = self.mainFrame()
    frame.addToJavaScriptWindowObject(self.bridge_name, self)
    listener_js = 'document.addEventListener( "DOMContentLoaded", %s.content_loaded, false )' % self.bridge_name
    frame.evaluateJavaScript(listener_js)
|
||||
|
||||
def load_url(self, url):
    '''
    Start loading url in the main frame, after marking the DOM as not yet
    loaded.
    '''
    self.dom_loaded = False
    self.mainFrame().load(QUrl(url))
    self.ready_state  # Without this, DOMContentLoaded does not fire for file:// URLs
|
||||
|
||||
@pyqtSlot()
def content_loaded(self):
    '''
    Slot that records that the DOM has been loaded. add_window_objects()
    registers it as the javascript listener for DOMContentLoaded.
    '''
    self.dom_loaded = True
|
||||
|
||||
def userAgentForUrl(self, url):
|
||||
return self.user_agent
|
||||
@ -96,9 +120,28 @@ class WebPage(QWebPage): # {{{
|
||||
def ready_state(self):
|
||||
return unicode(self.mainFrame().evaluateJavaScript('document.readyState').toString())
|
||||
|
||||
@pyqtSlot(QPixmap)
def transfer_image(self, img):
    '''
    Slot that stores the passed in QPixmap as self.saved_img. It is invoked
    from javascript by get_image().
    '''
    self.saved_img = img
|
||||
|
||||
def get_image(self, qwe_or_selector):
    '''
    Return the image for qwe_or_selector as a QPixmap.

    qwe_or_selector is either a QWebElement or a CSS selector, in which case
    the first matching element is used. ValueError is raised if the selector
    matches nothing. The pixmap is handed over from javascript via the
    transfer_image slot; if that slot is never called an empty QPixmap is
    returned.
    '''
    if isinstance(qwe_or_selector, QWebElement):
        element = qwe_or_selector
    else:
        element = self.mainFrame().findFirstElement(qwe_or_selector)
        if element.isNull():
            raise ValueError('Failed to find element with selector: %r'
                    % qwe_or_selector)
    self.saved_img = QPixmap()
    element.evaluateJavaScript('%s.transfer_image(this)' % self.bridge_name)
    # Take the result and drop the temporary attribute again
    pixmap = self.saved_img
    del self.saved_img
    return pixmap
|
||||
|
||||
|
||||
# }}}
|
||||
|
||||
class ProxyFactory(QNetworkProxyFactory): # {{{
|
||||
class ProxyFactory(QNetworkProxyFactory): # {{{
|
||||
|
||||
def __init__(self, log):
|
||||
QNetworkProxyFactory.__init__(self)
|
||||
@ -107,9 +150,11 @@ class ProxyFactory(QNetworkProxyFactory): # {{{
|
||||
for scheme, proxy_string in proxies.iteritems():
|
||||
scheme = scheme.lower()
|
||||
info = get_proxy_info(scheme, proxy_string)
|
||||
if info is None: continue
|
||||
if info is None:
|
||||
continue
|
||||
hn, port = info['hostname'], info['port']
|
||||
if not hn or not port: continue
|
||||
if not hn or not port:
|
||||
continue
|
||||
log.debug('JSBrowser using proxy:', pprint.pformat(info))
|
||||
pt = {'socks5':QNetworkProxy.Socks5Proxy}.get(scheme,
|
||||
QNetworkProxy.HttpProxy)
|
||||
@ -128,21 +173,22 @@ class ProxyFactory(QNetworkProxyFactory): # {{{
|
||||
return [self.proxies.get(scheme, self.default_proxy)]
|
||||
# }}}
|
||||
|
||||
class NetworkAccessManager(QNetworkAccessManager): # {{{
|
||||
class NetworkAccessManager(QNetworkAccessManager): # {{{
|
||||
|
||||
OPERATION_NAMES = { getattr(QNetworkAccessManager, '%sOperation'%x) :
|
||||
OPERATION_NAMES = {getattr(QNetworkAccessManager, '%sOperation'%x) :
|
||||
x.upper() for x in ('Head', 'Get', 'Put', 'Post', 'Delete',
|
||||
'Custom')
|
||||
}
|
||||
report_reply_signal = pyqtSignal(object)
|
||||
|
||||
def __init__(self, log, use_disk_cache=True, parent=None):
|
||||
def __init__(self, log, disk_cache_size=50, parent=None):
|
||||
QNetworkAccessManager.__init__(self, parent)
|
||||
self.reply_count = 0
|
||||
self.log = log
|
||||
if use_disk_cache:
|
||||
if disk_cache_size > 0:
|
||||
self.cache = QNetworkDiskCache(self)
|
||||
self.cache.setCacheDirectory(os.path.join(cache_dir(), 'jsbrowser'))
|
||||
self.cache.setCacheDirectory(PersistentTemporaryDirectory(prefix='disk_cache_'))
|
||||
self.cache.setMaximumCacheSize(int(disk_cache_size * 1024 * 1024))
|
||||
self.setCache(self.cache)
|
||||
self.sslErrors.connect(self.on_ssl_errors)
|
||||
self.pf = ProxyFactory(log)
|
||||
@ -194,10 +240,11 @@ class NetworkAccessManager(QNetworkAccessManager): # {{{
|
||||
def report_reply(self, reply):
|
||||
reply_url = unicode(reply.url().toString())
|
||||
self.reply_count += 1
|
||||
err = reply.error()
|
||||
|
||||
if reply.error():
|
||||
self.log.warn("Reply error: %s - %d (%s)" %
|
||||
(reply_url, reply.error(), reply.errorString()))
|
||||
if err:
|
||||
l = self.log.debug if err == reply.OperationCanceledError else self.log.warn
|
||||
l("Reply error: %s - %d (%s)" % (reply_url, err, unicode(reply.errorString())))
|
||||
else:
|
||||
debug = []
|
||||
debug.append("Reply successful: %s" % reply_url)
|
||||
@ -230,18 +277,18 @@ class NetworkAccessManager(QNetworkAccessManager): # {{{
|
||||
c = Cookie(0, # version
|
||||
name, value,
|
||||
None, # port
|
||||
False, # port specified
|
||||
False, # port specified
|
||||
domain, domain_specified, initial_dot, path,
|
||||
path_specified,
|
||||
secure, expires, is_session_cookie,
|
||||
None, # Comment
|
||||
None, # Comment URL
|
||||
{} # rest
|
||||
None, # Comment
|
||||
None, # Comment URL
|
||||
{} # rest
|
||||
)
|
||||
yield c
|
||||
# }}}
|
||||
|
||||
class LoadWatcher(QObject): # {{{
|
||||
class LoadWatcher(QObject): # {{{
|
||||
|
||||
def __init__(self, page, parent=None):
|
||||
QObject.__init__(self, parent)
|
||||
@ -257,7 +304,7 @@ class LoadWatcher(QObject): # {{{
|
||||
self.page = None
|
||||
# }}}
|
||||
|
||||
class BrowserView(QDialog): # {{{
|
||||
class BrowserView(QDialog): # {{{
|
||||
|
||||
def __init__(self, page, parent=None):
|
||||
QDialog.__init__(self, parent)
|
||||
@ -283,7 +330,7 @@ class Browser(QObject, FormsMixin):
|
||||
def __init__(self,
|
||||
# Logging. If None, uses a default log, which does not output
|
||||
# debugging info
|
||||
log = None,
|
||||
log=None,
|
||||
# Receives a string and returns True/False. By default, returns
|
||||
# True for all strings
|
||||
confirm_callback=None,
|
||||
@ -296,14 +343,20 @@ class Browser(QObject, FormsMixin):
|
||||
# User agent to be used
|
||||
user_agent=USER_AGENT,
|
||||
|
||||
# If True a disk cache is used
|
||||
use_disk_cache=True,
|
||||
# The size (in MB) of the on disk cache. Note that because the disk
|
||||
# cache cannot be shared between different instances, we currently
|
||||
# use a temporary dir for the cache, which is deleted on
|
||||
# program exit. Set to zero to disable cache.
|
||||
disk_cache_size=50,
|
||||
|
||||
# Enable Inspect element functionality
|
||||
enable_developer_tools=False,
|
||||
|
||||
# Verbosity
|
||||
verbosity = 0
|
||||
verbosity=0,
|
||||
|
||||
# The default timeout (in seconds)
|
||||
default_timeout=30
|
||||
):
|
||||
must_use_qt()
|
||||
QObject.__init__(self)
|
||||
@ -314,12 +367,13 @@ class Browser(QObject, FormsMixin):
|
||||
if verbosity:
|
||||
log.filter_level = log.DEBUG
|
||||
self.log = log
|
||||
self.default_timeout = default_timeout
|
||||
|
||||
self.page = WebPage(log, confirm_callback=confirm_callback,
|
||||
prompt_callback=prompt_callback, user_agent=user_agent,
|
||||
enable_developer_tools=enable_developer_tools,
|
||||
parent=self)
|
||||
self.nam = NetworkAccessManager(log, use_disk_cache=use_disk_cache, parent=self)
|
||||
self.nam = NetworkAccessManager(log, disk_cache_size=disk_cache_size, parent=self)
|
||||
self.page.setNetworkAccessManager(self.nam)
|
||||
|
||||
@property
|
||||
@ -327,6 +381,7 @@ class Browser(QObject, FormsMixin):
|
||||
return self.page.user_agent
|
||||
|
||||
def _wait_for_load(self, timeout, url=None):
|
||||
timeout = self.default_timeout if timeout is default_timeout else timeout
|
||||
loop = QEventLoop(self)
|
||||
start_time = time.time()
|
||||
end_time = start_time + timeout
|
||||
@ -358,7 +413,16 @@ class Browser(QObject, FormsMixin):
|
||||
if not loop.processEvents():
|
||||
time.sleep(0.1)
|
||||
|
||||
def visit(self, url, timeout=30.0):
|
||||
def wait_for_element(self, selector, timeout=default_timeout):
    '''
    Wait until an element matching the CSS selector exists in the page and
    return it.

    :param timeout: Maximum time to wait in seconds, defaults to
        self.default_timeout.

    Raises Timeout if no matching element appears in time.
    '''
    timeout = self.default_timeout if timeout is default_timeout else timeout
    start_time = time.time()
    elem = self.css_select(selector)
    while elem is None:
        self.run_for_a_time(0.1)
        if time.time() - start_time > timeout:
            # %g rather than %.1g, which renders 30 as 3e+01
            raise Timeout('DOM failed to load in %g seconds' % timeout)
        elem = self.css_select(selector)
    return elem
|
||||
|
||||
def visit(self, url, timeout=default_timeout):
|
||||
'''
|
||||
Open the page specified in URL and wait for it to complete loading.
|
||||
Note that when this method returns, there may still be javascript
|
||||
@ -369,14 +433,38 @@ class Browser(QObject, FormsMixin):
|
||||
Returns True if loading was successful, False otherwise.
|
||||
'''
|
||||
self.current_form = None
|
||||
self.page.mainFrame().load(QUrl(url))
|
||||
self.page.load_url(url)
|
||||
return self._wait_for_load(timeout, url)
|
||||
|
||||
def back(self, wait_for_load=True, timeout=default_timeout):
    '''
    Go back one page, like clicking the back button in a browser.

    If wait_for_load is True, wait for the load to complete and return True
    if loading was successful, False otherwise. A Timeout exception is
    raised if loading takes more than timeout seconds. If wait_for_load is
    False, return None immediately.
    '''
    self.page.triggerAction(self.page.Back)
    return self._wait_for_load(timeout) if wait_for_load else None
|
||||
|
||||
def stop(self):
    'Stop loading of current page, by triggering the Stop action of the page'
    self.page.triggerAction(self.page.Stop)
|
||||
|
||||
def stop_scheduled_refresh(self):
    '''
    Stop any scheduled page refresh/reloads, by triggering the
    StopScheduledPageRefresh action of the page.
    '''
    self.page.triggerAction(self.page.StopScheduledPageRefresh)
|
||||
|
||||
def reload(self, bypass_cache=False):
    '''
    Reload the current page. If bypass_cache is True the
    ReloadAndBypassCache action is triggered instead of Reload.
    '''
    if bypass_cache:
        action = self.page.ReloadAndBypassCache
    else:
        action = self.page.Reload
    self.page.triggerAction(action)
|
||||
|
||||
@property
|
||||
def dom_ready(self):
|
||||
return self.page.ready_state in {'complete', 'interactive'}
|
||||
return self.page.dom_loaded
|
||||
|
||||
def wait_till_dom_ready(self, timeout=30.0, url=None):
|
||||
def wait_till_dom_ready(self, timeout=default_timeout, url=None):
|
||||
timeout = self.default_timeout if timeout is default_timeout else timeout
|
||||
start_time = time.time()
|
||||
while not self.dom_ready:
|
||||
if time.time() - start_time > timeout:
|
||||
@ -384,18 +472,20 @@ class Browser(QObject, FormsMixin):
|
||||
url, timeout))
|
||||
self.run_for_a_time(0.1)
|
||||
|
||||
def start_load(self, url, timeout=default_timeout, selector=None):
    '''
    Start the loading of the page at url and return once the DOM is ready,
    sub-resources such as scripts/stylesheets/images/etc. may not have all
    loaded.

    If selector is not None, return once an element matching that CSS
    selector exists, instead of waiting for the DOM to be ready. A Timeout
    exception is raised if waiting takes more than timeout seconds.
    '''
    self.current_form = None
    self.page.load_url(url)
    if selector is not None:
        # wait_for_element() has no url parameter, passing one is a
        # TypeError
        self.wait_for_element(selector, timeout=timeout)
    else:
        self.wait_till_dom_ready(timeout=timeout, url=url)
|
||||
|
||||
def click(self, qwe_or_selector, wait_for_load=True, ajax_replies=0, timeout=30.0):
|
||||
def click(self, qwe_or_selector, wait_for_load=True, ajax_replies=0, timeout=default_timeout):
|
||||
'''
|
||||
Click the :class:`QWebElement` pointed to by qwe_or_selector.
|
||||
|
||||
@ -408,8 +498,8 @@ class Browser(QObject, FormsMixin):
|
||||
initial_count = self.nam.reply_count
|
||||
qwe = qwe_or_selector
|
||||
if not isinstance(qwe, QWebElement):
|
||||
qwe = self.page.mainFrame().findFirstElement(qwe)
|
||||
if qwe.isNull():
|
||||
qwe = self.css_select(qwe)
|
||||
if qwe is None:
|
||||
raise ValueError('Failed to find element with selector: %r'
|
||||
% qwe_or_selector)
|
||||
js = '''
|
||||
@ -425,7 +515,7 @@ class Browser(QObject, FormsMixin):
|
||||
raise LoadError('Clicking resulted in a failed load')
|
||||
|
||||
def click_text_link(self, text_or_regex, selector='a[href]',
|
||||
wait_for_load=True, ajax_replies=0, timeout=30.0):
|
||||
wait_for_load=True, ajax_replies=0, timeout=default_timeout):
|
||||
target = None
|
||||
for qwe in self.page.mainFrame().findAllElements(selector):
|
||||
src = unicode(qwe.toPlainText())
|
||||
@ -441,6 +531,88 @@ class Browser(QObject, FormsMixin):
|
||||
return self.click(target, wait_for_load=wait_for_load,
|
||||
ajax_replies=ajax_replies, timeout=timeout)
|
||||
|
||||
def css_select(self, selector, all=False):
    '''
    Find elements in the main frame matching the CSS selector.

    If all is True, return a tuple of all matching elements. Otherwise
    return the first matching element, or None if there is no match.
    '''
    frame = self.page.mainFrame()
    if all:
        return tuple(frame.findAllElements(selector).toList())
    first = frame.findFirstElement(selector)
    return None if first.isNull() else first
|
||||
|
||||
def get_image(self, qwe_or_selector):
    '''
    Return the image identified by qwe_or_selector as a QPixmap. If no such
    image exists, the returned pixmap will be null.

    qwe_or_selector is either a QWebElement or a CSS selector. The work is
    delegated to the get_image() method of self.page, which raises
    ValueError if a selector does not match any element.
    '''
    return self.page.get_image(qwe_or_selector)
|
||||
|
||||
def get_cached(self, url):
    '''
    Return the data stored in the network cache for url as a bytestring, or
    None if the cache has no data for it.
    '''
    iod = self.nam.cache.data(QUrl(url))
    if iod is None:
        return None
    try:
        raw = bytes(bytearray(iod.readAll()))
        return raw
    finally:
        # Ensure the IODevice is closed right away, so that the
        # underlying file can be deleted if the space is needed,
        # otherwise on windows the file stays locked
        iod.close()
        del iod
|
||||
|
||||
def wait_for_resources(self, urls, timeout=default_timeout):
    '''
    Wait for the resources at urls to show up in the network cache and
    return a dict mapping every URL that was found to its data.

    Polling stops when all URLs have been found, when timeout seconds
    (default: self.default_timeout) have elapsed or when the ready state of
    the page is complete. The cache is then checked one last time. URLs
    that could not be found are simply absent from the returned dict, no
    exception is raised.
    '''
    timeout = self.default_timeout if timeout is default_timeout else timeout
    start_time = time.time()
    ans = {}
    urls = set(urls)

    def get_resources():
        # Iterate over a snapshot, as urls is modified inside the loop
        for url in tuple(urls):
            raw = self.get_cached(url)
            if raw is not None:
                ans[url] = raw
                urls.discard(url)

    # Poll only while there is still time left. Testing for elapsed >
    # timeout here would skip the loop entirely, so nothing was waited for
    while urls and time.time() - start_time < timeout and self.page.ready_state not in {'complete', 'completed'}:
        get_resources()
        if urls:
            self.run_for_a_time(0.1)

    if urls:
        get_resources()
    return ans
|
||||
|
||||
def get_resource(self, url, rtype='img', use_cache=True, timeout=default_timeout):
    '''
    Download a resource (image/stylesheet/script) and return it as a
    bytestring, or None if it could not be loaded.

    The resource is downloaded by loading a minimal HTML page that contains
    only a single tag (chosen by rtype: 'img', 'link' or 'script')
    referring to it and is then read back from the cache, so this method
    cannot be used when the cache is disabled (RuntimeError is raised). An
    unknown rtype raises ValueError.

    If use_cache is True the cache is queried before loading anything. This
    can return a stale object if the resource has changed on the server,
    but avoids a roundtrip to the server in the common case.
    '''
    if not hasattr(self.nam, 'cache'):
        raise RuntimeError('Cannot get resources when the cache is disabled')
    cached = self.get_cached(url) if use_cache else None
    if cached is not None:
        return cached

    templates = {
        'img': '<img src="%s">',
        'link': '<link href="%s"></link>',
        'script': '<script src="%s"></script>',
    }
    try:
        tag = templates[rtype] % prepare_string_for_xml(url, attribute=True)
    except KeyError:
        raise ValueError('Unknown resource type: %s' % rtype)

    page_html = '''<!DOCTYPE html><html><body><div>{0}</div></body></html>'''.format(tag)
    self.page.mainFrame().setHtml(page_html)
    self._wait_for_load(timeout)
    return self.get_cached(url)
|
||||
|
||||
def show_browser(self):
|
||||
'''
|
||||
@ -461,11 +633,18 @@ class Browser(QObject, FormsMixin):
|
||||
def html(self):
|
||||
return unicode(self.page.mainFrame().toHtml())
|
||||
|
||||
def close(self):
|
||||
def blank(self):
    '''
    Load about:blank. Only a very short time is allowed for the load and a
    Timeout while waiting for it is deliberately ignored.
    '''
    try:
        self.visit('about:blank', timeout=0.01)
    except Timeout:
        pass
|
||||
|
||||
def close(self):
    '''
    Shut down this browser: stop any load in progress, load a blank page
    and drop the references to the cache, the network access manager and
    the page. The browser cannot be used after this.
    '''
    self.stop()
    self.blank()
    # Stop again, as blank() does not wait for its load to finish
    self.stop()
    # Replace the disk cache with a new, unconfigured one and forget the
    # old one
    # NOTE(review): presumably so that the old cache directory is no longer
    # in use -- confirm
    self.nam.setCache(QNetworkDiskCache())
    self.nam.cache = None
    self.nam = self.page = None
|
||||
|
||||
def __enter__(self):
|
||||
@ -474,3 +653,5 @@ class Browser(QObject, FormsMixin):
|
||||
def __exit__(self, *args):
|
||||
self.close()
|
||||
|
||||
|
||||
|
||||
|
@ -10,6 +10,8 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre import as_unicode
|
||||
|
||||
default_timeout = object()
|
||||
|
||||
# Forms {{{
|
||||
class Control(object):
|
||||
|
||||
@ -43,7 +45,7 @@ class Control(object):
|
||||
self.qwe.setAttribute('value', as_unicode(val))
|
||||
elif self.type in ('number', 'range'):
|
||||
self.qwe.setAttribute('value', '%d'%int(val))
|
||||
else: # Unknown type treat as text
|
||||
else: # Unknown type treat as text
|
||||
self.qwe.setAttribute('value', as_unicode(val))
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
@ -221,7 +223,7 @@ class FormsMixin(object):
|
||||
return self.current_form
|
||||
|
||||
def submit(self, submit_control_selector=None, wait_for_load=True,
|
||||
ajax_replies=0, timeout=30.0):
|
||||
ajax_replies=0, timeout=default_timeout):
|
||||
'''
|
||||
Submit the currently selected form. Tries to autodetect the submit
|
||||
control. You can override auto-detection by specifying a CSS2 selector
|
||||
@ -238,7 +240,7 @@ class FormsMixin(object):
|
||||
ajax_replies=ajax_replies, timeout=timeout)
|
||||
|
||||
def ajax_submit(self, submit_control_selector=None,
|
||||
num_of_replies=1, timeout=30.0):
|
||||
num_of_replies=1, timeout=default_timeout):
|
||||
'''
|
||||
Submit the current form. This method is meant for those forms that
|
||||
use AJAX rather than a plain submit. It will block until the specified
|
||||
@ -249,3 +251,4 @@ class FormsMixin(object):
|
||||
wait_for_load=False, ajax_replies=num_of_replies,
|
||||
timeout=timeout)
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user