diff --git a/.gitattributes b/.gitattributes index f404b95595..0d97bfac69 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,13 +1,14 @@ # Tell git what files are txt *.py text -*.recipe text +*.recipe text diff=python *.coffee text *.js text *.pot text *.po text *.html text -*.css text +*.xhtml text *.htm text +*.css text *.rst text *.md text *.txt text @@ -20,12 +21,13 @@ *.tmpl text *.qrc text *.sh text -*.xhtml text +*.fb2 text *.bat text eol=crlf # Tell git what files are binary *.zip binary *.epub binary +*.docx binary *.jpg binary *.jpeg binary *.png binary diff --git a/.gitignore b/.gitignore index 7beb2fb0ac..090d11fd24 100644 --- a/.gitignore +++ b/.gitignore @@ -39,54 +39,6 @@ nbproject/ .settings/ *.DS_Store calibre_plugins/ -recipes/.git -recipes/.gitignore -recipes/README.md -recipes/icon_checker.py -recipes/readme_updater.py -recipes/garfield.recipe -recipes/katalog_egazeciarz.recipe -recipes/tv_axnscifi.recipe -recipes/tv_comedycentral.recipe -recipes/tv_discoveryscience.recipe -recipes/tv_foxlife.recipe -recipes/tv_fox.recipe -recipes/tv_hbo.recipe -recipes/tv_kinopolska.recipe -recipes/tv_nationalgeographic.recipe -recipes/tv_polsat2.recipe -recipes/tv_polsat.recipe -recipes/tv_tv4.recipe -recipes/tv_tvn7.recipe -recipes/tv_tvn.recipe -recipes/tv_tvp1.recipe -recipes/tv_tvp2.recipe -recipes/tv_tvphd.recipe -recipes/tv_tvphistoria.recipe -recipes/tv_tvpkultura.recipe -recipes/tv_tvppolonia.recipe -recipes/tv_tvpuls.recipe -recipes/tv_viasathistory.recipe -recipes/icons/katalog_egazeciarz.png -recipes/icons/garfield.png -recipes/icons/tv_axnscifi.png -recipes/icons/tv_comedycentral.png -recipes/icons/tv_discoveryscience.png -recipes/icons/tv_foxlife.png -recipes/icons/tv_fox.png -recipes/icons/tv_hbo.png -recipes/icons/tv_kinopolska.png -recipes/icons/tv_nationalgeographic.png -recipes/icons/tv_polsat2.png -recipes/icons/tv_polsat.png -recipes/icons/tv_tv4.png -recipes/icons/tv_tvn7.png -recipes/icons/tv_tvn.png -recipes/icons/tv_tvp1.png 
-recipes/icons/tv_tvp2.png -recipes/icons/tv_tvphd.png -recipes/icons/tv_tvphistoria.png -recipes/icons/tv_tvpkultura.png -recipes/icons/tv_tvppolonia.png -recipes/icons/tv_tvpuls.png -recipes/icons/tv_viasathistory.png +recipes/*.mobi +recipes/*.epub +recipes/debug diff --git a/Changelog.old.yaml b/Changelog.old.yaml index 4910f4f849..2b70821db8 100644 --- a/Changelog.old.yaml +++ b/Changelog.old.yaml @@ -1,4 +1,823 @@ +- version: 0.8.69 + date: 2012-09-14 + + new features: + - title: "E-book viewer: Add a button to the toolbar to switch themes easily" + tickets: [1047992] + + - title: "When downloading metadata for many books, if some of them fail, add an option to the downloaded message to show the failed books in the main book list, so that they can be individually processed easily" + + - title: "Remember last used window size of the conversion dialogs." + tickets: [1049265] + + - title: "Kindle driver: Turn on sending of azw3 files to kindles by default, since the KK now has azw3 support" + + - title: "Conversion: Add support for CSS pseudo classes :hover, :link, :visited, :first-line, :focus, :active, :first-letter" + + - title: "Wireless device driver: Make the default save template not use folders" + + bug fixes: + - title: "Fix a regression in the previous release that broke sending of books to the second SD card in SONY readers" + tickets: [1047992] + + - title: "Fix a memory leak when scanning for devices in windows" + + - title: "Ebook-viewer: When displaying mathematics, reflow equations that don't fit on a single line" + + - title: "Catalogs: Do not mark the AZW3 catalog as a periodical, as most Kindle devices cannot handle AZW3 periodicals" + + - title: "Content server: When using a custom IP address to listen on via Preferences->Tweaks advertise that IP address via BonJour." + + - title: "Fix ebook catalog generation on linux systems where the encoding is not UTF-8."
+ tickets: [1048404] + + improved recipes: + - De Volksrant + - Metro UK + - Countryfile + - Die Zeit (subscription) + - Birmingham post + + new recipes: + - title: History Today + author: Rick Shang + +- version: 0.8.68 + date: 2012-09-07 + + new features: + - title: "Drivers for the Nokia N9, Viewsonic 7e, Prestigio PER3274B and Coby Kyros 7035 " + tickets: [1046794,1046544] + + - title: "Add a tutorial on creating catalogs to the User Manual and a link to it in the create catalogs dialog" + + - title: "Wireless device connections: Add an option to force calibre to listen on a particular IP address. Access it by customizing the plugin in Preferences->Plugins" + + - title: "Android driver: Add an extra customization option to configure the directory to which ebooks are sent on the storage cards." + tickets: [1045045] + + - title: "Add an option under Preferences->Look & Feel->Book Details to hide the cover in the book details panel" + + - title: "The Calibre Companion Android app that allows wireless connection of Android device to calibre is out of beta. See https://play.google.com/store/apps/details?id=com.multipie.calibreandroid" + + bug fixes: + - title: "Fix sorting by author not working in the device view in calibre when connected to iTunes" + tickets: [1044619] + + - title: "Fix using the 'configure this device' menu action not validating settings" + + - title: "Device drivers: Ignore corrupted entries in metadata.calibre, instead of raising an error" + + - title: "PDF Output: Do not error out when generating an outline which points to pages that have been removed." 
+ tickets: [1044799] + + - title: "PDF Output: Fix incorrect page numbers being generated in the outline when converting some books" + + - title: "PDF Output: Reduce memory consumption when writing out the PDF file, by using a stream" + + - title: "EPUB metadata: When there are multiple tags use the one with the earliest date as the published date" + + improved recipes: + - Wall Street journal (subscription version) + - Houston Chronicle + - Various Romanian news sources + - Business Week Magazine + - Arcamax + +- version: 0.8.67 + date: 2012-08-31 + + new features: + - title: "PDF Output: Generate a PDF Outline based on the Table of Contents of the input document" + + - title: "Conversion: Add an option under Structure Detection to set the 'Start reading at' metadata with an XPath expression." + tickets: [1043233] + + - title: "Speed up changing the title and author of files with books larger than 3MB by avoiding an unnecessary extra copy." + + - title: "Wireless device driver: Make detecting and connecting to devices easier on networks where mdns is disabled" + + - title: "PDF Output: Allow choosing the default font family and size when generating PDF files (under PDF Options) in the conversion dialog" + + - title: "Metadata dialog: Comments editor: Allow specifying the name of a link when using the insert link button." + tickets: [1042683] + + - title: "Remove the unmaintained pdfmanipulate command line utility. There are many other tools that provide similar functionality, for example, pdftk and podofo" + + bug fixes: + - title: "Catalogs: Fix regression that broke sorting of non series titles before series titles" + + - title: "PDF Output: Do not create duplicate embedded fonts in the PDF for every individual HTML file in the input document" + + - title: "Fix regression that broke DnD of files having a # character in their names to the book details panel" + + - title: "PDF Output: Allow generating PDF files with more than 512 pages on windows." 
+ tickets: [1041614] + + - title: "Fix minor bug in handling of the completion popups when using the next/previous buttons in the edit metadata dialog" + tickets: [1041389] + + improved recipes: + - Coding Horror + - TIME Magazine + + new recipes: + - title: Cumhuriyet Yzarlar + author: Sethi Eksi + + - title: Arcadia + author: Masahiro Hasegawa + + - title: Business Week Magazine and Chronicle of Higher Education + author: Rick Shang + + - title: CIPER Chile + author: Darko Miletic + +- version: 0.8.66 + date: 2012-08-24 + + new features: + - title: "E-book viewer: Support the display of mathematics in e-books. Supports both embedded TeX and MathML" + description: "The calibre ebook viewer can now display embedded mathematics (symbols, equations, fractions, matrices, etc.) in EPUB and HTML ebooks. For details, see: http://manual.calibre-ebook.com/typesetting_math.html" + type: major + + - title: "Drivers for SONY PRS-T2, Freelander PD10 and Coolreader Tablet" + tickets: [1039103] + + - title: "Wireless device connections: Use a streamed mode for improved networking performance leading to much less time spent sending metadata to/from the device. Also make it easier to specify a fixed port directly in the dialog used to start the connection." + + - title: "Get books: Add ebooksgratuitis.com" + + bug fixes: + - title: "PDF Output: Handle input epub documents with filenames starting with a dot. Also do not hang if there is an unhandled error." + tickets: [1040603] + + - title: "Get Books: Update B&N plugin to handle changes to the B&N website" + + - title: "Content server: Fix regression that caused the port being advertised via BonJour to be incorrect if the user changed the port for the server."
+ tickets: [1037912] + + + improved recipes: + - Variety + - The Times UK + + new recipes: + - title: Le Monde subscription version + author: Remi Vanicat + + - title: Brecha Digital + author: Darko Miletic + +- version: 0.8.65 + date: 2012-08-17 + + new features: + - title: "A new wireless device driver. This allows connecting wirelessly to a device running a 'smart' calibre client" + description: "The wireless connection functions just as if the device was plugged into the computer by USB cable. Currently, Android devices are supported. See https://play.google.com/store/apps/details?id=com.multipie.calibreandroid" + type: major + + - title: "MOBI Output: Add an option to control the type of MOBI file produced, to the MOBI Output conversion options. You can now generate an old MOBI6, a new KF8 or a joint MOBI6/KF8 file. By default, MOBI6 files are generated. This replaces the previous use of a tweak." + + - title: "E-book viewer: Make paged mode the default. You can go back to the old flow mode by clicking the button with the yellow scroll in the top right corner of the viewer." 
+ + - title: "Driver for COBY kYROS MID7042 and Samsung Galaxy Ace S5839i" + + bug fixes: + - title: "Update version of poppler bundled with calibre to fix reading covers from some PDF files" + + - title: "Get Books: Fix clicking of results from Diesel books when there is only a single result not working" + + - title: "Improve detection of system language on first run of calibre" + tickets: [1036354] + + - title: "When finding the next series index and the last series index is a fractional number, use the next largest integer, instead of just adding 1" + + - title: "Fix exception when saving a search/replace when no saved search/replace had been opened previously in the bulk search/replace dialog" + tickets: [1036464] + + - title: "Fix restore database not restoring entries for the original_* formats" + + - title: "Fix first run wizard not allowing empty email sending settings" + tickets: [1036358] + + - title: "Do not error out when setting the cover for a book that has no folders in the library." + tickets: [1035935] + + - title: "Conversion pipeline: Ignore unparseable values in the color attribute of font tags, instead of erroring out on them." + tickets: [1035633] + + - title: "Catalogs: Fix regression that broke creation of catalogs while a device is connected" + + - title: "Fix --with-library=/whatever not working for calibredb list" + + improved recipes: + - Slashdot + - Various Canadian newspapers + - Business Spectator + +- version: 0.8.64 + date: 2012-08-09 + + new features: + - title: "E-book viewer: Allow viewing images in the book in a separate pop-up window by right clicking on the image. Useful if you want to keep some image, like a map to the side while reading the book." + + - title: "Catalogs: Allow generation of catalogs in AZW3 format. Also add more powerful configuration options to exclude books and set prefixes. See http://www.mobileread.com/forums/showthread.php?t=187298 for details." 
+ + - title: "Generate a PDF version of the User Manual" + + bug fixes: + - title: "News download: Fix broken handling of nesting for HTML 5 tags when parsing with BeautifulSoup" + + - title: "EPUB: Handle files in the EPUB that have semi-colons in their file names. This means in particular using URL escaping when creating the NCX as ADE cannot handle unescaped semi-colons in the NCX." + tickets: [1033665] + + - title: "Conversion pipeline: Ignore unparseable CSS instead of erroring out on it." + tickets: [1034074] + + - title: "When setting up a column coloring rule based on the languages column, allow entry of localized language names instead of only ISO codes" + + - title: "Catalogs: Generate cover for mobi/azw3 catalogs" + + - title: "Update the last modified column record of a book, whenever a format is added to the book." + + - title: "E-book viewer: Fix line scrolling stops at breaks option not working in paged mode" + tickets: [1033430] + + - title: "MOBI Output: Fix ToC at start option having no effect when converting some input documents that have an out-of-spine ToC." + tickets: [1033656] + + - title: "Catalog Generation: When generating EPUB/MOBI catalogs add more flexible rules for excluding books. Also add rules to customize the prefix characters used." + + - title: "Make setting published date using metadata search/replace more robust." + + - title: "Tag Browser: Flatten the display of sub-groups when sort by is not set to 'name'." + tickets: [1032746] + + - title: "Fix isbn:false not matching if other identifiers are attached to the book." + + improved recipes: + - The New Republic + - ZDNet + - Metro UK + - FHM UK + + new recipes: + - title: eKundelek.pl + author: Artur Stachecki + + - title: Sueddeutsche Mobil + author: Andreas Zeiser + +- version: 0.8.63 + date: 2012-08-02 + + new features: + - title: "E-book viewer: Allow quick saving and loading of viewer settings as 'themes'." 
+ tickets: [1024611] + + - title: "Ebook-viewer: Add a restore defaults button to the viewer preferences dialog" + + - title: "E-book viewer: Add simple settings for text and background colors" + + - title: "Add an entry to save to disk when right clicking a format in the book details panel" + + - title: "ODT metadata: Read first image as the metadata cover from ODT files. Also allow ODT authors to set custom properties for extended metadata." + + - title: "E-book viewer and PDF Output: Resize images that are longer than the page to fit onto a single page" + + bug fixes: + - title: "KF8 Output: Fix bug where some calibre generated KF8 files would cause the Amazon KF8 viewer on the Touch to go to into an infinite loop when using the next page function" + tickets: [1026421] + + - title: "News download: Add support for tags that link to SVG images." + tickets: [1031553] + + - title: "Update podofo to 0.9.1 in all binary builds, to fix corruption of some PDFs when updating metadata." + tickets: [1031086] + + - title: "Catalog generation: Handle authors whose last name is a number." + + - title: "KF8 Input: Handle html entities in the NCX toc entries correctly" + + - title: "Fix a calibre crash that affected some windows installs" + tickets: [1030234] + + - title: "MOBI Output: Normalize unicode strings before writing to file, to workaround lack of support for non-normal unicode in Amazon's MOBI renderer." + tickets: [1029825] + + - title: "EPUB Input: Handle files that have duplicate entries in the spine" + + - title: "Fix regression in Kobo driver that caused the on device column to not be updated after deleting books" + + new recipes: + - title: Dziennik Polski + author: Gregorz Maj + + - title: High Country Blogs + author: Armin Geller + + - title: Philosophy Now + author: Rick Shang + +- version: 0.8.62 + date: 2012-07-27 + + new features: + - title: "Book details panel: Allow right clicking on a format to delete it." 
+ + - title: "When errors occur in lots of background jobs, add an option to the error message to temporarily suppress subsequent error messages." + tickets: [886904] + + - title: "E-book viewer full screen mode: Allow clicking in the left and right page margins to turn pages." + tickets: [1024819] + + - title: "Drivers for various Android devices" + tickets: [1028690,1027431] + + - title: "Advanced search dialog: When starting on the title/author/etc. tab, restore the previously used search kind as well." + tickets: [1029745] + + - title: "When presenting the calibre must be restarted warning after installing a new plugin, add a restart now button so that the user can conveniently restart calibre. Currently only works when going via Preferences->Plugins->Get new plugins" + + bug fixes: + - title: "Fix main window layout state being saved incorrectly if calibre is killed without a proper shutdown" + + - title: "Fix boolean and date searching in non english calibre installs." + + - title: "Conversion: Ignore invalid chapter detection and level n ToC expressions instead of erroring out" + + improved recipes: + - Psychology Today + - The Smithsonian + - The New Republic + - Various updated Polish news sources + - The Sun + - San Francisco Bay Guardian + - AnandTech + - Smashing Magazine + + new recipes: + - title: Linux Journal and Conowego.pl + author: fenuks + + - title: A list apart and .net magazine + author: Marc Busque + +- version: 0.8.61 + date: 2012-07-20 + + new features: + - title: "E-book viewer: Add a paged mode that splits up the text into pages, like in a paper book instead of presenting it as a single column. To activate click the button with the yellow scroll icon in the top right corner." + type: major + description: "In paged mode, the ebook viewer no longer cuts off the last line of text at the bottom of the screen, and it respects CSS page-break directives.
You can also set page margins and control the number of pages displayed on screen by clicking the Preferences button in the viewer and going to 'Text layout in paged mode'." + + - title: "Digitally sign the calibre OS X and windows builds" + + - title: "Get Books: Add Mills and Boon UK" + + - title: "Various minor improvements to the Bulk metadata edit dialog" + tickets: [1025825, 1025838, 1025628] + + - title: "Fix various regressions in the auto-complete functionality for authors/series/tags etc introduced in 0.8.60" + + - title: "Drivers for various new Android devices" + tickets: [1024934] + + - title: "MOBI: Add support for the new language EXTH header field in MOBI files generated by kindlegen 2.5" + + bug fixes: + - title: "KF8 Output: Fix calibre produced KF8 files not showing the 'Use publisher font' option on the Kindle Touch when they have embedded fonts" + + - title: "Txt/fb2/rtf/pml/rb output: Fix non-visible element's tail text (which should be visible) being ignored when it shouldn't be." + tickets: [1026541] + + - title: "Book details panel: When displaying a link to amazon, use a country specific name like amazon.fr instead of using amazon.com for all countries" + + - title: "Conversion: When splitting on page breaks, ignore page-breaks with values of auto and inherit. " + tickets: [1018875] + + - title: "Metadata jacket: Specify foreground in addition to the background color for the title banner so that it remains readable if the user tries to monkey with the CSS in the viewer." + + - title: "PDF Output: Fix rendering of cover as first page of PDF (ignore margins so that the image covers the entire page)" + + - title: "Linux binaries: Bundle libglib to avoid incompatibilities with glib on various distros."
+ tickets: [1022019] + + - title: "Fix find_identical_books() choking on books with too many authors" + + + improved recipes: + - Toronto Star + - American Prospect + - faz.net + +- version: 0.8.60 + date: 2012-07-13 + + new features: + - title: "When searching, allow use of un-accented characters to match accented characters in all fields and all languages (not just authors and English as before)" + description: "The rules for matching un-accented characters are done in a language dependent way. So if your calibre interface language is set to English, n will match both n and ñ, but if it is set to Spanish, it will match only n, as in Spanish ñ is a separate alphabet in Spanish. This makes searching a little slower, so if you have a very large library you can turn it off via Preferences->Searching." + type: major + + - title: "Content server: Show a best guess for the IP address the content server is currently listening at in the connect/share menu." + tickets: [1024128] + + - title: "E-book viewer: Add an option to show a clock in full screen mode." + tickets: [1022086] + + - title: "Drivers for Paquito Imaginarium and a few Android phones" + tickets: [1024021,1023613,1023461,1022401] + + - title: "HTMLZ Output: Add option to use the book title as the filename for the html file inside the archive" + + - title: "Make the list of displayed fields in the book details panel a per library setting" + + - title: "Have autocomplete on authors/series/tags/etc. 
ignore accented characters when finding matches (similar to the changes to search above)" + + - title: "Support for retina displays in OS X (I hope)" + tickets: [1022191] + + - title: "Remove the dependency on the zip command line tool when developing plugins" + + bug fixes: + - title: "Kobo driver: Do not perform write operations on the Kobo database if its version is newer than the latest version the driver supports, for safety" + + - title: "KF8 Input: Ignore encoding declarations inside the html markup, as they are sometimes incorrect." + tickets: [1022933] + + - title: "Force refresh of cached composite column values when values in the cache are changed" + + - title: "Fix a regression that broke calibre --shutdown-running-calibre on windows." + tickets: [1022504] + + - title: "Possible workaround for Qt 4.8.2 open file dialog failing on some linux distros." + tickets: [1022019] + + - title: "Catalogs: Fix some epubcheck errors when generating catalogs in EPUB format" + + - title: "Linux installer: When calling the xdg utilities use system libraries rather than the libraries bundled with calibre" + + - title: "Fix numeric sort for composite custom columns that use custom separators" + tickets: [1021814] + + - title: "Tag browser: When grouping by first letter, handle languages that have 'letters' made of more than one character. This can be turned off via Preferences->Tweaks" + + improved recipes: + - Hola magazine + - Adventure Gamers + - Cosmopolitan UK + - Onda Rock + + new recipes: + - title: Empire Magazine + author: Dave Asbury + + - title: NZZ Folio + author: Bernd Leinfelder + + - title: Warentest + author: asdfdsfksd + + +- version: 0.8.59 + date: 2012-07-06 + + new features: + - title: "Drivers for Samsung SGH-T989 and Sony Ericsson Sola" + tickets: [1021365] + + - title: "Conversion pipeline: When removing the first image, also remove the html file the image is found in, if that file has no other content. 
Allows this option to be used to remove covers from EPUB files without leaving behind a blank page." + + - title: "Content server: Add a navigation panel at the bottom of each page." + tickets: [1020225] + + - title: "calibredb: Add a backup_metadata command to manually run the backup to opf from the command line" + + - title: "User defined driver: Add option to swap main memory and card a." + tickets: [1020056] + + - title: "Add new option to the series_index_auto_increment tweak, no_change, that causes calibre not to change the series_index when the series is changed" + + bug fixes: + - title: "PDF Output: Resize large images so that they do not get cut off at the right edge of the page." + + - title: "On linux ensure that WM_CLASS for the main calibre GUI is set to 'calibre-gui' to match the name of the calibre-gui.desktop file. This is apparently required by the GNOME 3 shell." + tickets: [1020297] + + - title: "Update ICU in all builds to version 49.1" + + - title: "Tag browser: Fix regression that broke drag and drop between user categories in the tag browser" + + - title: "When copying to library and deleting after copy, do not place deleted files in recycle bin, as this is redundant and slow (they have already been copied into another library)" + + - title: "Fix yes/no fields with value of No not showing up in the book details panel" + + - title: "Catalogs: Better sorting for non English languages" + tickets: [930882] + + - title: "Get Books: Fix Foyles UK, Weightless books, ebooks.com and ozon.ru" + + - title: "CHM Input: Fix handling of chm files that split their html into multiple sub-directories."
+ tickets: [1018792] + + improved recipes: + - FHM UK + - The Age + - weblogs_ssl + - Heraldo.es + + new recipes: + - title: CATO Institute and Heritage Foundation + author: _reader + +- version: 0.8.58 + date: 2012-06-29 + + new features: + - title: "Add some texture to calibre generated covers" + + - title: "Drivers for Sogo SS-4370, HTC G2 and Lenovo ThinkPad Tablet" + tickets: [1019050, 1017010] + + - title: "Add search to the Manage tags/series/etc. dialogs" + + - title: "News download: Add support for images embedded in the HTML" + + - title: "calibre -s now waits for calibre to shutdown" + + bug fixes: + - title: "Workaround for iTunes breaking scripting with version 10.6.3 on OS X." + tickets: [1012243] + + - title: "EPUB Input: When there are multiple elements of the same type in the OPF guide, use the first rather than the last element." + + - title: "Windows: Disable the new UI style if the color depth of the desktop is less than 32 bits per pixel" + + - title: "ISBNDB metadata plugin: Return results even though they have no comments" + + - title: "More robust handling of EINTR during IPC" + + - title: "Metadata download: Support for amazon's new results page markup" + + - title: "EPUB Output: Fix a bug that could cause corrupted output when doing an EPUB/OEB to EPUB conversion if the input EPUB had multiple files with the same name" + + - title: "KF8 Output: Fix a couple of bugs that could lead to generation of invalid KF8 files." + tickets: [1016672] + + improved recipes: + - ABC Digital + - O Globo + + new recipes: + - title: Sign of the Times and New Statesman + author: TerminalVeracity + + - title: CT24 + author: zoidozoido + + - title: SmileZilla + author: Will + + - title: Marketing Sensoriale + author: NotTaken + +- version: 0.8.57 + date: 2012-06-22 + + new features: + - title: "PDF Output: Full pagination support. No more cutoff bottom line." 
+ type: major + description: "Fixes a long standing bug in calibre's PDF Output that caused the bottom line of some pages to be partially cut off and prevented top and bottom margins from working." + + - title: "calibredb add now prints out the ids of added books" + tickets: [1014303] + + - title: "Kobo Vox driver: Add support for new Google Play firmware" + tickets: [1014129] + + - title: "Driver for Prestigio PMP5097PRO" + tickets: [1013864] + + - title: "Add option to disable tooltips in the book list under Preferences->Look & Feel" + + - title: "When customizing builtin recipes download the latest version of the recipe to customize instead of using the possibly out of date bundled version" + + bug fixes: + - title: "PDF Output: Use the cover from the input document when no cover is specified during a conversion" + + - title: "E-book Viewer: Printing now has proper pagination with top and bottom margins no lines partially cut-off at the bottom and full style retention" + + - title: "KF8 Input: Handle files with incorrectly encoded guide type entries." + tickets: [1015020] + + - title: "E-book viewer: Disable hyphenation on windows xp as Qt WebKit barfs on soft hyphens on windows XP" + + - title: "Handle OS X systems with invalid palette colors." + tickets: [1014900] + + - title: "Tag Browser: Fix regression that broke partitioning of hierarchical categories." 
+ tickets: [1014065] + + - title: "LRF Output: Handle negative page margins" + tickets: [1014103] + + - title: "Template language: Fix arithmetic functions to tolerate the value 'None' as returned by raw_field()" + + - title: "Fix custom title sort set in the edit metadata dialog getting reset by the conversion dialog" + + improved recipes: + - The Economist + - Akter + - 24 Sata sr + - Novi List + - Metro Montreal + - Mode Durable + - CanardPC + - The Economic Collapse + - Our Daily Bread + + new recipes: + - title: Akter Daily + author: Darko MIletic + + - title: BBC Brasil + author: Claviola + + - title: Homopedia.pl + author: rainbowwarrior + + - title: National Geographic Magazine + author: Terminal Veracity + + - title: Something Awful + author: atordo + + - title: Huffington Post UK + author: Krittika Goyal + +- version: 0.8.56 + date: 2012-06-15 + + new features: + - title: "Make the new calibre style default on Windows and OS X." + type: major + description: "This change gives a more 'modern' feel to the calibre user interface with focus highlighting, gradients, rounded corners, etc. In case you prefer the old look, you can restore under Preferences->Look & Feel->User interface style" + + - title: "Get Books: Add the new SONY Reader store" + + - title: "Read metadata from .docx (Microsoft Word) files" + + - title: "Allow customizing the behavior of the searching for similar books by right clicking the book. You can now tell calibre to search different columns than the traditional author/series/publisher/tags/etc. 
in Preferences->Searching" + + - title: "Add option to restore alternating row colors to the Tag Browser under Preferences->Look & Feel->Tag Browser" + + - title: "Update to Qt 4.8.2 on windows compiled with link time code generation for a small performance boost" + + bug fixes: + - title: "Get Books: Update plugins to handle website changes at ebooks.com, project gutenberg, and virtualo" + + - title: "AZW3 Output: Fix TOC at start option not working" + + - title: "AZW3 Output: Close self closing script/style/title/head tags explicitly as they cause problems in webkit based renderers like the Kindle Fire and calibre's viewers." + + - title: "Fix the current_library_name() template function not updating after a library switch" + + - title: "AZW3 Output: Handle the case of a link pointing to the last line of text in the document." + tickets: [1011330] + + - title: "Fix regression in 0.8.55 that broke highlighting of items matching a search in the Tag Browser" + tickets: [1011030] + + - title: "News download: Handle query only relative URLs" + + improved recipes: + - Christian Science Monitor + - Neue Zurcher Zeitung + - Birmignham Post + - Metro UK + - New Musical Express + - The Independent + - The Daily Mirror + - Vreme + - Smithsonian Magazine + + new recipes: + - title: NZZ Webpaper + author: Bernd Leinfelder + + +- version: 0.8.55 + date: 2012-06-08 + + new features: + - title: "Add a new 'Calibre style' interface look that is more modern than the default look. You can select it via Preferences->Look & Feel->User interface style." + + - title: "New, subtler look for the Tag Browser" + + - title: "Driver for Trekstor Pyrus and Pantech Android Tablet" + tickets: [1008946, 1007929] + + - title: "Conversion pipeline: Handle guide elements with incorrectly cased hrefs. Also handle guide elements of type coverimagestandard and thumbimagestandard." + + - title: "Allow user to customize trekstor plugin to send books into sub directories." 
+ tickets: [1007646] + + - title: "EPUB Input: Add support for EPUB files that use the IDPF font obfuscation algorithm. Apparently, people have started producing these now." + tickets: [1008810] + + - title: "Save single format to disk: Only show the format available in the selected books." + tickets: [1007287] + + bug fixes: + - title: "MOBI Output: When using the insert metadata at start of book option, do not use a table to layout the metadata, as the Kindle Fire crashes when rendering the table." + tickets: [1002119] + + - title: "Device detection: Fix a bug that could cause device detection to fail completely if devices with certain vendor/product ids are connected." + tickets: [1009718] + + - title: "MOBI Output: When rasterizing svgs only compute style information when an actual svg image is present. Small speedup when converting large svg-free documents to MOBI." + + - title: "SONY T1 driver: Fix support for collections of books placed on the SD card" + tickets: [986044] + + - title: "Fix partitioning problems in tag browser with fields that have no name, such as identifiers and formats" + + - title: "Welcome wizard: Preferentially use the kindle email address set as default when more than one such address exists." + tickets: [1007932 ] + + - title: "Fix regression in 0.8.54 that broke the use of the shortcut Alt+A to select books by the same author" + + improved recipes: + - Various Polish recipes + - Vice Magazine + - EL Mundo Today + - Haaretz + - Good Housekeeping + - El Pais + - Christian Science Monitor + - Marketing Magazine + - Instapaper + + new recipes: + - title: Various Philippine news sources + author: jde + + - title: Natemat.pl and wirtualnemedia.pl + author: fenuks + + - title: Rabble.ca + author: timtoo + +- version: 0.8.54 + date: 2012-05-31 + + new features: + - title: "E-book viewer: The Table of contents panel now tracks the current position in the book. As you scroll through the book, the entry you are currently on is highlighted." 
+ type: major + description: "To see this feature in action, open the Table of Contents panel in the viewer by clicking the button with three blue lines on it. As you page through the book, the chapter you are reading currently is highlighted in the Table of Contents Panel. Obviously, this will only work if the book you are reading has a Table of Contents. You can also use the Ctrl+PgUp and Ctrl+PgDn keys to quickly skip between chapters." + + - title: "calibredb: Allow setting metadata for individual fields with the set_metadata command" + + - title: "Make it a little harder to accidentally change the sorting of items in the Tag Browser. Also frees up more vertical space for the Tag Browser itself." + + - title: "The calibre user manual is now available in AZW3 format as well as EPUB" + + bug fixes: + - title: "Automatic titlecasing: No longer try to capitalize Scottish names, as there are too many special cases." + tickets: [775825] + + - title: "Never crash when reading metadata from PDF files (reading now always happens in a worker process)" + tickets: [1006452] + + - title: "EPUB Input: Do not skip the valid children of an NCX node that has no text/href" + + - title: "Archos driver: Detect SD card" + tickets: [1005650] + + - title: "When bulk downloading metadata and the user deletes one of the books for which metadata is being downloaded, just ignore it, instead of erroring out" + + - title: "When deleting books from the bottom of the booklist, ensure that the bottom book after deleting is selected" + + - title: "Fix regression in 0.8.53 that broke sending APNX files to older Kindle devices" + + - title: "Use correct text color for selected rows in the list of matches when downloading metadata and showing results in get books."
+ tickets: [1004568] + + improved recipes: + - The Independent + - Welt der Physik + - China Daily + - The Grid + - Prospect Magazine + + new recipes: + - title: La gazetta del Mezzogiorno + author: faber1971 + + - version: 0.8.53 date: 2012-05-25 diff --git a/Changelog.yaml b/Changelog.yaml index 1d8dcb88a9..a411abd691 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -20,6 +20,53 @@ # new recipes: # - title: +- version: 0.9.34 + date: 2013-06-07 + + new features: + - title: "Conversion of Microsoft Word documents (.docx files generated by Word 2007 or newer)" + type: major + description: "DOCX files created with Microsoft Word 2007 or newer can now be converted by calibre. The converter has support for lists, tables, images, all types of text formatting, footnotes, endnotes and even dropcaps. A sample docx file showing the capabilities of the converter is available: http://calibre-ebook.com/downloads/demos/demo.docx Note that this code is still very new, so there are more than likely a few bugs waiting to be squashed." + + - title: "Kobo driver: Support for the newly released firmware 2.6.1. Also remove empty shelves from the Aura HD home page when deleting books." + tickets: [1187791] + + - title: "E-book viewer: Add Keyboard shortcuts for Back and Forward (Alt+Left, Alt+Right)" + tickets: [1186928] + + - title: "Allow right clicking on an author in Book Details to manage that author, i.e. change the author name, sort value or link." + tickets: [1186192] + + bug fixes: + - title: "Fix regression that broke FB2 input in the previous release." + tickets: [1186213] + + - title: "Catalog generation on OS X: Fix handling of some unicode characters" + tickets: [1066922] + + - title: "HTML Input: Avoid spurious log warnings about unspecified language/creator when these are actually specified on the command line." + tickets: [1186899] + + - title: "MOBI Output: Fix regression in 0.9.31 that caused vertical margins specified on some block level elements to be ignored." 
+ tickets: [1186533] + + - title: "ToC Editor: Handle ebooks that have

tags inside the tags. Instead of erroring out, the

tags are automatically moved into ." + tickets: [1186298] + + - title: "Linux build: Include the ffi libs from both gcc and libffi." + tickets: [1186148] + + - title: "When deleting custom recipes, use recycle bin." + tickets: [1186142] + + improved recipes: + - Folha de Sao Paolo + - Metro News NL + + new recipes: + - title: Seventh Guard + author: koliberek + - version: 0.9.33 date: 2013-05-31 @@ -1878,821 +1925,3 @@ author: drMerry -- version: 0.8.69 - date: 2012-09-14 - - new features: - - title: "E-book viewer: Add a button to the toolbar to switch themes easily" - tickets: [1047992] - - - title: "When downloading metadata for many books, if some of them fail, add an option to the downloaded message to show the failed books in the main book list, so that they can be individually processed easily" - - - title: "Remember last used window size of the conversion dialogs." - tickets: [1049265] - - - title: "Kindle driver: Turn on sending of azw3 files to kindles by default, since the KK now has azw3 support" - - - title: "Conversion: Add support for CSS pseudo classes :hover, :link, :visited, :first-line, :focus, :active, :first-letter" - - - title: "Wireless device driver: Make the default save template not use folders" - - bug fixes: - - title: "Fix a regression in th previous release that broke sending of books to the second SD card in SONY readers" - tickets: [1047992] - - - title: "Fix a memory leak when scanning for devices in windows" - - - title: "Ebook-viewer: When displaying mathematics, reflow equations that dont fit on a single line" - - - title: "Catalogs: Do not mark the AZW3 catalog as a periodical, as most Kindle devices cannot handle AZW3 periodicals" - - - title: "Content server: When using a custom IP address to listen on via Preferences->Tweaks advertise that IP address via BonJour." - - - title: "Fix ebook catalog generation on linux systems where the encoding is not UTF-8." 
- tickets: [1048404] - - improved recipes: - - De Volksrant - - Metro UK - - Countryfile - - Die Zeit (subscription) - - Birmingham post - - new recipes: - - title: History Today - author: Rick Shang - -- version: 0.8.68 - date: 2012-09-07 - - new features: - - title: "Drivers for the Nokia N9, Viewsonic 7e, Prestigio PER3274B and Coby Kyros 7035 " - tickets: [1046794,1046544] - - - title: "Add a tutorial on creating catalogs to the User Manual and a link to it in the create catalogs dialog" - - - title: "Wireless device connections: Add an option to force calibre to listen on a particular IP address. Access it by customizing the plugin in Preferences->Plugins" - - - title: "Android driver: Add an extra customization option to configure the directory to which ebooks are sent on the storage cards." - tickets: [1045045] - - - title: "Add an option under Preferences->Look & Feel->Book Details to hide the cover in the book details panel" - - - title: "The Calibre Companion Android app that allows wireless connection of Android device to calibre is out of beta. See https://play.google.com/store/apps/details?id=com.multipie.calibreandroid" - - bug fixes: - - title: "Fix sorting by author not working in the device view in calibre when connected to iTunes" - tickets: [1044619] - - - title: "Fix using the 'configure this device' menu action not validating settings" - - - title: "Device drivers: Ignore corrupted entries in metadata.calibre, instead of raising an error" - - - title: "PDF Output: Do not error out when generating an outline which points to pages that have been removed." 
- tickets: [1044799] - - - title: "PDF Output: Fix incorrect page numbers being generated in the outline when converting some books" - - - title: "PDF Output: Reduce memory consumption when writing out the PDF file, by using a stream" - - - title: "EPUB metadata: When there are multiple tags use the one with the earliest date as the published date" - - improved recipes: - - Wall Street journal (subscription version) - - Houston Chronicle - - Various Romanian news sources - - Business Week Magazine - - Arcamax - -- version: 0.8.67 - date: 2012-08-31 - - new features: - - title: "PDF Output: Generate a PDF Outline based on the Table of Contents of the input document" - - - title: "Conversion: Add an option under Structure Detection to set the 'Start reading at' metadata with an XPath expression." - tickets: [1043233] - - - title: "Speed up changing the title and author of files with books larger than 3MB by avoiding an unnecessary extra copy." - - - title: "Wireless device driver: Make detecting and connecting to devices easier on networks where mdns is disabled" - - - title: "PDF Output: Allow choosing the default font family and size when generating PDF files (under PDF Options) in the conversion dialog" - - - title: "Metadata dialog: Comments editor: Allow specifying the name of a link when using the insert link button." - tickets: [1042683] - - - title: "Remove the unmaintained pdfmanipulate command line utility. There are many other tools that provide similar functionality, for example, pdftk and podofo" - - bug fixes: - - title: "Catalogs: Fix regression that broke sorting of non series titles before series titles" - - - title: "PDF Output: Do not create duplicate embedded fonts in the PDF for every individual HTML file in the input document" - - - title: "Fix regression that broke DnD of files having a # character in their names to the book details panel" - - - title: "PDF Output: Allow generating PDF files with more than 512 pages on windows." 
- tickets: [1041614] - - - title: "Fix minor bug in handling of the completion popups when using the next/previous buttons in the edit metadata dialog" - ticket: [1041389] - - improved recipes: - - Coding Horror - - TIME Magazine - - new recipes: - - title: Cumhuriyet Yzarlar - author: Sethi Eksi - - - title: Arcadia - author: Masahiro Hasegawa - - - title: Business Week Magazine and Chronicle of Higher Education - author: Rick Shang - - - title: CIPER Chile - author: Darko Miletic - -- version: 0.8.66 - date: 2012-08-24 - - new features: - - title: "E-book viewer: Support the display of mathematics in e-books. Supports both embedded TeX and MathML" - description: "The calibre ebook viewer can now display embedded mathematics (symbols, equations, fractions, matrices, etc.) in EPUB and HTML ebooks. For details, see: http://manual.calibre-ebook.com/typesetting_math.html" - type: major - - - title: "Drivers for SONY PRS-T2, Freelander PD10 and Coolreader Tablet" - tickets: [1039103] - - - title: "Wireless device connections: Use a streamed mode for improved networking performance leading to much less time spent sending metadata to/from the device. Also make it easier to specify a fixed port directly in the dialog used to start the connection." - - - title: "Get books: Add ebooksgratuitis.com" - - bug fixes: - - title: "PDF Output: Handle input epub documents with filenames starting with a dot. Also do not hang if there is an unhandled error." - tickets: [1040603] - - - title: "Get Books: Update B&N plugin to handle changes to the B&N website" - - - title: "Content server: Fix regression that caused the port being advertised via BonJour to be incorrect if the user changed the port for the server." 
- tickets: [1037912] - - - improved recipes: - - Variety - - The Times UK - - new recipes: - - title: Le Monde subscription version - author: Remi Vanicat - - - title: Brecha Digital - author: Darko Miletic - -- version: 0.8.65 - date: 2012-08-17 - - new features: - - title: "A new wireless device driver. This allows connecting wirelessly to a device running a 'smart' calibre client" - description: "The wireless connection functions just as if the device was plugged into the computer by USB cable. Currently, Android devices are supported. See https://play.google.com/store/apps/details?id=com.multipie.calibreandroid" - type: major - - - title: "MOBI Output: Add an option to control the type of MOBI file produced, to the MOBI Output conversion options. You can now generate an old MOBI6, a new KF8 or a joint MOBI6/KF8 file. By default, MOBI6 files are generated. This replaces the previous use of a tweak." - - - title: "E-book viewer: Make paged mode the default. You can go back to the old flow mode by clicking the button with the yellow scroll in the top right corner of the viewer." 
- - - title: "Driver for COBY kYROS MID7042 and Samsung Galaxy Ace S5839i" - - bug fixes: - - title: "Update version of poppler bundled with calibre to fix reading covers from some PDF files" - - - title: "Get Books: Fix clicking of results from Diesel books when there is only a single result not working" - - - title: "Improve detection of system language on first run of calibre" - tickets: [1036354] - - - title: "When finding the next series index and the last series index is a fractional number, use the next largest integer, instead of just adding 1" - - - title: "Fix exception when saving a search/replace when no saved search/replace had been opened previously in the bulk search/replace dialog" - tickets: [1036464] - - - title: "Fix restore database not restoring entries for the original_* formats" - - - title: "Fix first run wizard not allowing empty email sending settings" - tickets: [1036358] - - - title: "Do not error out when setting the cover for a book that has no folders in the library." - tickets: [1035935] - - - title: "Conversion pipeline: Ignore unparseable values in the color attribute of font tags, instead of erroring out on them." - tickets: [1035633] - - - title: "Catalogs: Fix regression that broke creation of catalogs while a device is connected" - - - title: "Fix --with-library=/whatever not working for calibredb list" - - improved recipes: - - Slashdot - - Various Canadian newspapers - - Business Spectator - -- version: 0.8.64 - date: 2012-08-09 - - new features: - - title: "E-book viewer: Allow viewing images in the book in a separate pop-up window by right clicking on the image. Useful if you want to keep some image, like a map to the side while reading the book." - - - title: "Catalogs: Allow generation of catalogs in AZW3 format. Also add more powerful configuration options to exclude books and set prefixes. See http://www.mobileread.com/forums/showthread.php?t=187298 for details." 
- - - title: "Generate a PDF version of the User Manual" - - bug fixes: - - title: "News download: Fix broken handling of nesting for HTML 5 tags when parsing with BeautifulSoup" - - - title: "EPUB: Handle files in the EPUB that have semi-colons in their file names. This means in particular using URL escaping when creating the NCX as ADE cannot handle unescaped semi-colons in the NCX." - tickets: [1033665] - - - title: "Conversion pipeline: Ignore unparseable CSS instead of erroring out on it." - tickets: [1034074] - - - title: "When setting up a column coloring rule based on the languages column, allow entry of localized language names instead of only ISO codes" - - - title: "Catalogs: Generate cover for mobi/azw3 catalogs" - - - title: "Update the last modified column record of a book, whenever a format is added to the book." - - - title: "E-book viewer: Fix line scrolling stops at breaks option not working in paged mode" - tickets: [1033430] - - - title: "MOBI Output: Fix ToC at start option having no effect when converting some input documents that have an out-of-spine ToC." - tickets: [1033656] - - - title: "Catalog Generation: When generating EPUB/MOBI catalogs add more flexible rules for excluding books. Also add rules to customize the prefix characters used." - - - title: "Make setting published date using metadata search/replace more robust." - - - title: "Tag Browser: Flatten the display of sub-groups when sort by is not set to 'name'." - tickets: [1032746] - - - title: "Fix isbn:false not matching if other identifiers are attached to the book." - - improved recipes: - - The New Republic - - ZDNet - - Metro UK - - FHM UK - - new recipes: - - title: eKundelek.pl - author: Artur Stachecki - - - title: Sueddeutsche Mobil - author: Andreas Zeiser - -- version: 0.8.63 - date: 2012-08-02 - - new features: - - title: "E-book viewer: Allow quick saving and loading of viewer settings as 'themes'." 
- tickets: [1024611] - - - title: "Ebook-viewer: Add a restore defaults button to the viewer preferences dialog" - - - title: "E-book viewer: Add simple settings for text and background colors" - - - title: "Add an entry to save to disk when right clicking a format in the book details panel" - - - title: "ODT metadata: Read first image as the metadata cover from ODT files. Also allow ODT authors to set custom properties for extended metadata." - - - title: "E-book viewer and PDF Output: Resize images that are longer than the page to fit onto a single page" - - bug fixes: - - title: "KF8 Output: Fix bug where some calibre generated KF8 files would cause the Amazon KF8 viewer on the Touch to go to into an infinite loop when using the next page function" - tickets: [1026421] - - - title: "News download: Add support for tags that link to SVG images." - tickets: [1031553] - - - title: "Update podofo to 0.9.1 in all binary builds, to fix corruption of some PDFs when updating metadata." - tickets: [1031086] - - - title: "Catalog generation: Handle authors whose last name is a number." - - - title: "KF8 Input: Handle html entities in the NCX toc entries correctly" - - - title: "Fix a calibre crash that affected some windows installs" - tickets: [1030234] - - - title: "MOBI Output: Normalize unicode strings before writing to file, to workaround lack of support for non-normal unicode in Amazon's MOBI renderer." - tickets: [1029825] - - - title: "EPUB Input: Handle files that have duplicate entries in the spine" - - - title: "Fix regression in Kobo driver that caused the on device column to not be updated after deleting books" - - new recipes: - - title: Dziennik Polski - author: Gregorz Maj - - - title: High Country Blogs - author: Armin Geller - - - title: Philosophy Now - author: Rick Shang - -- version: 0.8.62 - date: 2012-07-27 - - new features: - - title: "Book details panel: Allow right clicking on a format to delete it." 
- - - title: "When errors occur in lots of background jobs, add an option to the error message to temporarily suppress subsequent error messages." - tickets: [886904] - - - title: "E-book viewer full screen mode: Allow clicking in the left and right page margins to turn pages." - tickets: [1024819] - - - title: "Drivers for various Android devices" - tickets: [1028690,1027431] - - - title: "Advanced search dialog: When starting on the title/author/etc. tab, restore the previously used search kind as well." - tickets: [1029745] - - - title: "When presenting the calibre must be restarted warning after installing a new plugin, add a restart now button so that the user can conveniently restart calibre. Currently only works when going vie Preferences->Plugins->Get new plugins" - - bug fixes: - - title: "Fix main window layout state being saved incorrectly if calibre is killed without a proper shutdown" - - - title: "Fix boolean and date searching in non english calibre installs." - - - title: "Conversion: Ignore invalid chapter detection and level n ToC expressions instead of erroring out" - - improved recipes: - - Psychology Today - - The Smithsonian - - The New Republic - - Various updated Polish news sources - - The Sun - - San Francisco Bay Guardian - - AnandTech - - Smashing Magazine - - new recipes: - - title: Linux Journal and Conowego.pl - author: fenuks - - - title: A list apart and .net magazine - author: Marc Busque - -- version: 0.8.61 - date: 2012-07-20 - - new features: - - title: "E-book viewer: Add a paged mode that splits up the text into pages, like in a paper book instead of presenting it as a single column. To activate click the button with the yellow scroll icon in the top right corner." - type: major - description: "In paged mode, the ebook viewer no longer cuts off the last line of text at the bottom of the screen, and it respects CSS page-break directives. 
You can also set page margins and control the number of pages displayed on screen by clicking the Preferences button in the viewer and going to 'Text layout in paged mode'." - - - title: "Digitally sign the calibre OS X and windows builds" - - - title: "Get Books: Add Mills and Boon UK" - - - title: "Various minor improvements to the Bulk metadata edit dialog" - tickets: [1025825, 1025838, 1025628] - - - title: "Fix various regression in the auto-complete functionality for authors/series/tags etc introduced in 0.8.60" - - - title: "Drivers for various new Android devices" - tickets: [1024934] - - - title: "MOBI: Add support for the new language EXTH header field in MOBI files generated by kindlegen 2.5" - - bug fixes: - - title: "KF8 Output: Fix calibre produced KF8 files not showing the 'Use publisher font' option on the Kindle Touch when they have embedded fonts" - - - title: "Txt/fb2/rtf/pml/rb output: Fix non-visibile element's tail text (which should be visible) is being ignored when it shouldn't." - tickets: [1026541] - - - title: "Book details panel: When displaying a link to amazon, use a country specific name like amazon.fr instead of using amazon.com for all countries" - - - title: "Conversion: When splitting on page breaks, ignore page-breaks with values of auto and inherit. " - tickets: [1018875] - - - title: "Metadata jacket: Specify foreground in addition to the background color for the title banner so that it remain readable if the user tries to monkey with the CSS in the viewer." - - - title: "PDF Output: Fix rendering of cover as first age of PDF (ignore margins so that the image covers the entire page)" - - - title: "Linux binaries: Bundle libglib to avoid incompatibilities with glib on various distros." 
- tickets: [1022019] - - - title: "Fix find_identical_books() choking on books with too many authors" - - - improved recipes: - - Toronto Star - - American Prospect - - faz.net - -- version: 0.8.60 - date: 2012-07-13 - - new features: - - title: "When searching, allow use of un-accented characters to match accented characters in all fields and all languages (not just authors and English as before)" - description: "The rules for matching un-accented characters are done in a language dependent way. So if your calibre interface language is set to English, n will match both n and ñ, but if it is set to Spanish, it will match only n, as in Spanish ñ is a separate alphabet in Spanish. This makes searching a little slower, so if you have a very large library you can turn it off via Preferences->Searching." - type: major - - - title: "Content server: Show a best guess for the IP address the content server is currently listening at in the connect/share menu." - tickets: [1024128] - - - title: "E-book viewer: Add an option to show a clock in full screen mode." - tickets: [1022086] - - - title: "Drivers for Paquito Imaginarium and a few Android phones" - tickets: [1024021,1023613,1023461,1022401] - - - title: "HTMLZ Output: Add option to use the book title as the filename for the html file inside the archive" - - - title: "Make the list of displayed fields in the book details panel a per library setting" - - - title: "Have autocomplete on authors/series/tags/etc. 
ignore accented characters when finding matches (similar to the changes to search above)" - - - title: "Support for retina displays in OS X (I hope)" - tickets: [1022191] - - - title: "Remove the dependency on the zip command line tool when developing plugins" - - bug fixes: - - title: "Kobo driver: Do not perform write operations on the Kobo database if its version is newer than the latest version the driver supports, for safety" - - - title: "KF8 Input: Ignore encoding declarations inside the html markup, as they are sometimes incorrect." - tickets: [1022933] - - - title: "Force refresh of cached composite column values when values in the cache are changed" - - - title: "Fix a regression that broke calibre --shutdown-running-calibre on windows." - tickets: [1022504] - - - title: "Possible workaround for Qt 4.8.2 open file dialog failing on some linux distros." - tickets: [1022019] - - - title: "Catalogs: Fix some epubcheck errors when generating catalogs in EPUB format" - - - title: "Linux installer: When calling the xdg utilities use system libraries rather than the libraries bundled with calibre" - - - title: "Fix numeric sort for composite custom columns that use custom separators" - tickets: [1021814] - - - title: "Tag browser: When grouping by first letter, handle languages that have 'letters' made of more than one character. This can be turned off via Preferences->Tweaks" - - improved recipes: - - Hola magazine - - Adventure Gamers - - Cosmopolitan UK - - Onda Rock - - new recipes: - - title: Empire Magazine - author: Dave Asbury - - - title: NZZ Folio - author: Bernd Leinfelder - - - title: Warentest - author: asdfdsfksd - - -- version: 0.8.59 - date: 2012-07-06 - - new features: - - title: "Drivers for Samsung SGH-T989 and Sony Ericsson Sola" - tickets: [1021365] - - - title: "Conversion pipeline: When removing the first image, also remove the html file the image is found in, if that file has no other content. 
Allows this option to be used to remove covers from EPUB files without leaving behind a blank page." - - - title: "Content server: Add a navigation panel at the bottom of each page." - tickets: [1020225] - - - title: "calibredb: Add a backup_metadata command to manually run the backup to opf from the command line" - - - title: "User defined driver: Add option to swap main memory and card a." - tickets: [1020056] - - - title: "Add new option to the series_index_auto_increment tweak, no_change, that causes calibre not to change the series_index when the series is changed" - - bug fixes: - - title: "PDF Output: Resize large images so that they do not get off at the right edge of the page." - - - title: "On linux ensure that WM_CLASS for the main calibre GUI is set to 'calibre-gui' to match the name of the calibre-gui.desktop file. This is apparently required by the GNOME 3 shell." - tickets: [1020297] - - - title: "Update ICU in all builds to version 49.1" - - - title: "Tag browser: Fix regression that broke drag and drop between user categories in the tag browser" - - - title: "When copying to library and deleting after copy, do not place deleted files in recycle bin, as this is redundant and slow (they have already been copied into another library)" - - - title: "Fix yes/no fields with value of No not showing up in the book details panel" - - - title: "Catalogs: Better sorting for non English languages" - tickets: [930882] - - - title: "Get Books: Fix Foyles UK, Weightless books, ebooks.com and ozon.ru" - - - title: "CHM Input: Fix handling of chm files that split their html into multiple sub-directories." 
- tickets: [1018792] - - improved recipes: - - FHM UK - - The Age - - weblogs_ssl - - Heraldo.es - - new recipes: - - title: CATO Institute and Heritage Foundation - author: _reader - -- version: 0.8.58 - date: 2012-06-29 - - new features: - - title: "Add some texture to calibre generated covers" - - - title: "Drivers for Sogo SS-4370, HTC G2 and Lenovo ThinkPad Tablet" - tickets: [1019050, 1017010] - - - title: "Add search to the Manage tags/series/etc. dialogs" - - - title: "News download: Add support for images embedded in the HTML" - - - title: "calibre -s now waits for calibre to shutdown" - - bug fixes: - - title: "Workaround for iTunes breaking scripting with version 10.6.3 on OS X." - tickets: [1012243] - - - title: "EPUB Input: When there are multiple elements of the same type in the OPF guide, use the first rather than the last element." - - - title: "Windows: Disable the new UI style if the color depth of the desktop is less than 32 bits per pixel" - - - title: "ISBNDB metadata plugin: Return results even though they have no comments" - - - title: "More robust handling of EINTR during IPC" - - - title: "Metadata download: Support for amazon's new results page markup" - - - title: "EPUB Output: Fix a bug that could cause corrupted output when doing an EPUB/OEB to EPUB conversion if the input EPUB had multiple files with the same name" - - - title: "KF8 Output: Fix a couple of bugs that could lead to generation of invalid KF8 files." - tickets: [1016672] - - improved recipes: - - ABC Digital - - O Globo - - new recipes: - - title: Sign of the Times and New Statesman - author: TerminalVeracity - - - title: CT24 - author: zoidozoido - - - title: SmileZilla - author: Will - - - title: Marketing Sensoriale - author: NotTaken - -- version: 0.8.57 - date: 2012-06-22 - - new features: - - title: "PDF Output: Full pagination support. No more cutoff bottom line." 
- type: major - description: "Fixes a long standing bug in calibre's PDF Output that caused the bottom line of some pages to be partially cut off and prevented top and bottom margins from working." - - - title: "calibredb add now prints out the ids of added books" - tickets: [1014303] - - - title: "Kobo Vox driver: Add support for new Google Play firmware" - tickets: [1014129] - - - title: "Driver for Prestigio PMP5097PRO" - tickets: [1013864] - - - title: "Add option to disable tooltips in the book list under Preferences->Look & Feel" - - - title: "When customizing builtin recipes download the latest version of the recipe to customize instead of using the possibly out of date bundled version" - - bug fixes: - - title: "PDF Output: Use the cover from the input document when no cover is specified during a conversion" - - - title: "E-book Viewer: Printing now has proper pagination with top and bottom margins no lines partially cut-off at the bottom and full style retention" - - - title: "KF8 Input: Handle files with incorrectly encoded guide type entries." - tickets: [1015020] - - - title: "E-book viewer: Disable hyphenation on windows xp as Qt WebKit barfs on soft hyphens on windows XP" - - - title: "Handle OS X systems with invalid palette colors." - tickets: [1014900] - - - title: "Tag Browser: Fix regression that broke partitioning of hierarchical categories." 
- tickets: [1014065] - - - title: "LRF Output: Handle negative page margins" - tickets: [1014103] - - - title: "Template language: Fix arithmetic functions to tolerate the value 'None' as returned by raw_field()" - - - title: "Fix custom title sort set in the edit metadata dialog getting reset by the conversion dialog" - - improved recipes: - - The Economist - - Akter - - 24 Sata sr - - Novi List - - Metro Montreal - - Mode Durable - - CanardPC - - The Economic Collapse - - Our Daily Bread - - new recipes: - - title: Akter Daily - author: Darko MIletic - - - title: BBC Brasil - author: Claviola - - - title: Homopedia.pl - author: rainbowwarrior - - - title: National Geographic Magazine - author: Terminal Veracity - - - title: Something Awful - author: atordo - - - title: Huffington Post UK - author: Krittika Goyal - -- version: 0.8.56 - date: 2012-06-15 - - new features: - - title: "Make the new calibre style default on Windows and OS X." - type: major - description: "This change gives a more 'modern' feel to the calibre user interface with focus highlighting, gradients, rounded corners, etc. In case you prefer the old look, you can restore under Preferences->Look & Feel->User interface style" - - - title: "Get Books: Add the new SONY Reader store" - - - title: "Read metadata from .docx (Microsoft Word) files" - - - title: "Allow customizing the behavior of the searching for similar books by right clicking the book. You can now tell calibre to search different columns than the traditional author/series/publisher/tags/etc. 
in Preferences->Searching" - - - title: "Add option to restore alternating row colors to the Tag Browser under Preferences->Look & Feel->Tag Browser" - - - title: "Update to Qt 4.8.2 on windows compiled with link time code generation for a small performance boost" - - bug fixes: - - title: "Get Books: Update plugins to handle website changes at ebooks.com, project gutenberg, and virtualo" - - - title: "AZW3 Output: Fix TOC at start option not working" - - - title: "AZW3 Output: Close self closing script/style/title/head tags explicitly as they cause problems in webkit based renderers like the Kindle Fire and calibre's viewers." - - - title: "Fix the current_library_name() template function not updating after a library switch" - - - title: "AZW3 Output: Handle the case of a link pointing to the last line of text in the document." - tickets: [1011330] - - - title: "Fix regression in 0.8.55 that broke highlighting of items matching a search in the Tag Browser" - tickets: [1011030] - - - title: "News download: Handle query only relative URLs" - - improved recipes: - - Christian Science Monitor - - Neue Zurcher Zeitung - - Birmignham Post - - Metro UK - - New Musical Express - - The Independent - - The Daily Mirror - - Vreme - - Smithsonian Magazine - - new recipes: - - title: NZZ Webpaper - author: Bernd Leinfelder - - -- version: 0.8.55 - date: 2012-06-08 - - new features: - - title: "Add a new 'Calibre style' interface look that is more modern than the default look. You can select it via Preferences->Look & Feel->User interface style." - - - title: "New, subtler look for the Tag Browser" - - - title: "Driver for Trekstor Pyrus and Pantech Android Tablet" - tickets: [1008946, 1007929] - - - title: "Conversion pipeline: Handle guide elements with incorrectly cased hrefs. Also handle guide elements of type coverimagestandard and thumbimagestandard." - - - title: "Allow user to customize trekstor plugin to send books into sub directories." 
- tickets: [1007646] - - - title: "EPUB Input: Add support for EPUB files that use the IDPF font obfuscation algorithm. Apparently, people have started producing these now." - tickets: [1008810] - - - title: "Save single format to disk: Only show the format available in the selected books." - tickets: [1007287] - - bug fixes: - - title: "MOBI Output: When using the insert metadata at start of book option, do not use a table to layout the metadata, as the Kindle Fire crashes when rendering the table." - tickets: [1002119] - - - title: "Device detection: Fix a bug that could cause device detection to fail completely if devices with certain vendor/product ids are connected." - tickets: [1009718] - - - title: "MOBI Output: When rasterizing svgs only compute style information when an actual svg image is present. Small speedup when converting large svg-free documents to MOBI." - - - title: "SONY T1 driver: Fix support for collections of books placed on the SD card" - tickets: [986044] - - - title: "Fix partitioning problems in tag browser with fields that have no name, such as identifiers and formats" - - - title: "Welcome wizard: Preferentially use the kindle email address set as default when more than one such address exists." - tickets: [1007932 ] - - - title: "Fix regression in 0.8.54 that broke the use of the shortcut Alt+A to select books by the same author" - - improved recipes: - - Various Polish recipes - - Vice Magazine - - EL Mundo Today - - Haaretz - - Good Housekeeping - - El Pais - - Christian Science Monitor - - Marketing Magazine - - Instapaper - - new recipes: - - title: Various Philippine news sources - author: jde - - - title: Natemat.pl and wirtualnemedia.pl - author: fenuks - - - title: Rabble.ca - author: timtoo - -- version: 0.8.54 - date: 2012-05-31 - - new features: - - title: "E-book viewer: The Table of contents panel now tracks the current position in the book. As you scroll through the book, the entry you are currently on is highlighted." 
- type: major - description: "To see this feature in action, open the Table of Contents panel in the viewer by clicking the button with three blue lines on it. As you page through the book, the chapter you are reading currently is highlighted in the Table of Contents Panel. Obviously, this will only work if the book you are reading has a Table of Contents. You can also use the Ctrl+PgUp and Ctrl+PgDn keys to quickly skip between chapters." - - - title: "calibredb: Allow setting metadata for individual fields with the set_metadata command" - - - title: "Make it a little harder to accidentally change the sorting of items in the Tag Browser. Also frees up more vertical space for the Tag Browser itself." - - - title: "The calibre user manual is now available in AZW3 format as well as EPUB" - - bug fixes: - - title: "Automatic titlecasing: No longer try to capitalize scottish names, as there are too many special cases." - tickets: [775825] - - - title: "Never crash when reading metadata from PDF files (reading now always happens in a worker process)" - tickets: [1006452] - - - title: "EPUB Input: Do no skip the valid children of an NCX node that has no text/href" - - - title: "Archos driver: Detect SD card" - tickets: [1005650] - - - title: "When bulk downloading metadata and the user deletes one of the books for which metadata is being downloaded, just ignore it, instead of erroring out" - - - title: "When deleting books from the bottom of the booklist, ensure that the bottom book after deleting is selected" - - - title: "Fix regression in 0.8.53 that broke sending APNX files to older Kindle devices" - - - title: "Use correct text color for selected rows in the list of matches when downloading metadata and showing results in get books." 
- tickets: [1004568] - - improved recipes: - - The Independent - - Welt der Physik - - China Daily - - The Grid - - Prospect Magazine - - new recipes: - - title: La gazetta del Mezzogiorno - author: faber1971 - diff --git a/manual/conversion.rst b/manual/conversion.rst index 0747ffaba9..c693d0be15 100644 --- a/manual/conversion.rst +++ b/manual/conversion.rst @@ -574,28 +574,33 @@ format, whether input or output are available in the conversion dialog under the Convert Microsoft Word documents ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|app| does not directly convert .doc/.docx files from Microsoft Word. However, in Word, you can save the document -as HTML and then convert the resulting HTML file with |app|. When saving as HTML, be sure to use the -"Save as Web Page, Filtered" option as this will produce clean HTML that will convert well. Note that Word -produces really messy HTML, converting it can take a long time, so be patient. Another alternative is to -use the free OpenOffice. Open your .doc file in OpenOffice and save it in OpenOffice's format .odt. |app| can -directly convert .odt files. +|app| can automatically convert ``.docx`` files created by Microsoft Word 2007 and +newer. Just add the file to |app| and click convert (make sure you are running +the latest version of |app| as support for ``.docx`` files is very new). -There is a Word macro package that can automate the conversion of Word documents using |app|. It also makes -generating the Table of Contents much simpler. It is called BookCreator and is available for free -at `mobileread `_. +.. note:: + There is a `demo .docx file `_ + that demonstrates the capabilities of the |app| conversion engine. Just + download it and convert it to EPUB or AZW3 to see what |app| can do. -An easy way to generate a Table of Contents when converting a Word document is: +|app| will automatically generate a Table of Contents based on headings if you mark +your headings with the ``Heading 1``, ``Heading 2``, etc. 
styles in Word. Open +the output ebook in the calibre viewer and click the Table of Contents button +to view the generated Table of Contents. - 1. Mark your Chapters and sub-Chapters in the doc file with one of the MS built-in styles called 'Heading 1', 'Heading 2', ..., 'Heading 6'. 'Heading 1' equates to the HTML tag

<h1>, 'Heading 2' to <h2>

etc +Older .doc files +^^^^^^^^^^^^^^^^^ - 2. Save the doc as Webpage-filtered (rather than Webpage) and import the html file into |app| - - 3. When you convert in |app| you use what you did in step 1 to set the box called 'Detect chapters at' on the Convert - Structure Detection page. For example: - - * If you mark Chapters with style 'Heading 2' then set the 'Detect chapters at' box to //h:h2 This will give you a proper external metadata TOC in the converted epub. - * A slightly more complex example...if your book has Sections and Chapters and you want a 2-level nested metadata TOC. Mark the doc Sections with style 'Heading 2' and the Chapters with style 'Heading 3'. When you convert set the 'Detect chapters at' box to //h:h2|//h:h3. On the Convert - TOC page set the 'Level 1 TOC' box to //h:h2 and the 'Level 2 TOC' box to //h:h3. +For older .doc files, you can save the document as HTML with Microsoft Word +and then convert the resulting HTML file with |app|. When saving as +HTML, be sure to use the "Save as Web Page, Filtered" option as this will +produce clean HTML that will convert well. Note that Word produces really messy +HTML, converting it can take a long time, so be patient. If you have a newer +version of Word available, you can directly save it as docx as well. +Another alternative is to use the free OpenOffice. Open your .doc file in +OpenOffice and save it in OpenOffice's format .odt. |app| can directly convert +.odt files. Convert TXT documents ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/manual/develop.rst b/manual/develop.rst index d75d546937..a939a442b4 100644 --- a/manual/develop.rst +++ b/manual/develop.rst @@ -115,16 +115,27 @@ commits:: Be careful to not include merges when using ``HEAD~n``. If you plan to do a lot of development on |app|, then the best method is to create a -`GitHub `_ account. Once you have an account, follow the -steps at `Setup Git `_ and -`Fork A Repo `_ to create your own fork of the -`calibre GitHub repository `_. 
Read -`Pushing to a remote `_ -to learn how to upload your commits to GitHub. +`GitHub `_ account. Below is a basic guide to setting up +your own fork of calibre in a way that will allow you to submit pull requests +for inclusion into the main |app| repository: + + * Set up git on your machine as described in this article: `Setup Git `_ + * Set up ssh keys for authentication to GitHub, as described here: `Generating SSH keys `_ + * Go to https://github.com/kovidgoyal/calibre and click the :guilabel:`Fork` button. + * In a Terminal do:: + + git clone git@github.com:<username>/calibre.git + + Replace <username> above with your GitHub username. That will get your fork checked out locally. + * You can make changes and commit them whenever you like. When you are ready to have your work merged, do a:: + + git push + + and go to ``https://github.com/<username>/calibre`` and click the :guilabel:`Pull Request` button to generate a pull request that can be merged. + * You can update your local copy with code from the main repo at any time by doing:: + + git pull upstream -You can contribute your code in the form of `Pull Requests -`_. Generally, you should -create a new branch for any feature that is non-trivial. You should also keep an eye on the |app| `development forum `_. Before making @@ -297,10 +308,14 @@ code, with access to the |app| modules:: is great for testing a little snippet of code on the command line. It works in the same way as the -c switch to the python interpreter:: - calibre-debug -e myscript.py + calibre-debug myscript.py can be used to execute your own Python script. It works in the same way as passing the script to the Python interpreter, except -that the calibre environment is fully initialized, so you can use all the calibre code in your script. +that the calibre environment is fully initialized, so you can use all the calibre code in your script.
To use command line arguments with your script, use the form:: + + calibre-debug myscript.py -- --option1 arg1 + +The ``--`` causes all subsequent arguments to be passed to your script. Using |app| in your projects @@ -313,7 +328,7 @@ Binary install of |app| If you have a binary install of |app|, you can use the Python interpreter bundled with |app|, like this:: - calibre-debug -e /path/to/your/python/script.py + calibre-debug /path/to/your/python/script.py -- arguments to your script Source install on Linux ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/manual/faq.rst b/manual/faq.rst index 7f7b7cae00..bdac21a622 100644 --- a/manual/faq.rst +++ b/manual/faq.rst @@ -20,7 +20,7 @@ What formats does |app| support conversion to/from? |app| supports the conversion of many input formats to many output formats. It can convert every input format in the following list, to every output format. -*Input Formats:* CBZ, CBR, CBC, CHM, DJVU, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ +*Input Formats:* CBZ, CBR, CBC, CHM, DJVU, DOCX, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ *Output Formats:* AZW3, EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, RTF, SNB, TCR, TXT, TXTZ @@ -29,13 +29,14 @@ It can convert every input format in the following list, to every output format. PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers. PDB is also a generic format. |app| supports eReder, Plucker, PML and zTxt PDB files. DJVU support is only for converting DJVU files that contain embedded text. These are typically generated by OCR software. - MOBI books can be of two types Mobi6 and KF8. |app| fully supports both. MOBI files often have .azw or .azw3 file extensions + MOBI books can be of two types Mobi6 and KF8. |app| fully supports both. MOBI files often have .azw or .azw3 file extensions. + DOCX files from Microsoft Word 2007 and newer are supported. .. 
_best-source-formats: What are the best source formats to convert? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In order of decreasing preference: LIT, MOBI, AZW, EPUB, AZW3, FB2, HTML, PRC, RTF, PDB, TXT, PDF +In order of decreasing preference: LIT, MOBI, AZW, EPUB, AZW3, FB2, DOCX, HTML, PRC, ODT, RTF, PDB, TXT, PDF I converted a PDF file, but the result has various problems? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/recipes/folhadesaopaulo_sub.recipe b/recipes/folhadesaopaulo_sub.recipe index 53fc06bf1d..bd29a9603d 100644 --- a/recipes/folhadesaopaulo_sub.recipe +++ b/recipes/folhadesaopaulo_sub.recipe @@ -6,30 +6,37 @@ import datetime class FSP(BasicNewsRecipe): title = u'Folha de S\xE3o Paulo' - __author__ = 'fluzao' + __author__ = 'Joao Eduardo Bertacchi' description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \ u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]' - # found this to be the easiest place to find the index page (13-Nov-2011). + #found this to be the easiest place to find the index page (13-Nov-2011). 
# searching for the "Indice Geral" link HOMEPAGE = 'http://www1.folha.uol.com.br/fsp/' + today=datetime.date.today() + FIRSTPAGE= 'cp' + str(today.day).zfill(2) + str(today.month).zfill(2) + str(today.year) + '.shtml' masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif' language = 'pt_BR' no_stylesheets = True - max_articles_per_feed = 40 + max_articles_per_feed = 50 remove_javascript = True needs_subscription = True - remove_tags_before = dict(name='p') - remove_tags = [dict(name='td', attrs={'align':'center'})] +# remove_tags_before = dict(name='p') +# remove_tags_before = dict(name='div', id='articleNew') +# remove_tags_after = dict(name='div', id='articleNew') + keep_only_tags = [dict(name='div', id='articleNew'), dict(name='table', attrs={'class':'articleGraphic'})] + publication_type = 'newspaper' + simultaneous_downloads = 5 +# remove_tags = [dict(name='td', attrs={'align':'center'})] remove_attributes = ['height','width'] # fixes the problem with the section names - section_dict = {'cotidian' : 'cotidiano', 'ilustrad': 'ilustrada', - 'quadrin': 'quadrinhos' , 'opiniao' : u'opini\xE3o', - 'ciencia' : u'ci\xeancia' , 'saude' : u'sa\xfade', - 'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio', - 'imoveis' : u'im\xf3veis', 'negocios' : u'neg\xf3cios', + section_dict = {'cotidian' : 'cotidiano', 'ilustrad': 'ilustrada', \ + 'quadrin': 'quadrinhos' , 'opiniao' : u'opini\xE3o', \ + 'ciencia' : u'ci\xeancia' , 'saude' : u'sa\xfade', \ + 'ribeirao' : u'ribeir\xE3o' , 'equilibrio' : u'equil\xedbrio', \ + 'imoveis' : u'im\xf3veis', 'negocios' : u'neg\xf3cios', \ 'veiculos' : u've\xedculos', 'corrida' : 'folha corrida'} # this solves the problem with truncated content in Kindle @@ -39,6 +46,40 @@ class FSP(BasicNewsRecipe): # Indice e Comunicar Erros preprocess_regexps = [(re.compile(r'.*Comunicar Erros', re.DOTALL|re.IGNORECASE), lambda match: r'')] + extra_css = """ +#articleNew { font: 18px Times New Roman,verdana,arial; } 
+img { background: none !important; float: none; margin: 0px; } +.newstexts { list-style-type: none; height: 20px; margin: 15px 0 10px 0; } +.newstexts.last { border-top: 1px solid #ccc; margin: 5px 0 15px 0; padding-top: 15px; } +.newstexts li { display: inline; padding: 0 5px; } +.newstexts li.prev { float: left; } +.newstexts li.next { float: right; } +.newstexts li span { width: 12px; height: 15px; display: inline-block; } +.newstexts li.prev span { background-position: -818px -46px; } +.newstexts li.next span { background-position: -832px -46px; } +.newstexts li a { font: bold 12px arial, verdana, sans-serif; text-transform: uppercase; color: #999; text-decoration: none !important; } +.newstexts li a:hover { text-decoration: underline !important } +.headerart { font-weight: bold; } +.title { font: bold 39px Times New Roman,verdana,arial; margin-bottom: 15px; margin-top: 10px; } +.creditart, .origin { font: bold 12px arial, verdana, sans-serif; color: #999; margin: 0px; display: block; } +.headerart p, .fine_line p { margin: 0 !important; } +.fine_line { font: bold 18px Times New Roman,verdana,arial; } +.fine_line p { margin-bottom: 18px !important; } +.fine_line p:first-child { font-weight: normal; font-style: italic; font-size: 20px !important; } +.eye { display: block; width: 317px; border-top: 2px solid #666; padding: 7px 0 7px; border-bottom: 2px solid #666; font-style: italic; font-weight: bold; } +.kicker { font-weight: bold; text-transform: uppercase; font-size: 18px; font-family: Times New Roman,verdana,arial !important; } +.blue { color: #000080; } +.red { color: #F00; } +.blue { color: #000080; } +.green { color: #006400; } +.orange { color: #FFA042; } +.violet { color: #8A2BE2; } +.text_footer { font-size: 15px; } +.title_end { font-size: 23px; font-weight: bold; } +.divisor { text-indent: -9999px; border-bottom: 1px solid #ccc; height: 1px; margin: 0; } +.star { background: none !important; height: 15px; } +.articleGraphic { margin-bottom: 20px; } 
+""" def get_browser(self): br = BasicNewsRecipe.get_browser(self) @@ -48,23 +89,33 @@ class FSP(BasicNewsRecipe): br['user'] = self.username br['pass'] = self.password br.submit().read() - # if 'Please try again' in raw: - # raise Exception('Your username and password are incorrect') +## if 'Please try again' in raw: +## raise Exception('Your username and password are incorrect') return br +# def postprocess_html(self, soup, first_fetch): +# #Clean-up normal articles +# tags = soup.findAll('div', id='articleNew') +# if tags and tags[0]: +# return tags[0] +# #Clean-up first page +# tags = soup.findAll('div', attrs={'class':'double_column facsimile'}) +# if tags and tags[0]: +# return tags[0] +# return soup + def parse_index(self): - # Searching for the index page on the HOMEPAGE - # hpsoup = self.index_to_soup(self.HOMEPAGE) - # indexref = hpsoup.find('a', href=re.compile('^indices.*')) - # self.log('--> tag containing the today s index: ', indexref) - # INDEX = indexref['href'] - # INDEX = 'http://www1.folha.uol.com.br/'+INDEX - today=datetime.date.today() - INDEX = 'http://www1.folha.uol.com.br/' + 'fsp/indices/index-' + str(today).replace('-','') + '.shtml' + #Searching for the index page on the HOMEPAGE + hpsoup = self.index_to_soup(self.HOMEPAGE) + #indexref = hpsoup.find('a', href=re.compile('^indices.*')) + #self.log('--> tag containing the today s index: ', indexref) + #INDEX = indexref['href'] + #INDEX = 'http://www1.folha.uol.com.br/'+INDEX + INDEX = 'http://www1.folha.uol.com.br/' + 'fsp/indices/index-' + str(self.today).replace('-','') + '.shtml' self.log('--> INDEX after extracting href and adding prefix: ', INDEX) # ... 
and taking the opportunity to get the cover image link - # coverurl = hpsoup.find('a', href=re.compile('^cp.*'))['href'] - coverurl = 'cp' + str(today.day).zfill(2) + str(today.month).zfill(2) + str(today.year) + '.shtml' + #coverurl = hpsoup.find('a', href=re.compile('^cp.*'))['href'] + coverurl = self.FIRSTPAGE if coverurl: self.log('--> tag containing the today s cover: ', coverurl) coverurl = coverurl.replace('shtml', 'jpg') @@ -72,35 +123,37 @@ class FSP(BasicNewsRecipe): self.log('--> coverurl after extracting href and adding prefix: ', coverurl) self.cover_url = coverurl - # soup = self.index_to_soup(self.INDEX) + #soup = self.index_to_soup(self.INDEX) soup = self.index_to_soup(INDEX) feeds = [] articles = [] - section_title = "Preambulo" + section_title = u'Primeira p\xe1gina' for post in soup.findAll('a'): # if name=True => new section strpost = str(post) - # if strpost.startswith(' post: ', post) @@ -111,15 +164,15 @@ class FSP(BasicNewsRecipe): feeds.append((section_title, articles)) # keeping the front page url - # minha_capa = feeds[0][1][1]['url'] + #minha_capa = feeds[0][1][1]['url'] # removing the first section ('Preambulo') del feeds[0] + #del feeds[0][1][0] # inserting the cover page as the first article (nicer for kindle users) - # feeds.insert(0,(u'primeira p\xe1gina', [{'title':u'Primeira p\xe1gina' , 'url':minha_capa}])) - feeds.insert(0,(u'Capa', [{'title':u'Capa' , 'url':self.get_cover_url().replace('jpg', 'shtml')}])) + #feeds.insert(0,(u'primeira p\xe1gina', [{'title':u'Primeira p\xe1gina' , 'url':minha_capa}])) + #feeds[0][1].insert(0,{'title':u'fac-s\xedmile da capa' , 'url':self.HOMEPAGE+self.FIRSTPAGE}) return feeds - diff --git a/recipes/frontlineonnet.recipe b/recipes/frontlineonnet.recipe index 3b65e4bb18..dc1d16cfd4 100644 --- a/recipes/frontlineonnet.recipe +++ b/recipes/frontlineonnet.recipe @@ -1,3 +1,4 @@ + __license__ = 'GPL v3' __copyright__ = '2011, Darko Miletic ' ''' @@ -5,7 +6,6 @@ frontlineonnet.com ''' import re -from 
calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe class Frontlineonnet(BasicNewsRecipe): @@ -18,7 +18,7 @@ class Frontlineonnet(BasicNewsRecipe): delay = 1 INDEX = 'http://frontlineonnet.com/' use_embedded_content = False - encoding = 'cp1252' + encoding = 'utf-8' language = 'en_IN' publication_type = 'magazine' masthead_url = 'http://frontlineonnet.com/images/newfline.jpg' @@ -45,37 +45,36 @@ class Frontlineonnet(BasicNewsRecipe): ] keep_only_tags= [ - dict(name='font', attrs={'class':'storyhead'}) - ,dict(attrs={'class':'byline'}) + dict(name='div', attrs={'id':'content'}) + #,dict(attrs={'class':'byline'}) ] - remove_attributes=['size','noshade','border'] + #remove_attributes=['size','noshade','border'] - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - for item in soup.findAll('img'): - if not item.has_key('alt'): - item['alt'] = 'image' - return soup + #def preprocess_html(self, soup): + #for item in soup.findAll(style=True): + #del item['style'] + #for item in soup.findAll('img'): + #if not item.has_key('alt'): + #item['alt'] = 'image' + #return soup def parse_index(self): articles = [] soup = self.index_to_soup(self.INDEX) - for feed_link in soup.findAll('a',href=True): - if feed_link['href'].startswith('stories/'): - url = self.INDEX + feed_link['href'] - title = self.tag_to_string(feed_link) - date = strftime(self.timefmt) - articles.append({ - 'title' :title - ,'date' :date - ,'url' :url - ,'description':'' - }) + for feed_link in soup.findAll('div', id='headseccol'): + a = feed_link.find('a', href=True) + title = self.tag_to_string(a) + url = a['href'] + articles.append({ + 'title' :title + ,'date' :'' + ,'url' :url + ,'description':'' + }) return [('Frontline', articles)] - def print_version(self, url): - return "http://www.hinduonnet.com/thehindu/thscrip/print.pl?prd=fline&file=" + url.rpartition('/')[2] + #def print_version(self, url): + #return 
"http://www.hinduonnet.com/thehindu/thscrip/print.pl?prd=fline&file=" + url.rpartition('/')[2] - def image_url_processor(self, baseurl, url): - return url.replace('../images/', self.INDEX + 'images/').strip() + #def image_url_processor(self, baseurl, url): + #return url.replace('../images/', self.INDEX + 'images/').strip() diff --git a/recipes/greader.recipe b/recipes/greader.recipe deleted file mode 100644 index 2c9d5aa015..0000000000 --- a/recipes/greader.recipe +++ /dev/null @@ -1,35 +0,0 @@ -import urllib, re, mechanize -from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre import __appname__ - -class GoogleReader(BasicNewsRecipe): - title = 'Google Reader' - description = 'This recipe fetches from your Google Reader account unread Starred items and unread Feeds you have placed in a folder via the manage subscriptions feature.' - needs_subscription = True - __author__ = 'davec, rollercoaster, Starson17' - base_url = 'http://www.google.com/reader/atom/' - oldest_article = 365 - max_articles_per_feed = 250 - get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed - use_embedded_content = True - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - if self.username is not None and self.password is not None: - request = urllib.urlencode([('Email', self.username), ('Passwd', self.password), - ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)]) - response = br.open('https://www.google.com/accounts/ClientLogin', request) - auth = re.search('Auth=(\S*)', response.read()).group(1) - cookies = mechanize.CookieJar() - br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies)) - br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)] - return br - - def get_feeds(self): - feeds = [] - soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list') - for id in soup.findAll(True, attrs={'name':['id']}): - url = id.contents[0] - feeds.append((re.search('/([^/]*)$', 
url).group(1), - self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options)) - return feeds diff --git a/recipes/greader_uber.recipe b/recipes/greader_uber.recipe deleted file mode 100644 index 5e02cdef5d..0000000000 --- a/recipes/greader_uber.recipe +++ /dev/null @@ -1,35 +0,0 @@ -import urllib, re, mechanize -from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre import __appname__ - -class GoogleReaderUber(BasicNewsRecipe): - title = 'Google Reader uber' - description = 'Fetches all feeds from your Google Reader account including the uncategorized items.' - needs_subscription = True - __author__ = 'davec, rollercoaster, Starson17' - base_url = 'http://www.google.com/reader/atom/' - oldest_article = 365 - max_articles_per_feed = 250 - get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed - use_embedded_content = True - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - if self.username is not None and self.password is not None: - request = urllib.urlencode([('Email', self.username), ('Passwd', self.password), - ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)]) - response = br.open('https://www.google.com/accounts/ClientLogin', request) - auth = re.search('Auth=(\S*)', response.read()).group(1) - cookies = mechanize.CookieJar() - br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies)) - br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)] - return br - - def get_feeds(self): - feeds = [] - soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list') - for id in soup.findAll(True, attrs={'name':['id']}): - url = id.contents[0].replace('broadcast','reading-list') - feeds.append((re.search('/([^/]*)$', url).group(1), - self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options)) - return feeds diff --git a/recipes/lanacion.recipe b/recipes/lanacion.recipe index c1cf8f6ae9..fe418f117d 100644 --- a/recipes/lanacion.recipe +++ 
b/recipes/lanacion.recipe @@ -1,5 +1,4 @@ -__license__ = 'GPL v3' -__copyright__ = '2008-2011, Darko Miletic ' +__copyright__ = '2008-2013, Darko Miletic ' ''' lanacion.com.ar ''' @@ -45,36 +44,32 @@ class Lanacion(BasicNewsRecipe): remove_tags_after = dict(attrs={'id':'relacionadas'}) feeds = [ - (u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' ) - ,(u'Deportes' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=131' ) - ,(u'Economia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=272' ) - ,(u'Informacion General' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=21' ) - ,(u'Cultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1' ) - ,(u'Opinion' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=28' ) - ,(u'Espectaculos' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=120' ) - ,(u'Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7' ) - ,(u'Ciencia&Salud' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=498' ) - ,(u'Revista' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=494' ) - ,(u'Enfoques' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=421' ) - ,(u'Comercio Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=347' ) - ,(u'Tecnologia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=432' ) - ,(u'Arquitectura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=366' ) - ,(u'Turismo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=504' ) - ,(u'Al volante' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=371' ) - ,(u'El Campo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=337' ) - ,(u'Moda y Belleza' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1312') - ,(u'Inmuebles Comerciales', 
u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1363') - ,(u'Countries' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1348') - ,(u'adnCultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6734') - ,(u'The WSJ Americas' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6373') - ,(u'Comunidad' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1344') - ,(u'Management' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7380') - ,(u'Bicentenario' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7276') + (u'Politica' , u'http://lanacion.com.ar.feedsportal.com/politica' ) + ,(u'Deportes' , u'http://lanacion.com.ar.feedsportal.com/deportes' ) + ,(u'Economia' , u'http://lanacion.com.ar.feedsportal.com/economia' ) + ,(u'Sociedad' , u'http://lanacion.com.ar.feedsportal.com/sociedad' ) + ,(u'Seguridad' , u'http://lanacion.com.ar.feedsportal.com/seguridad' ) + ,(u'Buenos Aires' , u'http://lanacion.com.ar.feedsportal.com/buenosaires' ) + ,(u'Opinion' , u'http://lanacion.com.ar.feedsportal.com/opinion' ) + ,(u'Espectaculos' , u'http://lanacion.com.ar.feedsportal.com/espectaculos' ) + ,(u'El Mundo' , u'http://lanacion.com.ar.feedsportal.com/mundo' ) + ,(u'Revista' , u'http://lanacion.com.ar.feedsportal.com/revistalanacion' ) + ,(u'Enfoques' , u'http://lanacion.com.ar.feedsportal.com/enfoques' ) + ,(u'Comercio Exterior' , u'http://lanacion.com.ar.feedsportal.com/comercioexterior' ) + ,(u'Tecnologia' , u'http://lanacion.com.ar.feedsportal.com/tecnologia' ) + ,(u'Turismo' , u'http://lanacion.com.ar.feedsportal.com/turismo' ) + ,(u'Al volante' , u'http://lanacion.com.ar.feedsportal.com/alvolante' ) + ,(u'El Campo' , u'http://lanacion.com.ar.feedsportal.com/elcampo' ) + ,(u'Moda y Belleza' , u'http://lanacion.com.ar.feedsportal.com/modaybelleza' ) + ,(u'Inmuebles Comerciales', u'http://lanacion.com.ar.feedsportal.com/inmueblescomerciales' ) + ,(u'Countries' , 
u'http://lanacion.com.ar.feedsportal.com/countries' ) + ,(u'adnCultura' , u'http://lanacion.com.ar.feedsportal.com/adncultura' ) + ,(u'The WSJ Americas' , u'http://lanacion.com.ar.feedsportal.com/wallstreetjournalamericas') ] def get_article_url(self, article): - link = BasicNewsRecipe.get_article_url(self,article) + link = article.get('guid', None) if link.startswith('http://blogs.lanacion') and not link.endswith('/'): return self.browser.open_novisit(link).geturl() if link.rfind('galeria=') > 0: diff --git a/recipes/las_vegas_review.recipe b/recipes/las_vegas_review.recipe index 1fce904c7b..0072826a61 100644 --- a/recipes/las_vegas_review.recipe +++ b/recipes/las_vegas_review.recipe @@ -1,5 +1,6 @@ from calibre.web.feeds.news import BasicNewsRecipe + class AdvancedUserRecipe1274742400(BasicNewsRecipe): title = u'Las Vegas Review Journal' @@ -9,24 +10,24 @@ class AdvancedUserRecipe1274742400(BasicNewsRecipe): oldest_article = 7 max_articles_per_feed = 100 - #keep_only_tags = [dict(id='content-main')] - #remove_tags = [dict(id=['right-col-content', 'trending-topics']), + # keep_only_tags = [dict(id='content-main')] + # remove_tags = [dict(id=['right-col-content', 'trending-topics']), #{'class':['ppy-outer']} #] no_stylesheets = True use_embedded_content = False auto_cleanup = True - feeds = [ - (u'News', u'http://www.lvrj.com/news.rss'), - (u'Business', u'http://www.lvrj.com/business.rss'), - (u'Living', u'http://www.lvrj.com/living.rss'), - (u'Opinion', u'http://www.lvrj.com/opinion.rss'), - (u'Neon', u'http://www.lvrj.com/neon.rss'), - #(u'Image', u'http://www.lvrj.com/image.rss'), - #(u'Home & Garden', u'http://www.lvrj.com/home_and_garden.rss'), - #(u'Furniture & Design', u'http://www.lvrj.com/furniture_and_design.rss'), - #(u'Drive', u'http://www.lvrj.com/drive.rss'), - #(u'Real Estate', u'http://www.lvrj.com/real_estate.rss'), - (u'Sports', u'http://www.lvrj.com/sports.rss')] + (u'Top Stories', u'http://www.reviewjournal.com/rss.xml'), + (u'News', 
u'http://www.reviewjournal.com/news/feed'), + (u'Business', u'http://www.reviewjournal.com/business/feed'), + (u'Living', u'http://www.reviewjournal.com/living/feed'), + (u'Opinion', u'http://www.reviewjournal.com/opinion/feed'), + (u'Neon', u'http://www.reviewjournal.com/neon/feed'), + #(u'Image', u'http://www.lvrj.com/image.rss'), + #(u'Home & Garden', u'http://www.lvrj.com/home_and_garden.rss'), + #(u'Furniture & Design', u'http://www.lvrj.com/furniture_and_design.rss'), + #(u'Drive', u'http://www.lvrj.com/drive.rss'), + #(u'Real Estate', u'http://www.lvrj.com/real_estate.rss'), + (u'Sports', u'http://www.reviewjournal.com/sports/feed')] diff --git a/recipes/metro_news_nl.recipe b/recipes/metro_news_nl.recipe index 0995719939..07b08bd5e5 100644 --- a/recipes/metro_news_nl.recipe +++ b/recipes/metro_news_nl.recipe @@ -39,6 +39,8 @@ from BeautifulSoup import BeautifulSoup Version 1.9.4 19-04-2013 Added regex filter for mailto Updated for new layout of metro-site + Version 1.9.5 28-05-2013 + Added some extra id's and classes to remove ''' class AdvancedUserRecipe1306097511(BasicNewsRecipe): @@ -46,7 +48,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe): oldest_article = 1.2 max_articles_per_feed = 25 __author__ = u'DrMerry' - description = u'Metro Nederland v1.9.4 2013-04-19' + description = u'Metro Nederland v1.9.5 2013-05-28, Download nieuws van de Nederlandse editie van de krant Metro' language = u'nl' simultaneous_downloads = 5 masthead_url = 'http://blog.metronieuws.nl/wp-content/themes/metro/images/header.gif' @@ -70,7 +72,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe): #(re.compile(r'<(a |/a)[^>]*>', re.DOTALL|re.IGNORECASE),lambda match:'') #(re.compile('(', re.IGNORECASE), lambda m: ''), (re.compile(r'', re.IGNORECASE), lambda m: '')] + + feeds = [(u'Aktualno\u015bci', u'http://7thguard.net/feed/')] + + diff --git a/recipes/time_magazine.recipe b/recipes/time_magazine.recipe index 9905a1df1d..b44cb9823b 100644 --- 
a/recipes/time_magazine.recipe +++ b/recipes/time_magazine.recipe @@ -1,77 +1,67 @@ #!/usr/bin/env python +from __future__ import (unicode_literals, division, absolute_import, + print_function) __license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' +__copyright__ = '2013, Kovid Goyal ' ''' time.com ''' -import re -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.jsnews import JavascriptRecipe from lxml import html -class Time(BasicNewsRecipe): +def wait_for_load(browser): + # This element is present in the black login bar at the top + browser.wait_for_element('#site-header p.constrain', timeout=180) + +# Keep the login method as standalone, so it can be easily tested +def do_login(browser, username, password): + from calibre.web.jsbrowser.browser import Timeout + browser.visit('http://www.time.com/time/magazine') + form = browser.select_form('#magazine-signup') + form['username'] = username + form['password'] = password + browser.submit('#paid-wall-submit') + try: + wait_for_load(browser) + except Timeout: + raise ValueError('Failed to login to time.com, check your username and password and try again in a little while.') + + +class Time(JavascriptRecipe): title = u'Time' - __author__ = 'Kovid Goyal, Rick Shang' - description = ('Weekly US magazine.') - encoding = 'utf-8' + __author__ = 'Kovid Goyal' + description = 'Weekly US magazine.' 
+ language = 'en' + needs_subscription = True + requires_version = (0, 9, 35) + no_stylesheets = True - language = 'en' remove_javascript = True - needs_subscription = True + keep_only_tags = ['article.post'] + remove_tags = ['meta', '.entry-sharing', '.entry-footer', '.wp-paginate', + '.post-rail', '.entry-comments', '.entry-tools', + '#paid-wall-cm-ad'] - keep_only_tags = [ - { - 'class':['primary-col', 'tout1'] - }, - ] - remove_tags = [ - {'class':['button', 'entry-sharing group', 'wp-paginate', - 'moving-markup', 'entry-comments']}, + recursions = 1 + links_from_selectors = ['.wp-paginate a.page[href]'] - ] extra_css = '.entry-date { padding-left: 2ex }' - preprocess_regexps = [(re.compile( - r''), lambda m:'')] + def do_login(self, browser, username, password): + do_login(browser, username, password) - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - # This site uses javascript in its login process - if self.username is not None and self.password is not None: - br.open('http://www.time.com/time/magazine') - br.select_form(predicate=lambda f: 'action' in f.attrs and f.attrs['action'] == 'https://auth.time.com/login.php') - br['username'] = self.username - br['password'] = self.password - # br['magcode'] = ['TD'] - br.find_control('turl').readonly = False - br['turl'] = 'http://www.time.com/time/magazine' - br.find_control('rurl').readonly = False - br['rurl'] = 'http://www.time.com/time/magazine' - br['remember'] = False - raw = br.submit().read() - if False and '>Log Out<' not in raw: - # This check is disabled as it does not work (there is probably - # some cookie missing) however, the login is "sufficient" for - # the actual article downloads to work. 
- raise ValueError('Failed to login to time.com, check' - ' your username and password') - return br - - def parse_index(self): - raw = self.index_to_soup('http://www.time.com/time/magazine', raw=True) + def get_publication_data(self, browser): + selector = 'section.sec-mag-showcase ul.ul-mag-showcase img[src]' + cover = browser.css_select(selector) + # URL for large cover + cover_url = unicode(cover.evaluateJavaScript('this.src').toString()).replace('_400.', '_600.') + raw = browser.html + ans = {'cover': browser.get_resource(cover_url)} + # We are already at the magazine page thanks to the do_login() method root = html.fromstring(raw) - img = root.xpath('//a[.="View Large Cover" and @href]') - if img: - cover_url = 'http://www.time.com' + img[0].get('href') - try: - nsoup = self.index_to_soup(cover_url) - img = nsoup.find('img', src=re.compile('archive/covers')) - if img is not None: - self.cover_url = img['src'] - except: - self.log.exception('Failed to fetch cover') dates = ''.join(root.xpath('//time[@class="updated"]/text()')) if dates: @@ -90,27 +80,22 @@ class Time(BasicNewsRecipe): if articles: feeds.append((section, articles)) - return feeds + ans['index'] = feeds + return ans def find_articles(self, sec): - for article in sec.xpath('./article'): h2 = article.xpath('./*[@class="entry-title"]') - if not h2: continue + if not h2: + continue a = h2[0].xpath('./a[@href]') - if not a: continue + if not a: + continue title = html.tostring(a[0], encoding=unicode, method='text').strip() - if not title: continue + if not title: + continue url = a[0].get('href') - if url.startswith('/'): - url = 'http://www.time.com'+url - if '/article/0,' in url: - soup = self.index_to_soup(url) - a = soup.find('a', href=lambda x:x and '/printout/' in x) - url = a['href'].replace('/printout', '/subscriber/printout') - else: - url += 'print/' if url.endswith('/') else '/print/' if url.startswith('/'): url = 'http://www.time.com'+url desc = '' @@ -126,10 +111,35 @@ class 
Time(BasicNewsRecipe): 'description' : desc } - def preprocess_html(self, soup): - for fig in soup.findAll('figure'): - img = fig.find('img') - if img is not None: - fig.replaceWith(img) - return soup + def load_complete(self, browser, url, recursion_level): + # This is needed as without it, subscriber content is blank. time.com + # appears to be using some crazy iframe+js callback for loading content + wait_for_load(browser) + return True + def postprocess_html(self, article, root, url, recursion_level): + # Remove the header and page n of m messages from pages after the first + # page + if recursion_level > 0: + for h in root.xpath('//header[@class="entry-header"]|//span[@class="page"]'): + h.getparent().remove(h) + # Unfloat the article images and also remove them from pages after the + # first page as they are repeated on every page. + for fig in root.xpath('//figure'): + parent = fig.getparent() + if recursion_level > 0: + parent.remove(fig) + else: + idx = parent.index(fig) + for img in reversed(fig.xpath('descendant::img')): + parent.insert(idx, img) + parent.remove(fig) + return root + +if __name__ == '__main__': + # Test the login + import sys + from calibre import jsbrowser + br = jsbrowser(default_timeout=120) + do_login(br, sys.argv[-2], sys.argv[-1]) + br.show_browser() diff --git a/recipes/toi.recipe b/recipes/toi.recipe index fc87920c9c..f14a4af5fe 100644 --- a/recipes/toi.recipe +++ b/recipes/toi.recipe @@ -1,74 +1,58 @@ -import re, urllib +# vim:fileencoding=utf-8 from calibre.web.feeds.news import BasicNewsRecipe +from lxml import html + +allowed_sections = {'Top Headlines', 'Opinion', 'Science', 'Education', 'US', 'Pakistan', 'India Business', 'Tech News', 'Cricket', 'Bollywood'} class TimesOfIndia(BasicNewsRecipe): - title = u'Times of India' - language = 'en_IN' + title = u'Times of India Headlines' + language = 'en' + description = 'Headline news from the Indian daily Times of India' __author__ = 'Kovid Goyal' - oldest_article = 1 #days - 
max_articles_per_feed = 25 no_stylesheets = True - remove_attributes = ['style'] - keep_only_tags = [ - {'class':re.compile(r'maintable12|prttabl')}, - {'id':['mod-article-header', - 'mod-a-body-after-first-para', 'mod-a-body-first-para']}, - ] + no_javascript = True + keep_only_tags = [dict(name='h1'), dict(id=['storydiv', 'contentarea'])] remove_tags = [ - {'class':re.compile('tabsintbgshow|prvnxtbg')}, - {'id':['fbrecommend', 'relmaindiv', 'shretxt', 'fbrecos', 'twtdiv', - 'gpls', 'auim']}, - {'class':['twitter-share-button', 'cmtmn']}, - ] + dict(name='div', attrs={'class':['video_list', 'rightpart', 'clearfix mTop15', 'footer_slider', 'read_more', 'flR', 'hide_new']}), + dict(name='div', attrs={'id':[ + 'most_pop', 'relartstory', 'slidebox', 'tmpFbokk', 'twittersource', + 'reportAbuseDiv', 'result', 'yahoobuzzsyn', 'fb-root']}), + dict(style='float:right;margin-left:5px;'), + ] - feeds = [ -('Top Stories', - 'http://timesofindia.indiatimes.com/rssfeedstopstories.cms'), -('India', - 'http://timesofindia.indiatimes.com/rssfeeds/-2128936835.cms'), -('World', - 'http://timesofindia.indiatimes.com/rssfeeds/296589292.cms'), -('Mumbai', - 'http://timesofindia.indiatimes.com/rssfeeds/-2128838597.cms'), -('Entertainment', - 'http://timesofindia.indiatimes.com/rssfeeds/1081479906.cms'), -('Cricket', - 'http://timesofindia.indiatimes.com/rssfeeds/4719161.cms'), -('Sunday TOI', - 'http://timesofindia.indiatimes.com/rssfeeds/1945062111.cms'), -('Life and Style', - 'http://timesofindia.indiatimes.com/rssfeeds/2886704.cms'), -('Business', - 'http://timesofindia.indiatimes.com/rssfeeds/1898055.cms'), -('Mad Mad World', - 'http://timesofindia.indiatimes.com/rssfeeds/2178430.cms'), -('Most Read', - 'http://timesofindia.indiatimes.com/rssfeedmostread.cms') -] + def parse_index(self): + index = 'http://timesofindia.indiatimes.com/home/headlines' + raw = self.index_to_soup(index, raw=True) + root = html.fromstring(raw) + + feeds = [] + current_section = None + current_articles = 
[] + + toc = root.xpath('//div[@align="center"]/descendant::table[@class="cnt"]')[0] + + for x in toc.xpath('descendant::*[name()="h3" or (name()="ul" and @class="content")]'): + if x.tag == 'h3': + if current_articles and current_section in allowed_sections: + feeds.append((current_section, current_articles)) + current_section = html.tostring(x, method='text', encoding=unicode).strip() + current_articles = [] + self.log(current_section) + else: + for a in x.xpath('descendant::li/descendant::a[@href]'): + title = html.tostring(a, method='text', encoding=unicode).strip() + url = a.get('href') + if url.startswith('/'): + url = 'http://timesofindia.indiatimes.com' + url + self.log(' ', title) + current_articles.append({'title':title, 'url':url}) + self.log('') + + if current_articles and current_section in allowed_sections: + feeds.append((current_section, current_articles)) + + return feeds - def get_article_url(self, article): - try: - s = article.summary - return urllib.unquote( - re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1)) - except: - pass - link = article.get('link', None) - if link and link.split('/')[-1]=="story01.htm": - link=link.split('/')[-2] - encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&', - '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://'} - for k, v in encoding.iteritems(): - link = link.replace(k, v) - return link - def print_version(self, url): - return url + '?prtpage=1' - def preprocess_html(self, soup, *args): - byl = soup.find(attrs={'class':'byline'}) - if byl is not None: - for l in byl.findAll('label'): - l.extract() - return soup diff --git a/session.vim b/session.vim index 589ae279d7..a513afc2ec 100644 --- a/session.vim +++ b/session.vim @@ -1,3 +1,6 @@ +" Scan the following dirs (recursively for tags +let g:project_tags_dirs = ['src/calibre'] + " Include directories for C++ modules let g:syntastic_cpp_include_dirs = [ \'/usr/include/python2.7', @@ -27,7 +30,7 @@ fun! 
CalibreLog() hi def link au Keyword syntax match au /^.*:::$/ nnoremap n :call cursor(1+search('\V:::\$', 'n'), 0) - nnoremap yb vt#ty + nnoremap yb v/#ty:nohl normal! gg2j edit Changelog.yaml edit src/calibre/constants.py diff --git a/setup/upload.py b/setup/upload.py index 82aa00e57c..673f9f4679 100644 --- a/setup/upload.py +++ b/setup/upload.py @@ -63,7 +63,7 @@ def upload_signatures(): shell=True) shutil.rmtree(tdir) -class ReUpload(Command): # {{{ +class ReUpload(Command): # {{{ description = 'Re-uplaod any installers present in dist/' @@ -118,7 +118,7 @@ def run_remote_upload(args): # }}} -class UploadInstallers(Command): # {{{ +class UploadInstallers(Command): # {{{ def add_options(self, parser): parser.add_option('--replace', default=False, action='store_true', help= @@ -172,7 +172,7 @@ class UploadInstallers(Command): # {{{ run_remote_upload(args) # }}} -class UploadUserManual(Command): # {{{ +class UploadUserManual(Command): # {{{ description = 'Build and upload the User Manual' sub_commands = ['manual'] @@ -184,7 +184,8 @@ class UploadUserManual(Command): # {{{ with CurrentDir(path): with ZipFile(f, 'w') as zf: for x in os.listdir('.'): - if x.endswith('.swp'): continue + if x.endswith('.swp'): + continue zf.write(x) if os.path.isdir(x): for y in os.listdir(x): @@ -203,7 +204,7 @@ class UploadUserManual(Command): # {{{ 'bugs:%s'%USER_MANUAL]), shell=True) # }}} -class UploadDemo(Command): # {{{ +class UploadDemo(Command): # {{{ description = 'Rebuild and upload various demos' @@ -223,20 +224,20 @@ class UploadDemo(Command): # {{{ check_call('scp /tmp/html-demo.zip divok:%s/'%(DOWNLOADS,), shell=True) # }}} -class UploadToServer(Command): # {{{ +class UploadToServer(Command): # {{{ description = 'Upload miscellaneous data to calibre server' def run(self, opts): check_call('ssh divok rm -f %s/calibre-\*.tar.xz'%DOWNLOADS, shell=True) - #check_call('scp dist/calibre-*.tar.xz divok:%s/'%DOWNLOADS, shell=True) + # check_call('scp dist/calibre-*.tar.xz 
divok:%s/'%DOWNLOADS, shell=True) check_call('gpg --armor --detach-sign dist/calibre-*.tar.xz', shell=True) check_call('scp dist/calibre-*.tar.xz.asc divok:%s/signatures/'%DOWNLOADS, shell=True) check_call('ssh divok /usr/local/bin/update-calibre', shell=True) - check_call('''ssh divok echo %s \\> %s/latest_version'''\ + check_call('''ssh divok echo %s \\> %s/latest_version''' %(__version__, DOWNLOADS), shell=True) check_call('ssh divok /etc/init.d/apache2 graceful', shell=True) diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index bd7d01c0a0..5e940efcd9 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -408,6 +408,10 @@ def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None, return opener +def jsbrowser(*args, **kwargs): + from calibre.web.jsbrowser.browser import Browser + return Browser(*args, **kwargs) + def fit_image(width, height, pwidth, pheight): ''' Fit image in box of width pwidth and height pheight. diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 3cf610e740..4c769a1fba 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -4,7 +4,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __appname__ = u'calibre' -numeric_version = (0, 9, 33) +numeric_version = (0, 9, 34) __version__ = u'.'.join(map(unicode, numeric_version)) __author__ = u"Kovid Goyal " diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 8daa42c36f..e6073a3bd4 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -554,6 +554,7 @@ from calibre.ebooks.conversion.plugins.txt_input import TXTInput from calibre.ebooks.conversion.plugins.lrf_input import LRFInput from calibre.ebooks.conversion.plugins.chm_input import CHMInput from calibre.ebooks.conversion.plugins.snb_input import SNBInput +from calibre.ebooks.conversion.plugins.docx_input import DOCXInput 
from calibre.ebooks.conversion.plugins.epub_output import EPUBOutput from calibre.ebooks.conversion.plugins.fb2_output import FB2Output @@ -595,6 +596,7 @@ plugins += [ LRFInput, CHMInput, SNBInput, + DOCXInput, ] plugins += [ EPUBOutput, diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py index 76c5841180..c6aa2e646f 100644 --- a/src/calibre/db/backend.py +++ b/src/calibre/db/backend.py @@ -985,11 +985,19 @@ class DB(object): else: if callable(getattr(data, 'read', None)): data = data.read() - try: - save_cover_data_to(data, path) - except (IOError, OSError): - time.sleep(0.2) - save_cover_data_to(data, path) + if data is None: + if os.path.exists(path): + try: + os.remove(path) + except (IOError, OSError): + time.sleep(0.2) + os.remove(path) + else: + try: + save_cover_data_to(data, path) + except (IOError, OSError): + time.sleep(0.2) + save_cover_data_to(data, path) def copy_format_to(self, book_id, fmt, fname, path, dest, windows_atomic_move=None, use_hardlink=False): diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py index ba68de23a7..b79ff2a31b 100644 --- a/src/calibre/db/cache.py +++ b/src/calibre/db/cache.py @@ -826,7 +826,8 @@ class Cache(object): @write_api def set_cover(self, book_id_data_map): ''' Set the cover for this book. data can be either a QImage, - QPixmap, file object or bytestring ''' + QPixmap, file object or bytestring. It can also be None, in which + case any existing cover is removed. 
''' for book_id, data in book_id_data_map.iteritems(): try: @@ -836,7 +837,8 @@ class Cache(object): path = self._field_for('path', book_id).replace('/', os.sep) self.backend.set_cover(book_id, path, data) - self._set_field('cover', {book_id:1 for book_id in book_id_data_map}) + return self._set_field('cover', { + book_id:(0 if data is None else 1) for book_id, data in book_id_data_map.iteritems()}) @write_api def set_metadata(self, book_id, mi, ignore_errors=False, force_changes=False, diff --git a/src/calibre/db/tests/main.py b/src/calibre/db/tests/main.py index bdc9561ec5..461edc95bb 100644 --- a/src/calibre/db/tests/main.py +++ b/src/calibre/db/tests/main.py @@ -24,16 +24,23 @@ if __name__ == '__main__': args = parser.parse_args() if args.name and args.name.startswith('.'): tests = find_tests() + q = args.name[1:] + if not q.startswith('test_'): + q = 'test_' + q ans = None try: for suite in tests: for test in suite._tests: for s in test: - if s._testMethodName == args.name[1:]: - tests = s + if s._testMethodName == q: + ans = s raise StopIteration() except StopIteration: pass + if ans is None: + print ('No test named %s found' % args.name) + raise SystemExit(1) + tests = ans else: tests = unittest.defaultTestLoader.loadTestsFromName(args.name) if args.name else find_tests() unittest.TextTestRunner(verbosity=4).run(tests) diff --git a/src/calibre/db/tests/writing.py b/src/calibre/db/tests/writing.py index 9ec368dd83..6d3169b905 100644 --- a/src/calibre/db/tests/writing.py +++ b/src/calibre/db/tests/writing.py @@ -355,7 +355,28 @@ class WritingTest(BaseTest): ae(opf.authors, ['author1', 'author2']) # }}} - def test_set_cover(self): + def test_set_cover(self): # {{{ ' Test setting of cover ' - self.assertTrue(False, 'TODO: test set_cover() and set_metadata()') + cache = self.init_cache() + ae = self.assertEqual + + # Test removing a cover + ae(cache.field_for('cover', 1), 1) + ae(cache.set_cover({1:None}), set([1])) + ae(cache.field_for('cover', 1), 0) + + img = 
b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00`\x00`\x00\x00\xff\xe1\x00\x16Exif\x00\x00II*\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xdb\x00C\x00\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\xff\xdb\x00C\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\xff\xc0\x00\x11\x08\x00\x01\x00\x01\x03\x01"\x00\x02\x11\x01\x03\x11\x01\xff\xc4\x00\x15\x00\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n\xff\xc4\x00\x14\x10\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xc4\x00\x14\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xc4\x00\x14\x11\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xda\x00\x0c\x03\x01\x00\x02\x11\x03\x11\x00?\x00\xbf\x80\x01\xff\xd9' # noqa {{{ }}} + # Test setting a cover + ae(cache.set_cover({bid:img for bid in (1, 2, 3)}), {1, 2, 3}) + old = self.init_old() + for book_id in (1, 2, 3): + ae(cache.cover(book_id), img, 'Cover was not set correctly for book %d' % book_id) + ae(cache.field_for('cover', book_id), 1) + ae(old.cover(book_id, index_is_id=True), img, 'Cover was not set correctly for book %d' % book_id) + self.assertTrue(old.has_cover(book_id)) + # }}} + + def test_set_metadata(self): + ' Test setting of metadata ' + self.assertTrue(False, 'TODO: test set_metadata()') diff --git a/src/calibre/db/write.py b/src/calibre/db/write.py index 1bdbabf082..7fdb2070c0 100644 --- a/src/calibre/db/write.py +++ b/src/calibre/db/write.py @@ -461,7 +461,7 @@ class Writer(object): dt = field.metadata['datatype'] self.accept_vals = lambda 
x: True if dt == 'composite' or field.name in { - 'id', 'cover', 'size', 'path', 'formats', 'news'}: + 'id', 'size', 'path', 'formats', 'news'}: self.set_books_func = dummy elif self.name[0] == '#' and self.name.endswith('_index'): self.set_books_func = custom_series_index diff --git a/src/calibre/debug.py b/src/calibre/debug.py index 6ccaf750e2..7a1fe754fa 100644 --- a/src/calibre/debug.py +++ b/src/calibre/debug.py @@ -152,7 +152,8 @@ def add_simple_plugin(path_to_plugin): shutil.rmtree(tdir) def print_basic_debug_info(out=None): - if out is None: out = sys.stdout + if out is None: + out = sys.stdout out = functools.partial(prints, file=out) import platform from calibre.constants import (__appname__, get_version, isportable, isosx, @@ -175,7 +176,7 @@ def print_basic_debug_info(out=None): def run_debug_gui(logpath): import time - time.sleep(3) # Give previous GUI time to shutdown fully and release locks + time.sleep(3) # Give previous GUI time to shutdown fully and release locks from calibre.constants import __appname__ prints(__appname__, _('Debug log')) print_basic_debug_info() @@ -197,6 +198,12 @@ def run_script(path, args): g['__file__'] = ef execfile(ef, g) +def inspect_mobi(path): + from calibre.ebooks.mobi.debug.main import inspect_mobi + prints('Inspecting:', path) + inspect_mobi(path) + print + def main(args=sys.argv): from calibre.constants import debug debug() @@ -231,7 +238,7 @@ def main(args=sys.argv): main() elif opts.command: sys.argv = args - exec opts.command + exec(opts.command) elif opts.debug_device_driver: debug_device_driver() elif opts.add_simple_plugin is not None: @@ -246,11 +253,8 @@ def main(args=sys.argv): sql_dump = args[-1] reinit_db(opts.reinitialize_db, sql_dump=sql_dump) elif opts.inspect_mobi: - from calibre.ebooks.mobi.debug.main import inspect_mobi for path in args[1:]: - prints('Inspecting:', path) inspect_mobi(path) - print elif opts.tweak_book: from calibre.ebooks.tweak import tweak tweak(opts.tweak_book) @@ -274,6 +278,16 
@@ def main(args=sys.argv): plugin.cli_main([plugin.name] + args[1:]) elif len(args) >= 2 and args[1].rpartition('.')[-1] in {'py', 'recipe'}: run_script(args[1], args[2:]) + elif len(args) >= 2 and args[1].rpartition('.')[-1] in {'mobi', 'azw', 'azw3', 'docx'}: + for path in args[1:]: + ext = path.rpartition('.')[-1] + if ext == 'docx': + from calibre.ebooks.docx.dump import dump + dump(path) + elif ext in {'mobi', 'azw', 'azw3'}: + inspect_mobi(path) + else: + print ('Cannot dump unknown filetype: %s' % path) else: from calibre import ipython ipython() @@ -282,3 +296,4 @@ def main(args=sys.argv): if __name__ == '__main__': sys.exit(main()) + diff --git a/src/calibre/devices/idevice/libimobiledevice.py b/src/calibre/devices/idevice/libimobiledevice.py index 08c9e24fd4..ca6ed57a77 100644 --- a/src/calibre/devices/idevice/libimobiledevice.py +++ b/src/calibre/devices/idevice/libimobiledevice.py @@ -1174,10 +1174,10 @@ class libiMobileDevice(): self.plist_lib.plist_free(plist) # To determine success, we need to inspect the returned plist - if hasattr(result, 'Status'): + if 'Status' in result: if self.verbose: self.log(" STATUS: %s" % result['Status']) - elif hasattr(result, 'Error'): + elif 'Error' in result: if self.verbose: self.log(" ERROR: %s" % result['Error']) raise libiMobileDeviceException(result['Error']) @@ -1293,7 +1293,9 @@ class libiMobileDevice(): else: index = 0 while devices[index]: - device_list.append(devices[index].contents.value) + # Filter out redundant entries + if devices[index].contents.value not in device_list: + device_list.append(devices[index].contents.value) index += 1 if self.verbose: self.log(" %s" % repr(device_list)) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 51cbdd8cf7..cddf6a561f 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -35,7 +35,7 @@ class KOBO(USBMS): gui_name = 'Kobo Reader' description = _('Communicate with the Kobo Reader') 
author = 'Timothy Legge and David Forrester' - version = (2, 0, 11) + version = (2, 0, 12) dbversion = 0 fwversion = 0 @@ -1218,7 +1218,7 @@ class KOBOTOUCH(KOBO): min_dbversion_images_on_sdcard = 77 min_dbversion_activiy = 77 - max_supported_fwversion = (2,5,3) + max_supported_fwversion = (2,6,1) min_fwversion_images_on_sdcard = (2,4,1) has_kepubs = True @@ -2381,9 +2381,17 @@ class KOBOTOUCH(KOBO): "WHERE Shelf.Name = C.ShelfName " "AND c._IsDeleted <> 'true')") + delete_activity_query = ("DELETE FROM Activity " + "WHERE Type = 'Shelf' " + "AND NOT EXISTS " + "(SELECT 1 FROM Shelf " + "WHERE Shelf.Name = Activity.Id)" + ) + cursor = connection.cursor() cursor.execute(delete_query) cursor.execute(update_query) + cursor.execute(delete_activity_query) connection.commit() cursor.close() diff --git a/src/calibre/ebooks/conversion/plugins/docx_input.py b/src/calibre/ebooks/conversion/plugins/docx_input.py new file mode 100644 index 0000000000..7492d46c68 --- /dev/null +++ b/src/calibre/ebooks/conversion/plugins/docx_input.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + +from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation + +class DOCXInput(InputFormatPlugin): + name = 'DOCX Input' + author = 'Kovid Goyal' + description = 'Convert DOCX files (.docx) to HTML' + file_types = set(['docx']) + + recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)]) + + def convert(self, stream, options, file_ext, log, accelerators): + from calibre.ebooks.docx.to_html import Convert + return Convert(stream, log=log)() + diff --git a/src/calibre/ebooks/conversion/plugins/html_input.py b/src/calibre/ebooks/conversion/plugins/html_input.py index 676a82cd3e..de783cfabe 100644 --- a/src/calibre/ebooks/conversion/plugins/html_input.py +++ 
b/src/calibre/ebooks/conversion/plugins/html_input.py @@ -87,7 +87,7 @@ class HTMLInput(InputFormatPlugin): return self._is_case_sensitive if not path or not os.path.exists(path): return islinux or isbsd - self._is_case_sensitive = not (os.path.exists(path.lower()) \ + self._is_case_sensitive = not (os.path.exists(path.lower()) and os.path.exists(path.upper())) return self._is_case_sensitive @@ -101,6 +101,8 @@ class HTMLInput(InputFormatPlugin): from calibre.ebooks.oeb.transforms.metadata import \ meta_info_to_oeb_metadata from calibre.ebooks.html.input import get_filelist + from calibre.ebooks.metadata import string_to_authors + from calibre.utils.localization import canonicalize_lang import cssutils, logging cssutils.log.setLevel(logging.WARN) self.OEB_STYLES = OEB_STYLES @@ -111,11 +113,20 @@ class HTMLInput(InputFormatPlugin): metadata = oeb.metadata meta_info_to_oeb_metadata(mi, metadata, log) if not metadata.language: - oeb.logger.warn(u'Language not specified') - metadata.add('language', get_lang().replace('_', '-')) + l = canonicalize_lang(getattr(opts, 'language', None)) + if not l: + oeb.logger.warn(u'Language not specified') + l = get_lang().replace('_', '-') + metadata.add('language', l) if not metadata.creator: - oeb.logger.warn('Creator not specified') - metadata.add('creator', self.oeb.translate(__('Unknown'))) + a = getattr(opts, 'authors', None) + if a: + a = string_to_authors(a) + if not a: + oeb.logger.warn('Creator not specified') + a = [self.oeb.translate(__('Unknown'))] + for aut in a: + metadata.add('creator', aut) if not metadata.title: oeb.logger.warn('Title not specified') metadata.add('title', self.oeb.translate(__('Unknown'))) @@ -175,7 +186,8 @@ class HTMLInput(InputFormatPlugin): titles = [] headers = [] for item in self.oeb.spine: - if not item.linear: continue + if not item.linear: + continue html = item.data title = ''.join(xpath(html, '/h:html/h:head/h:title/text()')) title = re.sub(r'\s+', ' ', title.strip()) @@ -193,7 +205,8 @@ 
class HTMLInput(InputFormatPlugin): if len(titles) > len(set(titles)): use = headers for title, item in izip(use, self.oeb.spine): - if not item.linear: continue + if not item.linear: + continue toc.add(title, item.href) oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True) @@ -291,3 +304,4 @@ class HTMLInput(InputFormatPlugin): self.log.exception('Failed to read CSS file: %r'%link) return (None, None) return (None, raw) + diff --git a/src/calibre/ebooks/docx/block_styles.py b/src/calibre/ebooks/docx/block_styles.py index b4533fbb80..16cd1a3343 100644 --- a/src/calibre/ebooks/docx/block_styles.py +++ b/src/calibre/ebooks/docx/block_styles.py @@ -87,9 +87,12 @@ def read_single_border(parent, edge): if sz is not None: # we dont care about art borders (they are only used for page borders) try: - width = min(96, max(2, float(sz))) / 8 + # WebKit needs at least 1pt to render borders + width = min(96, max(8, float(sz))) / 8 except (ValueError, TypeError): pass + if style == 'double' and width is not None and 0 < width < 3: + width = 3 # WebKit needs 3pts to render double borders return {p:v for p, v in zip(border_props, (padding, width, style, color))} def read_border(parent, dest, border_edges=('left', 'top', 'right', 'bottom'), name='pBdr'): @@ -297,7 +300,7 @@ class ParagraphStyle(object): # Misc. 
'text_indent', 'text_align', 'line_height', 'direction', 'background_color', - 'numbering', 'font_family', 'font_size', 'frame', + 'numbering', 'font_family', 'font_size', 'color', 'frame', ) def __init__(self, pPr=None): @@ -321,7 +324,7 @@ class ParagraphStyle(object): for s in XPath('./w:pStyle[@w:val]')(pPr): self.linked_style = get(s, 'w:val') - self.font_family = self.font_size = inherit + self.font_family = self.font_size = self.color = inherit self._css = None @@ -365,7 +368,7 @@ class ParagraphStyle(object): if self.line_height not in {inherit, '1'}: c['line-height'] = self.line_height - for x in ('text_indent', 'text_align', 'background_color', 'font_family', 'font_size'): + for x in ('text_indent', 'text_align', 'background_color', 'font_family', 'font_size', 'color'): val = getattr(self, x) if val is not inherit: if x == 'font_size': diff --git a/src/calibre/ebooks/docx/char_styles.py b/src/calibre/ebooks/docx/char_styles.py index ca023e23af..02b8299c94 100644 --- a/src/calibre/ebooks/docx/char_styles.py +++ b/src/calibre/ebooks/docx/char_styles.py @@ -36,7 +36,8 @@ def read_text_border(parent, dest): if sz is not None: # we dont care about art borders (they are only used for page borders) try: - border_width = min(96, max(2, float(sz))) / 8 + # A border of less than 1pt is not rendered by WebKit + border_width = min(96, max(8, float(sz))) / 8 except (ValueError, TypeError): pass @@ -103,7 +104,7 @@ def read_underline(parent, dest): for col in XPath('./w:u[@w:val]')(parent): val = get(col, 'w:val') if val: - ans = 'underline' + ans = val if val == 'none' else 'underline' setattr(dest, 'text_decoration', ans) def read_vert_align(parent, dest): @@ -116,8 +117,12 @@ def read_vert_align(parent, dest): def read_font_family(parent, dest): ans = inherit - for col in XPath('./w:rFonts[@w:ascii]')(parent): - val = get(col, 'w:ascii') + for col in XPath('./w:rFonts')(parent): + val = get(col, 'w:asciiTheme') + if val: + val = '|%s|' % val + else: + val = get(col, 
'w:ascii') if val: ans = val setattr(dest, 'font_family', ans) @@ -234,16 +239,5 @@ class RunStyle(object): return self._css def same_border(self, other): - for x in (self, other): - has_border = False - for y in ('color', 'style', 'width'): - if ('border-%s' % y) in x.css: - has_border = True - break - if not has_border: - return False - - s = tuple(self.css.get('border-%s' % y, None) for y in ('color', 'style', 'width')) - o = tuple(other.css.get('border-%s' % y, None) for y in ('color', 'style', 'width')) - return s == o + return self.get_border_css({}) == other.get_border_css({}) diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py new file mode 100644 index 0000000000..2b1e095025 --- /dev/null +++ b/src/calibre/ebooks/docx/cleanup.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + + +def mergeable(previous, current): + if previous.tail or current.tail: + return False + if previous.get('class', None) != current.get('class', None): + return False + if current.get('id', False): + return False + try: + return next(previous.itersiblings()) is current + except StopIteration: + return False + + +def append_text(parent, text): + if len(parent) > 0: + parent[-1].tail = (parent[-1].tail or '') + text + else: + parent.text = (parent.text or '') + text + + +def merge(parent, span): + if span.text: + append_text(parent, span.text) + for child in span: + parent.append(child) + if span.tail: + append_text(parent, span.tail) + span.getparent().remove(span) + + +def merge_run(run): + parent = run[0] + for span in run[1:]: + merge(parent, span) + + +def liftable(css): + # A is liftable if all its styling would work just as well if it is + # specified on the parent element. 
+ prefixes = {x.partition('-')[0] for x in css.iterkeys()} + return not (prefixes - {'text', 'font', 'letter', 'color', 'background'}) + + +def add_text(elem, attr, text): + old = getattr(elem, attr) or '' + setattr(elem, attr, old + text) + + +def lift(span): + # Replace an element by its content (text, children and tail) + parent = span.getparent() + idx = parent.index(span) + try: + last_child = span[-1] + except IndexError: + last_child = None + + if span.text: + if idx == 0: + add_text(parent, 'text', span.text) + else: + add_text(parent[idx - 1], 'tail', span.text) + + for child in reversed(span): + parent.insert(idx, child) + parent.remove(span) + + if span.tail: + if last_child is None: + if idx == 0: + add_text(parent, 'text', span.tail) + else: + add_text(parent[idx - 1], 'tail', span.tail) + else: + add_text(last_child, 'tail', span.tail) + + +def cleanup_markup(root, styles): + # Merge consecutive spans that have the same styling + current_run = [] + for span in root.xpath('//span'): + if not current_run: + current_run.append(span) + else: + last = current_run[-1] + if mergeable(last, span): + current_run.append(span) + else: + if len(current_run) > 1: + merge_run(current_run) + current_run = [span] + + # Remove unnecessary span tags that are the only child of a parent block + # element + class_map = dict(styles.classes.itervalues()) + parents = ('p', 'div') + tuple('h%d' % i for i in xrange(1, 7)) + for parent in root.xpath('//*[(%s) and count(span)=1]' % ' or '.join('name()="%s"' % t for t in parents)): + if len(parent) == 1 and not parent.text and not parent[0].tail and not parent[0].get('id', None): + # We have a block whose contents are entirely enclosed in a + span = parent[0] + span_class = span.get('class', None) + span_css = class_map.get(span_class, {}) + if liftable(span_css): + pclass = parent.get('class', None) + if span_class: + pclass = (pclass + ' ' + span_class) if pclass else span_class + parent.set('class', pclass) + parent.text = 
span.text + parent.remove(span) + for child in span: + parent.append(child) + + # Make spans whose only styling is bold or italic into and tags + for span in root.xpath('//span[@class]'): + css = class_map.get(span.get('class', None), {}) + if len(css) == 1: + if css == {'font-style':'italic'}: + span.tag = 'i' + del span.attrib['class'] + elif css == {'font-weight':'bold'}: + span.tag = 'b' + del span.attrib['class'] + + # Get rid of s that have no styling + for span in root.xpath('//span[not(@class) and not(@id)]'): + lift(span) + diff --git a/src/calibre/ebooks/docx/container.py b/src/calibre/ebooks/docx/container.py index 7c048fa4be..68f74a3c82 100644 --- a/src/calibre/ebooks/docx/container.py +++ b/src/calibre/ebooks/docx/container.py @@ -39,7 +39,7 @@ def read_doc_props(raw, mi): for keywords in XPath('//cp:keywords')(root): if keywords.text and keywords.text.strip(): for x in keywords.text.split(): - tags.extend(y.strip() for y in x.split(',')) + tags.extend(y.strip() for y in x.split(',') if y.strip()) if tags: mi.tags = tags authors = XPath('//dc:creator')(root) diff --git a/src/calibre/ebooks/docx/dump.py b/src/calibre/ebooks/docx/dump.py index 6ebc2e8871..103236f05f 100644 --- a/src/calibre/ebooks/docx/dump.py +++ b/src/calibre/ebooks/docx/dump.py @@ -15,7 +15,7 @@ from calibre.utils.zipfile import ZipFile def dump(path): dest = os.path.splitext(os.path.basename(path))[0] - dest += '_extracted' + dest += '-dumped' if os.path.exists(dest): shutil.rmtree(dest) with ZipFile(path) as zf: diff --git a/src/calibre/ebooks/docx/images.py b/src/calibre/ebooks/docx/images.py index 5cee6a595d..e16bedb160 100644 --- a/src/calibre/ebooks/docx/images.py +++ b/src/calibre/ebooks/docx/images.py @@ -104,9 +104,12 @@ class Images(object): if rid in self.used: return self.used[rid] raw = self.docx.read(self.rid_map[rid]) - base = base or ascii_filename(self.rid_map[rid].rpartition('/')[-1]).replace(' ', '_') + base = base or 
ascii_filename(self.rid_map[rid].rpartition('/')[-1]).replace(' ', '_') or 'image' ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg' - base = base.rpartition('.')[0] + '.' + ext + base = base.rpartition('.')[0] + if not base: + base = 'image' + base += '.' + ext exists = frozenset(self.used.itervalues()) c = 1 while base in exists: @@ -132,7 +135,7 @@ class Images(object): src = self.generate_filename(rid, name) img = IMG(src='images/%s' % src) if alt: - img(alt=alt) + img.set('alt', alt) return img def drawing_to_html(self, drawing, page): @@ -157,6 +160,17 @@ class Images(object): ans.set('style', '; '.join('%s: %s' % (k, v) for k, v in style.iteritems())) yield ans + def pict_to_html(self, pict, page): + for imagedata in XPath('descendant::v:imagedata[@r:id]')(pict): + rid = get(imagedata, 'r:id') + if rid in self.rid_map: + src = self.generate_filename(rid) + img = IMG(src='images/%s' % src, style="display:block") + alt = get(imagedata, 'o:title') + if alt: + img.set('alt', alt) + yield img + def get_float_properties(self, anchor, style, page): if 'display' not in style: style['display'] = 'block' @@ -200,6 +214,8 @@ class Images(object): if elem.tag.endswith('}drawing'): for tag in self.drawing_to_html(elem, page): yield tag - # TODO: Handle w:pict + else: + for tag in self.pict_to_html(elem, page): + yield tag diff --git a/src/calibre/ebooks/docx/names.py b/src/calibre/ebooks/docx/names.py index 0f4e6155b1..ed9b9ea51f 100644 --- a/src/calibre/ebooks/docx/names.py +++ b/src/calibre/ebooks/docx/names.py @@ -7,7 +7,6 @@ __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' import re -from future_builtins import map from lxml.etree import XPath as X @@ -23,6 +22,7 @@ IMAGES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships LINKS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink' FOOTNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes' ENDNOTES = 
'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes' +THEMES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme' namespaces = { 'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main', @@ -84,11 +84,10 @@ def get(x, attr, default=None): return x.attrib.get(expand(attr), default) def ancestor(elem, name): - tag = expand(name) - while elem is not None: - elem = elem.getparent() - if getattr(elem, 'tag', None) == tag: - return elem + try: + return XPath('ancestor::%s[1]' % name)(elem)[0] + except IndexError: + return None def generate_anchor(name, existing): x = y = 'id_' + re.sub(r'[^0-9a-zA-Z_]', '', ascii_text(name)).lstrip('_') @@ -99,7 +98,7 @@ def generate_anchor(name, existing): return y def children(elem, *args): - return elem.iterchildren(*map(expand, args)) + return XPath('|'.join('child::%s' % a for a in args))(elem) def descendants(elem, *args): - return elem.iterdescendants(*map(expand, args)) + return XPath('|'.join('descendant::%s' % a for a in args))(elem) diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py index 72046ebda3..8e4d811803 100644 --- a/src/calibre/ebooks/docx/styles.py +++ b/src/calibre/ebooks/docx/styles.py @@ -142,8 +142,8 @@ class Styles(object): def get(self, key, default=None): return self.id_map.get(key, default) - def __call__(self, root, fonts): - self.fonts = fonts + def __call__(self, root, fonts, theme): + self.fonts, self.theme = fonts, theme for s in XPath('//w:style')(root): s = Style(s) if s.style_id: @@ -265,7 +265,8 @@ class Styles(object): def resolve_run(self, r): ans = self.run_cache.get(r, None) if ans is None: - p = r.getparent() + p = XPath('ancestor::w:p[1]')(r) + p = p[0] if p else None ans = self.run_cache[r] = RunStyle() direct_formatting = None for rPr in XPath('./w:rPr')(r): @@ -282,12 +283,16 @@ class Styles(object): default_char = self.default_styles.get('character', None) if self.default_character_style is not None: 
parent_styles.append(self.default_character_style) - ts = self.tables.run_style(p) - if ts is not None: - parent_styles.append(ts) pstyle = self.para_char_cache.get(p, None) if pstyle is not None: parent_styles.append(pstyle) + # As best as I can understand the spec, table overrides should be + # applied before paragraph overrides, but word does it + # this way, see the December 2007 table header in the demo + # document. + ts = self.tables.run_style(p) + if ts is not None: + parent_styles.append(ts) if direct_formatting.linked_style is not None: ls = self.get(direct_formatting.linked_style).character_style if ls is not None: @@ -299,7 +304,8 @@ class Styles(object): setattr(ans, attr, self.run_val(parent_styles, direct_formatting, attr)) if ans.font_family is not inherit: - ans.font_family = self.fonts.family_for(ans.font_family, ans.b, ans.i) + ff = self.theme.resolve_font_family(ans.font_family) + ans.font_family = self.fonts.family_for(ff, ans.b, ans.i) return ans @@ -312,51 +318,63 @@ class Styles(object): def cascade(self, layers): self.body_font_family = 'serif' self.body_font_size = '10pt' + self.body_color = 'black' + + def promote_property(char_styles, block_style, prop): + vals = {getattr(s, prop) for s in char_styles} + if len(vals) == 1: + # All the character styles have the same value + for s in char_styles: + setattr(s, prop, inherit) + setattr(block_style, prop, next(iter(vals))) for p, runs in layers.iteritems(): + has_links = '1' in {r.get('is-link', None) for r in runs} char_styles = [self.resolve_run(r) for r in runs] block_style = self.resolve_paragraph(p) - c = Counter() + for prop in ('font_family', 'font_size', 'color'): + if has_links and prop == 'color': + # We cannot promote color as browser rendering engines will + # override the link color setting it to blue, unless the + # color is specified on the link element itself + continue + promote_property(char_styles, block_style, prop) for s in char_styles: - if s.font_family is not inherit: 
- c[s.font_family] += 1 + if s.text_decoration == 'none': + # The default text decoration is 'none' + s.text_decoration = inherit + + def promote_most_common(block_styles, prop, default): + c = Counter() + for s in block_styles: + val = getattr(s, prop) + if val is not inherit: + c[val] += 1 + val = None if c: - family = c.most_common(1)[0][0] - block_style.font_family = family - for s in char_styles: - if s.font_family == family: - s.font_family = inherit + val = c.most_common(1)[0][0] + for s in block_styles: + oval = getattr(s, prop) + if oval is inherit: + if default != val: + setattr(s, prop, default) + elif oval == val: + setattr(s, prop, inherit) + return val - sizes = [s.font_size for s in char_styles if s.font_size is not inherit] - if sizes: - sz = block_style.font_size = sizes[0] - for s in char_styles: - if s.font_size == sz: - s.font_size = inherit + block_styles = tuple(self.resolve_paragraph(p) for p in layers) - block_styles = [self.resolve_paragraph(p) for p in layers] - c = Counter() - for s in block_styles: - if s.font_family is not inherit: - c[s.font_family] += 1 + ff = promote_most_common(block_styles, 'font_family', self.body_font_family) + if ff is not None: + self.body_font_family = ff - if c: - self.body_font_family = family = c.most_common(1)[0][0] - for s in block_styles: - if s.font_family == family: - s.font_family = inherit + fs = promote_most_common(block_styles, 'font_size', int(self.body_font_size[:2])) + if fs is not None: + self.body_font_size = '%.3gpt' % fs - c = Counter() - for s in block_styles: - if s.font_size is not inherit: - c[s.font_size] += 1 - - if c: - sz = c.most_common(1)[0][0] - for s in block_styles: - if s.font_size == sz: - s.font_size = inherit - self.body_font_size = '%.3gpt' % sz + color = promote_most_common(block_styles, 'color', self.body_color) + if color is not None: + self.body_color = color def resolve_numbering(self, numbering): # When a numPr element appears inside a paragraph style, the lvl info @@ 
-398,9 +416,7 @@ class Styles(object): ef = self.fonts.embed_fonts(dest_dir, docx) prefix = textwrap.dedent( '''\ - body { font-family: %s; font-size: %s } - - p { text-indent: 1.5em } + body { font-family: %s; font-size: %s; color: %s } ul, ol, p { margin: 0; padding: 0 } @@ -416,7 +432,7 @@ class Styles(object): dl.notes dd:last-of-type { page-break-after: avoid } - ''') % (self.body_font_family, self.body_font_size) + ''') % (self.body_font_family, self.body_font_size, self.body_color) if ef: prefix = ef + '\n' + prefix @@ -427,3 +443,4 @@ class Styles(object): ans.append('.%s {\n%s\n}\n' % (cls, b.rstrip(';'))) return prefix + '\n' + '\n'.join(ans) + diff --git a/src/calibre/ebooks/docx/tables.py b/src/calibre/ebooks/docx/tables.py index 6732533b84..2e0296bfaa 100644 --- a/src/calibre/ebooks/docx/tables.py +++ b/src/calibre/ebooks/docx/tables.py @@ -8,11 +8,14 @@ __copyright__ = '2013, Kovid Goyal ' from lxml.html.builder import TABLE, TR, TD -from calibre.ebooks.docx.block_styles import inherit, read_shd, read_border, binary_property, border_props, ParagraphStyle # noqa +from calibre.ebooks.docx.block_styles import inherit, read_shd as rs, read_border, binary_property, border_props, ParagraphStyle from calibre.ebooks.docx.char_styles import RunStyle from calibre.ebooks.docx.names import XPath, get, is_tag # Read from XML {{{ +read_shd = rs +edges = ('left', 'top', 'right', 'bottom') + def _read_width(elem): ans = inherit try: @@ -44,13 +47,13 @@ def read_cell_width(parent, dest): def read_padding(parent, dest): name = 'tblCellMar' if parent.tag.endswith('}tblPr') else 'tcMar' - left = top = bottom = right = inherit + ans = {x:inherit for x in edges} for mar in XPath('./w:%s' % name)(parent): - for x in ('left', 'top', 'right', 'bottom'): + for x in edges: for edge in XPath('./w:%s' % x)(mar): - locals()[x] = _read_width(edge) - for x in ('left', 'top', 'right', 'bottom'): - setattr(dest, 'cell_padding_%s' % x, locals()[x]) + ans[x] = _read_width(edge) + for x 
in edges: + setattr(dest, 'cell_padding_%s' % x, ans[x]) def read_justification(parent, dest): left = right = inherit @@ -73,6 +76,12 @@ def read_spacing(parent, dest): ans = _read_width(cs) setattr(dest, 'spacing', ans) +def read_float(parent, dest): + ans = inherit + for x in XPath('./w:tblpPr')(parent): + ans = {k.rpartition('}')[-1]: v for k, v in x.attrib.iteritems()} + setattr(dest, 'float', ans) + def read_indent(parent, dest): ans = inherit for cs in XPath('./w:tblInd')(parent): @@ -139,40 +148,124 @@ def read_look(parent, dest): # }}} def clone(style): - ans = type(style)() + try: + ans = type(style)() + except TypeError: + return None ans.update(style) return ans -class RowStyle(object): +class Style(object): + + def update(self, other): + for prop in self.all_properties: + nval = getattr(other, prop) + if nval is not inherit: + setattr(self, prop, nval) + + def convert_spacing(self): + ans = {} + if self.spacing is not inherit: + if self.spacing in {'auto', '0'}: + ans['border-collapse'] = 'collapse' + else: + ans['border-collapse'] = 'separate' + ans['border-spacing'] = self.spacing + return ans + + def convert_border(self): + c = {} + for x in edges: + for prop in border_props: + prop = prop % x + if prop.startswith('border'): + val = getattr(self, prop) + if val is not inherit: + if isinstance(val, (int, float)): + val = '%.3gpt' % val + c[prop.replace('_', '-')] = val + return c + +class RowStyle(Style): all_properties = ('height', 'cantSplit', 'hidden', 'spacing',) - def __init__(self, tcPr=None): - if tcPr is None: - for p in self.all_properties: - setattr(self, p, inherit) - else: - pass - -class CellStyle(object): - - all_properties = ('background_color', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top', - 'cell_padding_bottom', 'width', 'vertical_align', 'col_span', 'vMerge', 'hMerge', - ) + tuple(k % edge for edge in border_edges for k in border_props) - def __init__(self, trPr=None): if trPr is None: for p in self.all_properties: 
setattr(self, p, inherit) + else: + for p in ('hidden', 'cantSplit'): + setattr(self, p, binary_property(trPr, p)) + for p in ('spacing', 'height'): + f = globals()['read_%s' % p] + f(trPr, self) + self._css = None + + @property + def css(self): + if self._css is None: + c = self._css = {} + if self.hidden is True: + c['display'] = 'none' + if self.cantSplit is True: + c['page-break-inside'] = 'avoid' + if self.height is not inherit: + rule, val = self.height + if rule != 'auto': + try: + c['min-height' if rule == 'atLeast' else 'height'] = '%.3gpt' % (int(val)/20) + except (ValueError, TypeError): + pass + c.update(self.convert_spacing()) + return self._css + +class CellStyle(Style): + + all_properties = ('background_color', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top', + 'cell_padding_bottom', 'width', 'vertical_align', 'col_span', 'vMerge', 'hMerge', 'row_span', + ) + tuple(k % edge for edge in border_edges for k in border_props) + + def __init__(self, tcPr=None): + if tcPr is None: + for p in self.all_properties: + setattr(self, p, inherit) else: for x in ('borders', 'shd', 'padding', 'cell_width', 'vertical_align', 'col_span', 'merge'): f = globals()['read_%s' % x] - f(trPr, self) + f(tcPr, self) + self.row_span = inherit + self._css = None -class TableStyle(object): + @property + def css(self): + if self._css is None: + self._css = c = {} + if self.background_color is not inherit: + c['background-color'] = self.background_color + if self.width not in (inherit, 'auto'): + c['width'] = self.width + c['vertical-align'] = 'top' if self.vertical_align is inherit else self.vertical_align + for x in edges: + val = getattr(self, 'cell_padding_%s' % x) + if val not in (inherit, 'auto'): + c['padding-%s' % x] = val + elif val is inherit and x in {'left', 'right'}: + c['padding-%s' % x] = '%.3gpt' % (115/20) + # In Word, tables are apparently rendered with some default top and + # bottom padding irrespective of the cellMargin values. 
Simulate + # that here. + for x in ('top', 'bottom'): + if c.get('padding-%s' % x, '0pt') == '0pt': + c['padding-%s' % x] = '0.5ex' + c.update(self.convert_border()) + + return self._css + +class TableStyle(Style): all_properties = ( - 'width', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top', + 'width', 'float', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top', 'cell_padding_bottom', 'margin_left', 'margin_right', 'background_color', 'spacing', 'indent', 'overrides', 'col_band_size', 'row_band_size', 'look', ) + tuple(k % edge for edge in border_edges for k in border_props) @@ -183,7 +276,7 @@ class TableStyle(object): setattr(self, p, inherit) else: self.overrides = inherit - for x in ('width', 'padding', 'shd', 'justification', 'spacing', 'indent', 'borders', 'band_size', 'look'): + for x in ('width', 'float', 'padding', 'shd', 'justification', 'spacing', 'indent', 'borders', 'band_size', 'look'): f = globals()['read_%s' % x] f(tblPr, self) parent = tblPr.getparent() @@ -197,17 +290,12 @@ class TableStyle(object): for trPr in XPath('./w:trPr')(tblStylePr): orides['row'] = RowStyle(trPr) for tcPr in XPath('./w:tcPr')(tblStylePr): - orides['cell'] = tcPr + orides['cell'] = CellStyle(tcPr) for pPr in XPath('./w:pPr')(tblStylePr): orides['para'] = ParagraphStyle(pPr) for rPr in XPath('./w:rPr')(tblStylePr): orides['run'] = RunStyle(rPr) - - def update(self, other): - for prop in self.all_properties: - nval = getattr(other, prop) - if nval is not inherit: - setattr(self, prop, nval) + self._css = None def resolve_based_on(self, parent): for p in self.all_properties: @@ -215,11 +303,50 @@ class TableStyle(object): if val is inherit: setattr(self, p, getattr(parent, p)) + @property + def css(self): + if self._css is None: + c = self._css = {} + if self.width not in (inherit, 'auto'): + c['width'] = self.width + for x in ('background_color', 'margin_left', 'margin_right'): + val = getattr(self, x) + if val is not inherit: + c[x.replace('_', 
'-')] = val + if self.indent not in (inherit, 'auto') and self.margin_left != 'auto': + c['margin-left'] = self.indent + if self.float is not inherit: + for x in ('left', 'top', 'right', 'bottom'): + val = self.float.get('%sFromText' % x, 0) + try: + val = '%.3gpt' % (int(val) / 20) + except (ValueError, TypeError): + val = '0' + c['margin-%s' % x] = val + if 'tblpXSpec' in self.float: + c['float'] = 'right' if self.float['tblpXSpec'] in {'right', 'outside'} else 'left' + else: + page = self.page + page_width = page.width - page.margin_left - page.margin_right + try: + x = int(self.float['tblpX']) / 20 + except (KeyError, ValueError, TypeError): + x = 0 + c['float'] = 'left' if (x/page_width) < 0.65 else 'right' + c.update(self.convert_spacing()) + if 'border-collapse' not in c: + c['border-collapse'] = 'collapse' + c.update(self.convert_border()) + + return self._css + + class Table(object): - def __init__(self, tbl, styles, para_map): + def __init__(self, tbl, styles, para_map, is_sub_table=False): self.tbl = tbl self.styles = styles + self.is_sub_table = is_sub_table # Read Table Style style = {'table':TableStyle()} @@ -243,21 +370,33 @@ class Table(object): style['table'].update(TableStyle(tblPr)) self.table_style, self.paragraph_style = style['table'], style.get('paragraph', None) self.run_style = style.get('run', None) + self.overrides = self.table_style.overrides + if self.overrides is inherit: + self.overrides = {} + if 'wholeTable' in self.overrides and 'table' in self.overrides['wholeTable']: + self.table_style.update(self.overrides['wholeTable']['table']) self.style_map = {} self.paragraphs = [] + self.cell_map = [] rows = XPath('./w:tr')(tbl) for r, tr in enumerate(rows): + overrides = self.get_overrides(r, None, len(rows), None) + self.resolve_row_style(tr, overrides) cells = XPath('./w:tc')(tr) + self.cell_map.append([]) for c, tc in enumerate(cells): overrides = self.get_overrides(r, c, len(rows), len(cells)) + self.resolve_cell_style(tc, overrides, 
r, c, len(rows), len(cells)) + self.cell_map[-1].append(tc) for p in XPath('./w:p')(tc): para_map[p] = self self.paragraphs.append(p) self.resolve_para_style(p, overrides) - self.sub_tables = {x:Table(x, styles, para_map) for x in XPath('./w:tr/w:tc/w:tbl')(tbl)} + self.handle_merged_cells() + self.sub_tables = {x:Table(x, styles, para_map, is_sub_table=True) for x in XPath('./w:tr/w:tc/w:tbl')(tbl)} def override_allowed(self, name): 'Check if the named override is allowed by the tblLook element' @@ -279,37 +418,102 @@ class Table(object): overrides = ['wholeTable'] def divisor(m, n): return (m - (m % n)) // n - odd_column_band = (divisor(c, self.table_style.col_band_size) % 2) == 0 - overrides.append('band%dVert' % (1 if odd_column_band else 2)) - odd_row_band = (divisor(r, self.table_style.row_band_size) % 2) == 0 + if c is not None: + odd_column_band = (divisor(c, self.table_style.col_band_size) % 2) == 1 + overrides.append('band%dVert' % (1 if odd_column_band else 2)) + odd_row_band = (divisor(r, self.table_style.row_band_size) % 2) == 1 overrides.append('band%dHorz' % (1 if odd_row_band else 2)) + + # According to the OOXML spec columns should have higher override + # priority than rows, but Word seems to do it the other way around. 
+ if c is not None: + if c == 0: + overrides.append('firstCol') + if c >= num_of_cols_in_row - 1: + overrides.append('lastCol') if r == 0: overrides.append('firstRow') if r >= num_of_rows - 1: overrides.append('lastRow') - if c == 0: - overrides.append('firstCol') - if c >= num_of_cols_in_row - 1: - overrides.append('lastCol') - if r == 0: - if c == 0: - overrides.append('nwCell') - if c == num_of_cols_in_row - 1: - overrides.append('neCell') - if r == num_of_rows - 1: - if c == 0: - overrides.append('swCell') - if c == num_of_cols_in_row - 1: - overrides.append('seCell') + if c is not None: + if r == 0: + if c == 0: + overrides.append('nwCell') + if c == num_of_cols_in_row - 1: + overrides.append('neCell') + if r == num_of_rows - 1: + if c == 0: + overrides.append('swCell') + if c == num_of_cols_in_row - 1: + overrides.append('seCell') return tuple(filter(self.override_allowed, overrides)) + def resolve_row_style(self, tr, overrides): + rs = RowStyle() + for o in overrides: + if o in self.overrides: + ovr = self.overrides[o] + ors = ovr.get('row', None) + if ors is not None: + rs.update(ors) + + for trPr in XPath('./w:trPr')(tr): + rs.update(RowStyle(trPr)) + self.style_map[tr] = rs + + def resolve_cell_style(self, tc, overrides, row, col, rows, cols_in_row): + cs = CellStyle() + # from lxml.etree import tostring + # txt = tostring(tc, method='text', encoding=unicode) + for o in overrides: + if o in self.overrides: + ovr = self.overrides[o] + ors = ovr.get('cell', None) + if ors is not None: + cs.update(ors) + + for tcPr in XPath('./w:tcPr')(tc): + cs.update(CellStyle(tcPr)) + + for x in edges: + p = 'cell_padding_%s' % x + val = getattr(cs, p) + if val is inherit: + setattr(cs, p, getattr(self.table_style, p)) + + is_inside_edge = ( + (x == 'left' and col > 0) or + (x == 'top' and row > 0) or + (x == 'right' and col < cols_in_row - 1) or + (x == 'bottom' and row < rows -1) + ) + inside_edge = ('insideH' if x in {'top', 'bottom'} else 'insideV') if is_inside_edge 
else None + for prop in border_props: + if not prop.startswith('border'): + continue + eprop = prop % x + iprop = (prop % inside_edge) if inside_edge else None + val = getattr(cs, eprop) + if val is inherit and iprop is not None: + # Use the insideX borders if the main cell borders are not + # specified + val = getattr(cs, iprop) + if val is inherit: + val = getattr(self.table_style, iprop) + if not is_inside_edge and val == 'none': + # Cell borders must override table borders even when the + # table border is not null and the cell border is null. + val = 'hidden' + setattr(cs, eprop, val) + + self.style_map[tc] = cs + def resolve_para_style(self, p, overrides): - text_styles = [None if self.paragraph_style is None else clone(self.paragraph_style), - None if self.run_style is None else clone(self.run_style)] + text_styles = [clone(self.paragraph_style), clone(self.run_style)] for o in overrides: - if o in self.table_style.overrides: - ovr = self.table_style.overrides[o] + if o in self.overrides: + ovr = self.overrides[o] for i, name in enumerate(('para', 'run')): ops = ovr.get(name, None) if ops is not None: @@ -319,6 +523,55 @@ class Table(object): text_styles[i].update(ops) self.style_map[p] = text_styles + def handle_merged_cells(self): + if not self.cell_map: + return + # Handle vMerge + max_col_num = max(len(r) for r in self.cell_map) + for c in xrange(max_col_num): + cells = [row[c] if c < len(row) else None for row in self.cell_map] + runs = [[]] + for cell in cells: + try: + s = self.style_map[cell] + except KeyError: # cell is None + s = CellStyle() + if s.vMerge == 'restart': + runs.append([cell]) + elif s.vMerge == 'continue': + runs[-1].append(cell) + else: + runs.append([]) + for run in runs: + if len(run) > 1: + self.style_map[run[0]].row_span = len(run) + for tc in run[1:]: + tc.getparent().remove(tc) + + # Handle hMerge + for cells in self.cell_map: + runs = [[]] + for cell in cells: + try: + s = self.style_map[cell] + except KeyError: # cell is 
None + s = CellStyle() + if s.col_span is not inherit: + runs.append([]) + continue + if s.hMerge == 'restart': + runs.append([cell]) + elif s.hMerge == 'continue': + runs[-1].append(cell) + else: + runs.append([]) + + for run in runs: + if len(run) > 1: + self.style_map[run[0]].col_span = len(run) + for tc in run[1:]: + tc.getparent().remove(tc) + def __iter__(self): for p in self.paragraphs: yield p @@ -326,8 +579,10 @@ class Table(object): for p in t: yield p - def apply_markup(self, rmap, parent=None): + def apply_markup(self, rmap, page, parent=None): table = TABLE('\n\t\t') + self.table_style.page = page + style_map = {} if parent is None: try: first_para = rmap[next(iter(self))] @@ -340,36 +595,53 @@ class Table(object): parent.append(table) for row in XPath('./w:tr')(self.tbl): tr = TR('\n\t\t\t') + style_map[tr] = self.style_map[row] tr.tail = '\n\t\t' table.append(tr) for tc in XPath('./w:tc')(row): td = TD() + style_map[td] = s = self.style_map[tc] + if s.col_span is not inherit: + td.set('colspan', type('')(s.col_span)) + if s.row_span is not inherit: + td.set('rowspan', type('')(s.row_span)) td.tail = '\n\t\t\t' tr.append(td) for x in XPath('./w:p|./w:tbl')(tc): if x.tag.endswith('}p'): td.append(rmap[x]) else: - self.sub_tables[x].apply_markup(rmap, parent=td) + self.sub_tables[x].apply_markup(rmap, page, parent=td) if len(tr): tr[-1].tail = '\n\t\t' if len(table): table[-1].tail = '\n\t' + table_style = self.table_style.css + if table_style: + table.set('class', self.styles.register(table_style, 'table')) + for elem, style in style_map.iteritems(): + css = style.css + if css: + elem.set('class', self.styles.register(css, elem.tag)) class Tables(object): def __init__(self): self.tables = [] self.para_map = {} + self.sub_tables = set() def register(self, tbl, styles): + if tbl in self.sub_tables: + return self.tables.append(Table(tbl, styles, self.para_map)) + self.sub_tables |= set(self.tables[-1].sub_tables) - def apply_markup(self, object_map): + 
def apply_markup(self, object_map, page_map): rmap = {v:k for k, v in object_map.iteritems()} for table in self.tables: - table.apply_markup(rmap) + table.apply_markup(rmap, page_map[table.tbl]) def para_style(self, p): table = self.para_map.get(p, None) diff --git a/src/calibre/ebooks/docx/theme.py b/src/calibre/ebooks/docx/theme.py new file mode 100644 index 0000000000..e4a75f629c --- /dev/null +++ b/src/calibre/ebooks/docx/theme.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + +from calibre.ebooks.docx.names import XPath + + +class Theme(object): + + def __init__(self): + self.major_latin_font = 'Cambria' + self.minor_latin_font = 'Calibri' + + def __call__(self, root): + for fs in XPath('//a:fontScheme')(root): + for mj in XPath('./a:majorFont')(fs): + for l in XPath('./a:latin[@typeface]')(mj): + self.major_latin_font = l.get('typeface') + for mj in XPath('./a:minorFont')(fs): + for l in XPath('./a:latin[@typeface]')(mj): + self.minor_latin_font = l.get('typeface') + + def resolve_font_family(self, ff): + if ff.startswith('|'): + ff = ff[1:-1] + ff = self.major_latin_font if ff.startswith('major') else self.minor_latin_font + return ff diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 2f945e8980..2e57914e55 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -16,13 +16,15 @@ from lxml.html.builder import ( from calibre.ebooks.docx.container import DOCX, fromstring from calibre.ebooks.docx.names import ( XPath, is_tag, XML, STYLES, NUMBERING, FONTS, get, generate_anchor, - descendants, ancestor, FOOTNOTES, ENDNOTES) + descendants, FOOTNOTES, ENDNOTES, children, THEMES) from calibre.ebooks.docx.styles import Styles, inherit, PageProperties from calibre.ebooks.docx.numbering import Numbering from 
calibre.ebooks.docx.fonts import Fonts from calibre.ebooks.docx.images import Images from calibre.ebooks.docx.tables import Tables from calibre.ebooks.docx.footnotes import Footnotes +from calibre.ebooks.docx.cleanup import cleanup_markup +from calibre.ebooks.docx.theme import Theme from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.oeb.polish.toc import elem_to_toc_text @@ -41,11 +43,14 @@ class Convert(object): def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None): self.docx = DOCX(path_or_stream, log=log) + self.ms_pat = re.compile(r'\s{2,}') + self.ws_pat = re.compile(r'[\n\r\t]') self.log = self.docx.log self.notes_text = notes_text or _('Notes') self.dest_dir = dest_dir or os.getcwdu() self.mi = self.docx.metadata self.body = BODY() + self.theme = Theme() self.tables = Tables() self.styles = Styles(self.tables) self.images = Images() @@ -82,11 +87,13 @@ class Convert(object): self.anchor_map = {} self.link_map = defaultdict(list) + self.log.debug('Converting Word markup to HTML') self.read_page_properties(doc) for wp, page_properties in self.page_map.iteritems(): self.current_page = page_properties - p = self.convert_p(wp) - self.body.append(p) + if wp.tag.endswith('}p'): + p = self.convert_p(wp) + self.body.append(p) notes_header = None if self.footnotes.has_notes: @@ -103,6 +110,7 @@ class Convert(object): for wp in note: if wp.tag.endswith('}tbl'): self.tables.register(wp, self.styles) + self.page_map[wp] = self.current_page p = self.convert_p(wp) dl[-1].append(p) @@ -110,7 +118,7 @@ class Convert(object): self.styles.cascade(self.layers) - self.tables.apply_markup(self.object_map) + self.tables.apply_markup(self.object_map, self.page_map) numbered = [] for html_obj, obj in self.object_map.iteritems(): @@ -131,6 +139,7 @@ class Convert(object): child.tail = '\n\t' self.body[-1].tail = '\n' + self.log.debug('Converting styles to CSS') self.styles.generate_classes() for 
html_obj, obj in self.object_map.iteritems(): style = self.styles.resolve(obj) @@ -146,13 +155,16 @@ class Convert(object): html_obj.set('class', cls) if notes_header is not None: - for h in self.body.iterchildren('h1', 'h2', 'h3'): + for h in children(self.body, 'h1', 'h2', 'h3'): notes_header.tag = h.tag cls = h.get('class', None) if cls and cls != 'notes-header': notes_header.set('class', '%s notes-header' % cls) break + self.log.debug('Cleaning up redundant markup generated by Word') + cleanup_markup(self.html, self.styles) + return self.write() def read_page_properties(self, doc): @@ -162,6 +174,7 @@ class Convert(object): for p in descendants(doc, 'w:p', 'w:tbl'): if p.tag.endswith('}tbl'): self.tables.register(p, self.styles) + current.append(p) continue sect = tuple(descendants(p, 'w:sectPr')) if sect: @@ -192,6 +205,7 @@ class Convert(object): nname = get_name(NUMBERING, 'numbering.xml') sname = get_name(STYLES, 'styles.xml') fname = get_name(FONTS, 'fontTable.xml') + tname = get_name(THEMES, 'theme1.xml') foname = get_name(FOOTNOTES, 'footnotes.xml') enname = get_name(ENDNOTES, 'endnotes.xml') numbering = self.numbering = Numbering() @@ -220,13 +234,21 @@ class Convert(object): else: fonts(fromstring(raw), embed_relationships, self.docx, self.dest_dir) + if tname is not None: + try: + raw = self.docx.read(tname) + except KeyError: + self.log.warn('Styles %s do not exist' % sname) + else: + self.theme(fromstring(raw)) + if sname is not None: try: raw = self.docx.read(sname) except KeyError: self.log.warn('Styles %s do not exist' % sname) else: - self.styles(fromstring(raw), fonts) + self.styles(fromstring(raw), fonts, self.theme) if nname is not None: try: @@ -259,7 +281,7 @@ class Convert(object): elem.set('id', ans) return ans - for item in root.iterdescendants(*headings): + for item in descendants(root, *headings): lvl = plvl = item_level_map.get(item, None) if lvl is None: continue @@ -305,6 +327,7 @@ class Convert(object): current_anchor = None 
current_hyperlink = None + hl_xpath = XPath('ancestor::w:hyperlink[1]') for x in descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink'): if x.tag.endswith('}r'): @@ -313,10 +336,11 @@ class Convert(object): (dest if len(dest) == 0 else span).set('id', current_anchor) current_anchor = None if current_hyperlink is not None: - hl = ancestor(x, 'w:hyperlink') - if hl is not None: + try: + hl = hl_xpath(x)[0] self.link_map[hl].append(span) - else: + x.set('is-link', '1') + except IndexError: current_hyperlink = None dest.append(span) self.layers[p].append(x) @@ -359,6 +383,10 @@ class Convert(object): wrapper = self.wrap_elems(spans, SPAN()) wrapper.set('class', cls) + if not dest.text and len(dest) == 0: + # Empty paragraph add a non-breaking space so that it is rendered + # by WebKit + dest.text = '\xa0' return dest def wrap_elems(self, elems, wrapper): @@ -406,8 +434,15 @@ class Convert(object): if not child.text: continue space = child.get(XML('space'), None) + preserve = False if space == 'preserve': - text.add_elem(SPAN(child.text, style="whitespace:pre-wrap")) + # Only use a with white-space:pre-wrap if this element + # actually needs it, i.e. if it has more than one + # consecutive space or it has newlines or tabs. 
+ multi_spaces = self.ms_pat.search(child.text) is not None + preserve = multi_spaces or self.ws_pat.search(child.text) is not None + if preserve: + text.add_elem(SPAN(child.text, style="white-space:pre-wrap")) ans.append(text.elem) else: text.buf.append(child.text) @@ -415,7 +450,7 @@ class Convert(object): text.add_elem(BR()) ans.append(text.elem) elif is_tag(child, 'w:br'): - typ = child.get('type', None) + typ = get(child, 'w:type') if typ in {'column', 'page'}: br = BR(style='page-break-after:always') else: @@ -437,6 +472,8 @@ class Convert(object): l.set('class', 'noteref') text.add_elem(l) ans.append(text.elem) + elif is_tag(child, 'w:fldChar') and get(child, 'w:fldCharType') == 'separate': + text.buf.append('\xa0') if text.buf: setattr(text.elem, text.attr, ''.join(text.buf)) diff --git a/src/calibre/ebooks/metadata/docx.py b/src/calibre/ebooks/metadata/docx.py index 31b0c48974..ea34d27d3a 100644 --- a/src/calibre/ebooks/metadata/docx.py +++ b/src/calibre/ebooks/metadata/docx.py @@ -27,7 +27,7 @@ def get_metadata(stream): width, height, fmt = identify_data(raw) except: continue - if 0.8 <= height/width <= 1.8 and height*width >= 12000: + if 0.8 <= height/width <= 1.8 and height*width >= 160000: cdata = (fmt, raw) if cdata is not None: mi.cover_data = cdata diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py index 0bb66ec843..26edeab24c 100644 --- a/src/calibre/ebooks/mobi/mobiml.py +++ b/src/calibre/ebooks/mobi/mobiml.py @@ -489,7 +489,7 @@ class MobiMLizer(object): if elem.text: if istate.preserve: text = elem.text - elif (len(elem) > 0 and isspace(elem.text) and elem[0].tag and + elif (len(elem) > 0 and isspace(elem.text) and hasattr(elem[0].tag, 'rpartition') and elem[0].tag.rpartition('}')[-1] not in INLINE_TAGS): text = None else: diff --git a/src/calibre/ebooks/mobi/writer8/header.py b/src/calibre/ebooks/mobi/writer8/header.py index 94ae722f59..19e64ecc09 100644 --- a/src/calibre/ebooks/mobi/writer8/header.py +++ 
b/src/calibre/ebooks/mobi/writer8/header.py @@ -36,7 +36,8 @@ class Header(OrderedDict): for line in self.DEFINITION.splitlines(): line = line.strip() - if not line or line.startswith('#'): continue + if not line or line.startswith('#'): + continue name, val = [x.strip() for x in line.partition('=')[0::2]] if val: val = eval(val, {'zeroes':zeroes, 'NULL':NULL, 'DYN':None, @@ -66,7 +67,7 @@ class Header(OrderedDict): if val is None: raise ValueError('Dynamic field %r not set'%name) if isinstance(val, (int, long)): - fmt = 'H' if name in self.SHORT_FIELDS else 'I' + fmt = b'H' if name in self.SHORT_FIELDS else b'I' val = pack(b'>'+fmt, val) buf.write(val) @@ -79,8 +80,8 @@ class Header(OrderedDict): ans = align_block(ans) return ans - def format_value(self, name, val): return val + diff --git a/src/calibre/ebooks/oeb/iterator/book.py b/src/calibre/ebooks/oeb/iterator/book.py index 28dd37a88e..4ebd543aab 100644 --- a/src/calibre/ebooks/oeb/iterator/book.py +++ b/src/calibre/ebooks/oeb/iterator/book.py @@ -125,7 +125,7 @@ class EbookIterator(BookmarksMixin): [i for i in self.opf.spine if not i.is_linear] self.spine = [] Spiny = partial(SpineItem, read_anchor_map=read_anchor_map, - run_char_count=run_char_count) + run_char_count=run_char_count, from_epub=self.book_format == 'EPUB') is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'} for i in ordered: spath = i.path diff --git a/src/calibre/ebooks/oeb/iterator/spine.py b/src/calibre/ebooks/oeb/iterator/spine.py index 7b404d4f74..86ab7bcf78 100644 --- a/src/calibre/ebooks/oeb/iterator/spine.py +++ b/src/calibre/ebooks/oeb/iterator/spine.py @@ -36,14 +36,30 @@ def anchor_map(html): class SpineItem(unicode): def __new__(cls, path, mime_type=None, read_anchor_map=True, - run_char_count=True): + run_char_count=True, from_epub=False): ppath = path.partition('#')[0] if not os.path.exists(path) and os.path.exists(ppath): path = ppath obj = super(SpineItem, cls).__new__(cls, path) with open(path, 'rb') as f: raw 
= f.read() - raw, obj.encoding = xml_to_unicode(raw) + if from_epub: + # According to the spec, HTML in EPUB must be encoded in utf-8 or + # utf-16. Furthermore, there exist epub files produced by the usual + # incompetents that have utf-8 encoded HTML files that contain + # incorrect encoding declarations. See + # http://www.idpf.org/epub/20/spec/OPS_2.0.1_draft.htm#Section1.4.1.2 + # http://www.idpf.org/epub/30/spec/epub30-publications.html#confreq-xml-enc + # https://bugs.launchpad.net/bugs/1188843 + # So we first decode with utf-8 and only if that fails we try xml_to_unicode. This + # is the same algorithm as that used by the conversion pipeline (modulo + # some BOM based detection). Sigh. + try: + raw, obj.encoding = raw.decode('utf-8'), 'utf-8' + except UnicodeDecodeError: + raw, obj.encoding = xml_to_unicode(raw) + else: + raw, obj.encoding = xml_to_unicode(raw) obj.character_count = character_count(raw) if run_char_count else 10000 obj.anchor_map = anchor_map(raw) if read_anchor_map else {} obj.start_page = -1 @@ -100,22 +116,24 @@ class IndexEntry(object): self.end_anchor = None def create_indexing_data(spine, toc): - if not toc: return + if not toc: + return f = partial(IndexEntry, spine) index_entries = list(map(f, (t for t in toc.flat() if t is not toc), (i-1 for i, t in enumerate(toc.flat()) if t is not toc) )) index_entries.sort(key=attrgetter('sort_key')) - [ i.find_end(index_entries) for i in index_entries ] + [i.find_end(index_entries) for i in index_entries] ie = namedtuple('IndexEntry', 'entry start_anchor end_anchor') for spine_pos, spine_item in enumerate(spine): for i in index_entries: if i.end_spine_pos < spine_pos or i.spine_pos > spine_pos: - continue # Does not touch this file + continue # Does not touch this file start = i.anchor if i.spine_pos == spine_pos else None end = i.end_anchor if i.spine_pos == spine_pos else None spine_item.index_entries.append(ie(i, start, end)) + diff --git a/src/calibre/ebooks/oeb/transforms/split.py 
b/src/calibre/ebooks/oeb/transforms/split.py index 9e4c2a70e6..605a58a31f 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -353,12 +353,19 @@ class FlowSplitter(object): nix_element(elem) # Tree 2 + ancestors = frozenset(XPath('ancestor::*')(split_point2)) for elem in tuple(body2.iterdescendants()): if elem is split_point2: if not before: nix_element(elem) break - nix_element(elem, top=False) + if elem in ancestors: + # We have to preserve the ancestors as they could have CSS + # styles that are inherited/applicable, like font or + # width. So we only remove the text, if any. + elem.text = '\n' + else: + nix_element(elem, top=False) body2.text = '\n' diff --git a/src/calibre/gui2/convert/page_setup.py b/src/calibre/gui2/convert/page_setup.py index a0ca16297c..ac93557dd1 100644 --- a/src/calibre/gui2/convert/page_setup.py +++ b/src/calibre/gui2/convert/page_setup.py @@ -27,7 +27,13 @@ class ProfileModel(QAbstractListModel): if role == Qt.DisplayRole: return QVariant(profile.name) if role in (Qt.ToolTipRole, Qt.StatusTipRole, Qt.WhatsThisRole): - return QVariant(profile.description) + w, h = profile.screen_size + if w >= 10000: + ss = _('unlimited') + else: + ss = _('%d x %d pixels') % (w, h) + ss = _('Screen size: %s') % ss + return QVariant('%s [%s]' % (profile.description, ss)) return NONE class PageSetupWidget(Widget, Ui_Form): diff --git a/src/calibre/gui2/init.py b/src/calibre/gui2/init.py index 52ff70452b..d066f8aa01 100644 --- a/src/calibre/gui2/init.py +++ b/src/calibre/gui2/init.py @@ -212,7 +212,7 @@ class StatusBar(QStatusBar): # {{{ if self.library_total != self.total: base = _('{0}, {1} total').format(base, self.library_total) - self.defmsg.setText('%s [%s]' % (msg, base)) + self.defmsg.setText(u'%s\xa0\xa0\xa0\xa0[%s]' % (msg, base)) self.clearMessage() def device_disconnected(self): diff --git a/src/calibre/gui2/jobs.py b/src/calibre/gui2/jobs.py index c0d61332ab..c5da726f59 100644 --- 
a/src/calibre/gui2/jobs.py +++ b/src/calibre/gui2/jobs.py @@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en' Job management. ''' -import re +import re, time from Queue import Empty, Queue from PyQt4.Qt import (QAbstractTableModel, QVariant, QModelIndex, Qt, @@ -29,7 +29,7 @@ from calibre.gui2.threaded_jobs import ThreadedJobServer, ThreadedJob from calibre.utils.search_query_parser import SearchQueryParser, ParseException from calibre.utils.icu import lower -class JobManager(QAbstractTableModel, SearchQueryParser): # {{{ +class JobManager(QAbstractTableModel, SearchQueryParser): # {{{ job_added = pyqtSignal(int) job_done = pyqtSignal(int) @@ -55,7 +55,7 @@ class JobManager(QAbstractTableModel, SearchQueryParser): # {{{ self.timer.start(1000) def columnCount(self, parent=QModelIndex()): - return 4 + return 5 def rowCount(self, parent=QModelIndex()): return len(self.jobs) @@ -64,11 +64,13 @@ class JobManager(QAbstractTableModel, SearchQueryParser): # {{{ if role != Qt.DisplayRole: return NONE if orientation == Qt.Horizontal: - if section == 0: text = _('Job') - elif section == 1: text = _('Status') - elif section == 2: text = _('Progress') - elif section == 3: text = _('Running time') - return QVariant(text) + return QVariant({ + 0: _('Job'), + 1: _('Status'), + 2: _('Progress'), + 3: _('Running time'), + 4: _('Start time'), + }.get(section, '')) else: return QVariant(section+1) @@ -117,6 +119,8 @@ class JobManager(QAbstractTableModel, SearchQueryParser): # {{{ if rtime is None: return NONE return QVariant('%dm %ds'%(int(rtime)//60, int(rtime)%60)) + if col == 4 and job.start_time is not None: + return QVariant(time.strftime('%H:%M -- %d %b', time.localtime(job.start_time))) if role == Qt.DecorationRole and col == 0: state = job.run_state if state == job.WAITING: @@ -220,7 +224,7 @@ class JobManager(QAbstractTableModel, SearchQueryParser): # {{{ def has_device_jobs(self, queued_also=False): for job in self.jobs: if isinstance(job, DeviceJob): - if job.duration is 
None: # Running or waiting + if job.duration is None: # Running or waiting if (job.is_running or queued_also): return True return False @@ -341,7 +345,7 @@ class JobManager(QAbstractTableModel, SearchQueryParser): # {{{ # }}} -class FilterModel(QSortFilterProxyModel): # {{{ +class FilterModel(QSortFilterProxyModel): # {{{ search_done = pyqtSignal(object) @@ -376,7 +380,7 @@ class FilterModel(QSortFilterProxyModel): # {{{ # Jobs UI {{{ -class ProgressBarDelegate(QAbstractItemDelegate): # {{{ +class ProgressBarDelegate(QAbstractItemDelegate): # {{{ def sizeHint(self, option, index): return QSize(120, 30) @@ -395,7 +399,7 @@ class ProgressBarDelegate(QAbstractItemDelegate): # {{{ QApplication.style().drawControl(QStyle.CE_ProgressBar, opts, painter) # }}} -class DetailView(QDialog, Ui_Dialog): # {{{ +class DetailView(QDialog, Ui_Dialog): # {{{ def __init__(self, parent, job): QDialog.__init__(self, parent) @@ -432,7 +436,7 @@ class DetailView(QDialog, Ui_Dialog): # {{{ self.log.appendPlainText(more.decode('utf-8', 'replace')) # }}} -class JobsButton(QFrame): # {{{ +class JobsButton(QFrame): # {{{ def __init__(self, horizontal=False, size=48, parent=None): QFrame.__init__(self, parent) @@ -471,7 +475,6 @@ class JobsButton(QFrame): # {{{ job_manager.job_done.connect(self.job_done) self.jobs_dialog.addAction(self.action_toggle) - def mouseReleaseEvent(self, event): self.toggle() @@ -554,7 +557,7 @@ class JobsDialog(QDialog, Ui_JobsDialog): try: geom = gprefs.get('jobs_dialog_geometry', bytearray('')) self.restoreGeometry(QByteArray(geom)) - state = gprefs.get('jobs view column layout', bytearray('')) + state = gprefs.get('jobs view column layout2', bytearray('')) self.jobs_view.horizontalHeader().restoreState(QByteArray(state)) except: pass @@ -566,7 +569,7 @@ class JobsDialog(QDialog, Ui_JobsDialog): def save_state(self): try: state = bytearray(self.jobs_view.horizontalHeader().saveState()) - gprefs['jobs view column layout'] = state + gprefs['jobs view column layout2'] 
= state geom = bytearray(self.saveGeometry()) gprefs['jobs_dialog_geometry'] = geom except: @@ -640,8 +643,13 @@ class JobsDialog(QDialog, Ui_JobsDialog): self.save_state() return QDialog.hide(self, *args) + def reject(self): + self.save_state() + QDialog.reject(self) + def find(self, query): self.proxy_model.find(query) # }}} + diff --git a/src/calibre/gui2/preferences/main.py b/src/calibre/gui2/preferences/main.py index 8af41c805a..5f437e71a1 100644 --- a/src/calibre/gui2/preferences/main.py +++ b/src/calibre/gui2/preferences/main.py @@ -22,7 +22,7 @@ from calibre.customize.ui import preferences_plugins ICON_SIZE = 32 -class StatusBar(QStatusBar): # {{{ +class StatusBar(QStatusBar): # {{{ def __init__(self, parent=None): QStatusBar.__init__(self, parent) @@ -39,7 +39,7 @@ class StatusBar(QStatusBar): # {{{ # }}} -class BarTitle(QWidget): # {{{ +class BarTitle(QWidget): # {{{ def __init__(self, parent=None): QWidget.__init__(self, parent) @@ -67,7 +67,7 @@ class BarTitle(QWidget): # {{{ # }}} -class Category(QWidget): # {{{ +class Category(QWidget): # {{{ plugin_activated = pyqtSignal(object) @@ -112,7 +112,7 @@ class Category(QWidget): # {{{ # }}} -class Browser(QScrollArea): # {{{ +class Browser(QScrollArea): # {{{ show_plugin = pyqtSignal(object) @@ -221,6 +221,7 @@ class Preferences(QMainWindow): self.stack.addWidget(self.scroll_area) self.scroll_area.setWidgetResizable(True) + self.setContextMenuPolicy(Qt.NoContextMenu) self.bar = QToolBar(self) self.addToolBar(self.bar) self.bar.setVisible(False) @@ -304,7 +305,6 @@ class Preferences(QMainWindow): self.bar.setVisible(True) self.bb.setVisible(False) - def hide_plugin(self): self.showing_widget = QWidget(self.scroll_area) self.scroll_area.setWidget(self.showing_widget) @@ -355,7 +355,6 @@ class Preferences(QMainWindow): if do_restart: self.gui.quit(restart=True) - def cancel(self, *args): if self.close_after_initial: self.close() @@ -389,3 +388,4 @@ if __name__ == '__main__': p.show() app.exec_() 
gui.shutdown() + diff --git a/src/calibre/gui2/store/search/search.py b/src/calibre/gui2/store/search/search.py index c001bfbe5c..d8147ddb22 100644 --- a/src/calibre/gui2/store/search/search.py +++ b/src/calibre/gui2/store/search/search.py @@ -26,6 +26,9 @@ from calibre.utils.filenames import ascii_filename class SearchDialog(QDialog, Ui_Dialog): + SEARCH_TEXT = _('&Search') + STOP_TEXT = _('&Stop') + def __init__(self, gui, parent=None, query=''): QDialog.__init__(self, parent) self.setupUi(self) @@ -89,7 +92,7 @@ class SearchDialog(QDialog, Ui_Dialog): self.configure.setIcon(QIcon(I('config.png'))) self.adv_search_button.clicked.connect(self.build_adv_search) - self.search.clicked.connect(self.do_search) + self.search.clicked.connect(self.toggle_search) self.checker.timeout.connect(self.get_results) self.progress_checker.timeout.connect(self.check_progress) self.results_view.activated.connect(self.result_item_activated) @@ -101,6 +104,7 @@ class SearchDialog(QDialog, Ui_Dialog): self.select_none_stores.clicked.connect(self.stores_select_none) self.configure.clicked.connect(self.do_config) self.finished.connect(self.dialog_closed) + self.searching = False self.progress_checker.start(100) @@ -161,6 +165,18 @@ class SearchDialog(QDialog, Ui_Dialog): # Affiliate self.results_view.setColumnWidth(6, 20) + def toggle_search(self): + if self.searching: + self.search_pool.abort() + m = self.results_view.model() + m.details_pool.abort() + m.cover_pool.abort() + self.search.setText(self.SEARCH_TEXT) + self.checker.stop() + self.searching = False + else: + self.do_search() + def do_search(self): # Stop all running threads. self.checker.stop() @@ -182,6 +198,8 @@ class SearchDialog(QDialog, Ui_Dialog): _('You must enter a title, author or keyword to' ' search for.'), show=True) return + self.searching = True + self.search.setText(self.STOP_TEXT) # Give the query to the results model so it can do # futher filtering. 
self.results_view.model().set_query(query) @@ -198,7 +216,7 @@ class SearchDialog(QDialog, Ui_Dialog): query = self.clean_query(query) shuffle(store_names) # Add plugins that the user has checked to the search pool's work queue. - self.gui.istores.join(4.0) # Wait for updated plugins to load + self.gui.istores.join(4.0) # Wait for updated plugins to load for n in store_names: if self.store_checks[n].isChecked(): self.search_pool.add_task(query, n, self.gui.istores[n], self.max_results, self.timeout) @@ -387,9 +405,15 @@ class SearchDialog(QDialog, Ui_Dialog): self.gui.istores[result.store_name].open(self, result.detail_item, self.open_external.isChecked()) def check_progress(self): - if not self.search_pool.threads_running() and not self.results_view.model().cover_pool.threads_running() and not self.results_view.model().details_pool.threads_running(): + m = self.results_view.model() + if not self.search_pool.threads_running() and not m.cover_pool.threads_running() and not m.details_pool.threads_running(): self.pi.stopAnimation() + self.search.setText(self.SEARCH_TEXT) + self.searching = False else: + self.searching = True + if unicode(self.search.text()) != self.STOP_TEXT: + self.search.setText(self.STOP_TEXT) if not self.pi.isAnimated(): self.pi.startAnimation() @@ -427,3 +451,4 @@ if __name__ == '__main__': s = SearchDialog(gui, query=' '.join(sys.argv[1:])) s.exec_() + diff --git a/src/calibre/gui2/viewer/config.py b/src/calibre/gui2/viewer/config.py index 4132149842..abf46b113e 100644 --- a/src/calibre/gui2/viewer/config.py +++ b/src/calibre/gui2/viewer/config.py @@ -15,6 +15,7 @@ from PyQt4.Qt import (QFont, QVariant, QDialog, Qt, QColor, QColorDialog, from calibre.constants import iswindows, isxp from calibre.utils.config import Config, StringConfig, JSONConfig +from calibre.gui2 import min_available_height from calibre.gui2.shortcuts import ShortcutConfig from calibre.gui2.viewer.config_ui import Ui_Dialog from calibre.utils.localization import get_language 
@@ -140,6 +141,7 @@ class ConfigDialog(QDialog, Ui_Dialog): self.init_load_themes() self.clear_search_history_button.clicked.connect(self.clear_search_history) + self.resize(self.width(), min(self.height(), max(575, min_available_height()-25))) def clear_search_history(self): from calibre.gui2 import config diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py index 1878b5e760..17ad36f908 100644 --- a/src/calibre/gui2/viewer/documentview.py +++ b/src/calibre/gui2/viewer/documentview.py @@ -29,7 +29,7 @@ from calibre.ebooks.oeb.display.webview import load_html from calibre.constants import isxp, iswindows # }}} -class Document(QWebPage): # {{{ +class Document(QWebPage): # {{{ page_turn = pyqtSignal(object) mark_element = pyqtSignal(QWebElement) @@ -356,7 +356,8 @@ class Document(QWebPage): # {{{ self.mainFrame().setScrollPosition(QPoint(x, y)) def jump_to_anchor(self, anchor): - if not self.loaded_javascript: return + if not self.loaded_javascript: + return self.javascript('window.paged_display.jump_to_anchor("%s")'%anchor) def element_ypos(self, elem): @@ -447,7 +448,7 @@ class Document(QWebPage): # {{{ @property def width(self): - return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results + return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results def set_bottom_padding(self, amount): s = QSize(-1, -1) if amount == 0 else QSize(self.viewportSize().width(), @@ -460,7 +461,7 @@ class Document(QWebPage): # {{{ # }}} -class DocumentView(QWebView): # {{{ +class DocumentView(QWebView): # {{{ magnification_changed = pyqtSignal(object) DISABLED_BRUSH = QBrush(Qt.lightGray, Qt.Dense5Pattern) @@ -766,8 +767,10 @@ class DocumentView(QWebView): # {{{ @dynamic_property def current_language(self): - def fget(self): return self.document.current_language - def fset(self, val): self.document.current_language = val + def fget(self): + return self.document.current_language + def 
fset(self, val): + self.document.current_language = val return property(fget=fget, fset=fset) def search(self, text, backwards=False): @@ -816,7 +819,6 @@ class DocumentView(QWebView): # {{{ self.scrollbar.blockSignals(False) self._ignore_scrollbar_signals = False - def load_finished(self, ok): if self.loading_url is None: # An