Merge prior to trunk merge.

2025-11-21 22:13:04 -05:00 · 2011-02-04 15:55:18 -05:00 · 2011-02-04 15:55:18 -05:00 · 7c044c77cc
commit 7c044c77cc
parent d80f86e097 8eda01abc2
359 changed files with 211983 additions and 110065 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -4,6 +4,365 @@
 # for important features/bug fixes.
 # Also, each release can have new and improved recipes.

+#- version: ?.?.?
+#  date: 2011-??-??
+#
+#  new features:
+#    - title: 
+#  
+#  bug fixes:
+#    - title: 
+#
+#  improved recipes:
+#    - 
+#
+#  new recipes:
+#    - title: 
+
+- version: 0.7.44
+  date: 2011-02-04
+
+  new features:
+    - title: "Nook Color driver: Send downloaded news to the My Files/Magazines folder on the Nook Color. Also when getting the list of books on the device look at all folders in My Files, not just My Files/Books."
+
+    - title: "MOBI Output: Use the book uuid as the ASIN field and set cdetype to EBOK to allow Amazon furthest read tracking to work with calibre generated MOBI files."
+      tickets: [8721]
+
+    - title: "Comic input: Add an option to override the image size in the generated comic. Useful if you have a device whose screen size is not covered by one of the available output profiles."
+      tickets: [7837]
+
+    - title: "Add a restore database option to the Library maintenance menu in the GUI"
+
+    - title: "TXT Output: Allow output in the textile markup language"
+
+    - title: "PML Output: Create multi-level Table of Contents"
+
+    - title: "Driver for the Archos 7O"
+
+    - title: "Search and Replace in the Bulk metadata dialog can now operate on the title_sort field as well"
+      tickets: [8732]
+
+    - title: "Allow changing the case of authors/tags/series etc. via the edit metadata dialog"
+
+    - title: "Connect/share menu: Re-organize to make it a little less easy to select email and delete instead of just email by mistake"
+
+    - title: "Heuristics: Improved Scene break detection and add option to control what scene breaks are replaced by."
+
+    - title: "SONY driver: Add option to not preserve aspect ratio of cover thumbnails."
+
+    - title: "BiBTeX catalog: Add on device column when available"
+
+    - title: "Add search to the plugin preferences dialog"
+  
+  bug fixes:
+    - title: "Fix a bug that could cause files to be lost when changing metadata on East Asian Windows installs if the title and/or author is very long."
+      tickets: [8620]
+
+    - title: "Tag browser: Fix searching with items in a user category not working if the main category is hidden"
+      tickets: [8741]
+
+    - title: "Make completion for author/series/tags/etc. fields less disruptive"
+
+    - title: "Fix regression that broke the content server when user categories/custom columns are present"
+
+    - title: "Catalog generation: Handle user supplied templates more robustly"
+
+    - title: "Move the Tags to apply to newly added books option into Preferences->Adding books"
+      tickets: [8730]
+
+    - title: "Workaround for bug in Qt on OS X that caused crashes when reading metadata from two or more EPUB files with HTML covers that used embedded fonts. Now the embedded fonts are ignored on OS X."
+      tickets: [8643]
+
+    - title: "Fix regression that broke the use of the group searched terms tweak"
+      tickets: [8739]
+
+    - title: "Fix template program regression triggered by recursively calling the processor"
+
+    - title: "Fix mimetype sent by content server for PDB files"
+
+    - title: "OPF: Write title_sort as a calibre custom metadata field rather than as a file-as attribute on the title. This conforms to the OPF spec"
+      tickets: [7883]
+
+    - title: "SONY driver: Fix thumbnails being sent to the SD card going to the wrong location. Also use correct thumbnail size so that the SONY does not regenerate the thumbnail on disconnect"
+
+    - title: "Do not discard the result of a conversion if the user opens the edit metadata dialog while the conversion is running"
+      tickets: [8672]
+
+    - title: "CHM Input: When the chm file lacks a hhc, look for index.html instead"
+      tickets: [8688]
+
+    - title: "EPUB Input: Filter some invalid media types from the spine"
+
+    - title: "RTF Input: More encoding handling fixes."
+      tickets: [8678]
+
+    - title: "Linux binary build: Restore functioning of CALIBRE_DEVELOP_FROM, which was accidentally removed a few versions ago"
+
+    - title: "RTF Output: Retain html headings as rtf headings when converting to rtf. Also fix output of italics."
+      tickets: [8641, 8640]
+
+    - title: "LIT Input: Fix regression that broke handling of LIT files that contain txt data instead of html"
+
+    - title: "MOBI Input: Handle more non printing ASCII codes"
+      tickets: [8646]
+
+    - title: "Handle empty cover files more gracefully"
+      tickets: [8656]
+
+    - title: "Catalog generation: Fix error when Pocketbook is connected and trying to generate catalog"
+      tickets: [8651]
+
+    - title: "Heuristics: Italicize common cases, reduce false positives."
+
+    - title: "Fix regression that caused reporting of device connection errors to break"
+
+  improved recipes:
+    - MSN Japan
+    - The Onion
+    - La Tribuna de
+    - Wall Street Journal
+    - "20 Minutos"
+    - LA Times
+    - Engadget Japan
+    - Ledevoir
+    - Vijesti
+
+  new recipes:
+    - title: "Cinco Dias and BBC Mundo"
+      author: Luis Hernandez
+
+    - title: "Explosm"
+      author: Andromeda Rabbit
+
+    - title: "Cinco Dias"
+      author: Luis Hernandez
+
+
+- version: 0.7.43
+  date: 2011-01-28
+
+  new features:
+    - title: "Ask for confirmation when stopping running jobs"
+      tickets: [3101]
+
+    - title: "Combine the database integrity check and library check into a single menu item. Also nicer implementation of the db integrity check."
+
+    - title: "BiBTeX Catalog: Add option to include file paths in the catalog."
+      tickets: [8589]
+
+    - title: "Create 'generic' output profiles and generic devices in the welcome wizard"
+
+    - title: "Bulk metadata edit: Custom column widgets all have an apply checkbox next to them."
+
+    - title: "Only use LibraryThing to download metadata if the user provides a library thing username and password. Since LT doesn't like web scraping"
+
+    - title: "Allow renaming of user categories in the manage categories dialog. Also allow searching for books in a category from the Tag Browser by right clicking on a category"
+
+    - title: "Folder device plugin: Add option to disable the use of sub folders"
+
+    - title: "Allow saving/loading of search and replace expressions in the bulk metadata edit dialog."
+
+    - title: "Remember previously used regular expression in the add books preferences dialog"
+
+    - title: "Search and replace wizard: Cache the previously used input document."
+
+    - title: "Pressing Esc clears the current search in the main book list"
+
+    - title: "Preselect right formats when using send specific format to device"
+      tickets: [7834]
+
+    - title: "Regex wizard gets find next and previous match buttons"
+      tickets: [4486]
+  
+  bug fixes:
+    - title: "Do not allow customization of user interface plugins until calibre is restarted"
+      tickets: [8621]
+
+    - title: "EPUB Output: When using preserve cover aspect ratio, use the actual image sizes in the SVG template as otherwise ADE doesn't fully preserve the aspect ratio"
+
+    - title: "Fix completion on a word with a trailing space causing the first letter to be duplicated in the edit metadata dialog"
+
+    - title: "PML Input: PML x and Xn tags don't indent properly in TOC. Also handle invalid T markup and retain soft scene breaks"
+      tickets: [6194, 8565]
+
+    - title: "TXT Input: Retain whitespace at the beginning of lines. Don't preserve spaces in heuristic processing. Detect and retain soft scene breaks."
+
+    - title: "Fix Adding empty book - cover browser doesn't update"
+      tickets: [8557]
+
+    - title: "When generating author sort string ignore trailing Inc."
+      tickets: [8539]
+
+    - title: "When converting HTML/ZIP files do not leave temporary files that are only deleted on application shutdown."
+      tickets: [8597]
+
+    - title: "Don't crash if the prefs stored in the db are corrupted"
+
+    - title: "Catalog generation: Do not use inline-block CSS as apparently Adobe Digital Editions cannot handle it."
+      tickets: [8566]
+
+    - title: "Fix extra spaces being inserted into TOC title when reading TOC from OPF guide element."
+      tickets: [8569]
+
+    - title: "Remember window size for bulk metadata edit and catalog generation dialogs"
+      tickets: [8525]
+
+    - title: "Heuristics, italicize common cases: Enhance pattern matching to match punctuation after pattern."
+
+    - title: "Fix regression in converting HTML files that have ASCII-encoded non unicode characters inside their <style> tags. Apparently Word generates these."
+      tickets: [8494]
+
+  improved recipes:
+    - Calgary Herald
+    - The Economist
+    - New Yorker
+    - Heise
+    - HNA
+    - ZDNet
+    - NRC Handelsblad
+
+  new recipes:
+    - title: "SPIN Magazine"
+      author: Quistopher
+
+    - title: "Caps n Babes"
+      author: skyhawker
+
+    - title: "Leduc"
+      author: Brian Hahn
+
+    - title: "David Bravo's Blog, La Nueva Espana, 20 Minutos and La Tribuna de Talavera"
+      author: Luis Hernandez
+
+    - title: "Sinfest"
+      author: nadid
+
+    - title: "Various Czech news sources"
+      author: FunThomas
+
+    - title: "tportal.h"
+      author: Darko Miletic
+
+    - title: "Everett Herald"
+      author: "77jag5"
+
+    - title: "Roger Ebert"
+      author: Shane Erstad
+
+- version: 0.7.42
+  date: 2011-01-21
+
+  new features:
+    - title: "Conversions: Replace the remove header/footer options with a more generic search replace option, that allows you to not only remove but also replace text"
+
+    - title: "Conversion: The preprocess html option has now become a new 'Heuristic Processing' option which allows you to control exactly which heuristics are used"
+
+    - title: "Conversion: Various improvements to Heuristic Processing (used to be preprocess HTML)"
+
+    - title: "When adding empty books to calibre, optionally set the author to the author of the currently selected book"
+      tickets: [7702]
+
+    - title: "Device drivers for the Archos 101, SmartQ T7 and Acer Lumiread"
+
+    - title: "Catalog generation: Make By Authors optional"
+
+    - title: "Allow bulk editing of Date and Published columns."
+
+    - title: "Add a little button to clear date and published values to the edit metadata dialogs"
+
+    - title: "When adding books by ISBN, allow the specification of special tags that will be added to the new book entries"
+      tickets: [8436]
+
+    - title: "Completion on multiple authors"
+      tickets: [8405]
+
+    - title: "Add AZW to default list of internally viewed formats, as I am tired of getting tickets about it"
+
+    - title: "Nicer error message when catalog generation fails"
+
+    - title: "Add capitalize option to context menus in the edit metadata dialog"
+
+  bug fixes:
+    - title: "RTF Input: Fix regression in 0.7.40 that broke conversion of some old style RTF files"
+
+    - title: "Fix Tag editor forgets position"
+      tickets: [8271]
+
+    - title: "When converting books in the calibre GUI, override metadata from the input document, even when empty."
+      description: >
+        "So if you have removed all the tags and comments for the book in the calibre GUI, but the actual file that is being converted still has tags and comments, they are ignored. This affects only conversions in the calibre GUI, not from the command line via ebook-convert."
+      tickets: [8390]
+
+    - title: "Fix memory leak when switching libraries"
+
+    - title: "RTF Output: Fix incorrect spacing between letters."
+      tickets: [8422]
+
+    - title: "Catalog generation: Add composite columns to Merge Comments eligible types"
+
+    - title: "Add a confirmation when closing the add a custom news source dialog."
+      tickets: [8460]
+
+    - title: "Another workaround for LibraryThing UA sniffing that was preventing series metadata download, sigh."
+      tickets: [8477]
+
+    - title: "PD Novel driver: Put books on the SD card into the eBooks folder"
+
+    - title: "When shortening filepaths to conform to windows path length limitations, remove text from the middle of each component instead of the ends."
+      tickets: [8451]
+
+    - title: "Make completion in most places case insensitive"
+      tickets: [8441]
+
+    - title: "Fix regression that caused the N key to stop working when editing a Yes/no column"
+      tickets: [8417]
+
+    - title: "Email: Fix bug when connecting to SMTP relays that use MD5 auth"
+
+    - title: "MOBI Output: Fix bug that could cause a link pointing to the start of a section to go to a point later in the section if the section contained an empty id attribute"
+
+    - title: "When auto converting books and the device is unplugged, do not raise an error."
+      tickets: [8426]
+
+    - title: "Ebook-viewer: Display cover when viewing FB2 files"
+
+    - title: "MOBI Input: Special case handling of empty div tags with a defined height used as paragraph separators."
+      tickets: [8391]
+
+    - title: "Fix sorting of author names into sub categories by first letter in the Tag Browser when the first letter has diacritics"
+      tickets: [8378]
+
+    - title: "Fix regression in 0.7.40 that caused commas in author names to become | when converting/saving to disk"
+
+    - title: "Fix view specific format on a book with no formats gives an error"
+      tickets: [8352]
+
+
+  improved recipes:
+    - Blic
+    - Las Vegas Review Journal
+    - La Vanguardia
+    - New York Times
+    - El Pais
+    - Seattle Times
+    - Ars Technica
+    - Dilbert
+    - Nature News
+
+  new recipes:
+    - title: "kath.net"
+      author: "Bobus"
+
+    - title: "iHNed"
+      author: "Karel Bilek"
+
+    - title: "Gulf News"
+      author: "Darko Miletic"
+
+    - title: "South Africa Mail and Guardian"
+      author: "77ja65"
+
+
 - version: 0.7.40
  date: 2011-01-14

--- a/format_docs/compression/palmdoc.txt
+++ b/format_docs/compression/palmdoc.txt
@ -0,0 +1,54 @@
+About
+-----
+
+PalmDOC uses LZ77 compression techniques. DOC files can contain only compressed
+text. The format does not allow for any text formatting. This keeps files
+small, in keeping with the Palm philosophy. However, extensions to the format
+can use tags, such as HTML or PML, to include formatting within text. These
+extensions to PalmDoc are not interchangeable and are the basis for most eBook
+Reader formats on Palm devices.
+
+LZ77 algorithms achieve compression by replacing portions of the data with
+references to matching data that has already passed through both encoder and
+decoder. A match is encoded by a pair of numbers called a length-distance pair,
+which is equivalent to the statement "each of the next length characters is
+equal to the character exactly distance characters behind it in the
+uncompressed stream." (The "distance" is sometimes called the "offset" instead.)
+
+In the PalmDoc format, a length-distance pair is always encoded by a two-byte
+sequence. Of the 16 bits that make up these two bytes, 11 bits go to encoding
+the distance, 3 go to encoding the length, and the remaining two are used to
+make sure the decoder can identify the first byte as the beginning of such a
+two-byte sequence.
+
+PalmDoc combines LZ77 with a simple kind of byte pair compression.
+
+
+PalmDoc files are decoded as follows:
+-------------------------------------
+
+Read a byte from the compressed stream. If the byte is
+
+0x00: "1 literal" copy that byte unmodified to the decompressed stream.
+
+0x09 to 0x7f: "1 literal" copy that byte unmodified to the decompressed stream.
+
+0x01 to 0x08: "literals": the byte is interpreted as a count from 1 to 8, and
+that many literals are copied unmodified from the compressed stream to the
+decompressed stream.
+
+0x80 to 0xbf: "length, distance" pair: the 2 leftmost bits of this byte ('10')
+are discarded, and the following 6 bits are combined with the 8 bits of the
+next byte to make a 14 bit "distance, length" item. Those 14 bits are broken
+into 11 bits of distance backwards from the current location in the
+uncompressed text, and 3 bits of length to copy from that point
+(copying n+3 bytes, 3 to 10 bytes).
+
+0xc0 to 0xff: "byte pair": this byte is decoded into 2 characters: a space
+character, and a letter formed from this byte XORed with 0x80.
+
+Repeat from the beginning until there are no more bytes in the compressed file.
+
+PalmDOC data is always divided into 4096 byte blocks and the blocks are acted
+upon independently. 
+
--- a/format_docs/compression/zip.txt
+++ b/format_docs/compression/zip.txt
--- a/format_docs/pdb/ereader.txt
+++ b/format_docs/pdb/ereader.txt
@ -0,0 +1,309 @@
+About
+-----
+
+The eReader format has evolved and changed over time. Subsequently, there are
+multiple versions of the eReader format. There are also two different tools
+that can create eReader files. The official tools are Makebook and Dropbook.
+Dropbook is the newer official tool that has replaced Makebook. However,
+Makebook is still in wide use because it supports a wider range of platforms
+than Dropbook. Dropbook is a GUI application that only runs on Windows and
+Apple’s OS X.
+
+
+PDB Identity
+-------
+
+PNRdPPrs
+
+
+202 and 132 headers
+-----------------------------------------
+
+Older files have a record 0 size of 202 and occasionally 116. Newer files have
+a record 0 size of 132. As of this writing the 202 files only support text and
+images. The image format in the 202 files is the same as the 132 files. The 132
+files support a number of additional features.
+
+
+Record 0, eReader header (202)
+------------------
+
+Note all values are in 2 byte increments. Like values are condensed into a
+range. The range can be broken into 2 byte sections which represent the actual
+stored values.
+
+bytes       content             comments
+
+0-2         Version             Non-DRM books 2 and 4.
+2-8         Garbage
+8-10        Non-Text Offset     Start of Non text area (images) will run to the
+                                end of the section list.
+10-14       Unknown
+14-24       Garbage
+24-28       Unknown
+28-98       Garbage
+98-100      Unknown
+100-110     Garbage
+110-114     Unknown
+114-116     Garbage
+116-202     Unknown
+
+* Garbage: Intentionally random values.
+
+
+Text Records (202)
+------------------
+
+Text starts with section 1 and continues until the section indicated by the
+Non-Text Offset. All text records are PalmDoc compressed.
+
+Each character in the compressed data is xored with 0xA5.
+
+A decompression example in pseudo-Python:
+
+for num in range(1, Non-Text Offset):
+    text += decompress_palmdoc(''.join([chr(ord(x) ^ 0xA5) for x in section_data(num)])).decode('cp1252', 'replace')
+
+
+Dropbook 132 files
+------------------
+
+The following sections apply to the newer Dropbook created files.
+
+
+Record 0, eReader header (132)
+----------------------------
+
+This is only for 132 byte header files created by Dropbook.
+
+bytes   content                     comments
+
+0-2     compression                 Specifies compression and drm. 2 = palmdoc,
+                                    10 = zlib. 260 and 272 = DRM
+2-6     unknown                     Value of 0 is used
+6-8     encoding                    Always 25152 (0x6240). All text must be
+                                    encoded as Latin-1 cp1252
+8-10    Number of small pages       The number of small font pages. If page
+                                    index is not built in then 0.
+10-12   Number of large pages       The number of large font pages. If page
+                                    index is not built in then 0.
+12-14   Non-Text record start       The location of the first non text records.
+                                    record 1 to this value minus 1 are all text
+                                    records
+14-16   Number of chapters          The number of chapter index records
+                                    contained in the file
+16-18   Number of small index       The number of small font page index records
+                                    contained in the file
+18-20   Number of large index       The number of large font page index records
+                                    contained in the file
+20-22   Number of images            The number of images contained in the file
+22-24   Number of links             The number of links contained in the file
+24-26   Metadata available          Is there a metadata record in the file?
+                                    0 = None, 1 = There is a metadata record
+26-28   Unknown                     Value of 0 is used
+28-30   Number of Footnotes         The number of footnote records in the file
+30-32   Number of Sidebars          The number of sidebar records in the file
+32-34   Chapter index record start  The location of chapter index records. If
+                                    there are no chapters use the value for the
+                                    Last data record.
+34-36   2560                        Magic value that must be set to 2560
+36-38   Small page index start      The location of small font page index
+                                    records. If page table is not built in use
+                                    the value for the Last data record.
+38-40   Large page index start      The location of large font page index
+                                    records. If page table is not built in use
+                                    the value for the Last data record.
+40-42   Image data record start     The location of the first image record. If
+                                    there are no images use the value for the
+                                    Last data record.
+42-44   Links record start          The location of the first link index
+                                    record. If there are no links use the value
+                                    for the Last data record.
+44-46   Metadata record start       The location of the metadata record. If
+                                    there is no metadata use the value for the
+                                    Last data record.
+46-48   Unknown                     Value of 0 is used
+48-50   Footnote record start       The location of the first footnote record.
+                                    If there are no footnotes use the value for
+                                    the Last data record.
+50-52   Sidebar record start        The location of the first sidebar record.
+                                    If there are no sidebars use the value for
+                                    the Last data record.
+52-54   Last data record            The location of the last data record
+54-132  Unknown                     Value of 0 is used
+
+Note: All values are in 2 byte increments. All bytes in the table that have a
+range larger than 2 can be broken into 2 byte segments and have different
+values set for each grouping.
+
+
+Records Order
+-------------
+
+Though the order of these sections is described in the eReader header,
+DropBook makes the following order:
+
+   1. eReader Header
+   2. Compressed text
+   3. Small font page index
+   4. Large font page index
+   5. Chapter index
+   6. Links index
+   7. Images
+   8. (Extrapolation: there should be one more record type here though it has
+       not yet been uncovered what it might be).
+   9. Metadata
+  10. Sidebar records
+  11. Footnote records
+  12. Text block size record
+  13. "MeTaInFo\x00" word record 
+
+
+Text Records
+------------
+
+All text records use cp1252 encoding (although eReader documents talk about
+UTF-8 as well). Their total compressed size is unknown; however, anything
+below 3560 Bytes is known to work. The text will be either zlib or palmdoc
+compressed. Use the compression value from the eReader header to determine
+which. All text utilizes the Palm Markup Language (PML) for formatting.
+
+Starting with DropBook 1.6.0 text is divided into 8KB (8192 bytes) blocks
+trimming the end to the closest space character and then being compressed.
+The earlier DropBook 1.5.2 tries to behave the same way, though it sometimes
+trims the block in an unexpected place.
+
+
+Chapter Index Records
+---------------------
+
+Each chapter record corresponds to 1 chapter and points at the place in the
+book. Chapter record takes a form of 'offset name\x00' First 4 bytes are offset
+of the original pml file where the chapter index points to (offset of
+the \x|\X?|\C? tags). Then without a space goes a name of a chapter in chapter
+index. It should contain only text, all formatting tags should be removed.
+\U and \a tags are not permitted in chapter name. To maintain sub-chapters
+4*n spaces (\x20) are added to the beginning of the name, where "n" is level of
+chapter: 0 for \x tag and N for \CN="" and \XN tags. And then an ending
+\x00 symbol.
+
+
+Image Records
+-------------
+
+Image records must be smaller than 65505 Bytes. They must also be 8bit PNG
+images.
+
+An image record takes the form 'PNG name\x00... image_data'
+
+bytes   content         comments
+
+0-4     PNG             There must be a space after PNG.
+4-36    image name.     The image name must be exactly 32 Bytes long. Pad
+                        the right side of the name with \x00 characters for
+                        names shorter than 32 characters.
+36-58   Unknown	
+58-60   width           Width of an image
+60-62   height          Height of an image
+62-?    The image data  raw image data in 8 bit PNG format
+
+Note: DropBook seems to change something in the raw PNG data, such as
+re-encoding it, but plain insertion of a PNG image there still works.
+
+
+Links Records
+-------------
+
+Links records are constructed the same way as chapter ones. Each link anchor
+record corresponds to 1 link anchor and points at the place in the book. Link
+record takes a form of 'offset name\x00' First 4 bytes are offset of the
+original pml file where the link anchor points to (offset of the \Q tag). Then
+without a space goes a name of a link anchor. It should contain only text, all
+formatting tags should be removed. \U and \a tags are not permitted in link
+anchor name. And then an ending \x00 symbol.
+
+
+Footnote Records
+----------------
+
+The first footnote record is a \x00 separated list of footnote ids. All
+subsequent footnote records are the footnote text corresponding to the id's
+position in the list. Footnote text is compressed in the same manner as normal
+text records
+
+E.G.
+
+footnote section 1 = 'notice1\x00notice2\x00notice3\x00'
+footnote section 2 = 'Text for notice 1'
+footnote section 3 = 'Text for notice 2'
+footnote section 4 = 'Text for notice 3'
+
+Starting with Dropbook 1.5.2 the first record looks a bit different. It is a
+sequence of \x00\x01 then 1 byte of footnote id length, then footnote id then
+\x00.
+
+E.G.
+
+footnote section 1 = '\x00\x01\x07notice1\x00\x00\x01\x0Afootnote10\x00'
+
+
+Sidebar Records
+---------------
+
+The first sidebar record is a \x00 separated list of sidebar ids. All
+subsequent sidebar records are the sidebar text corresponding to the id's
+position in the list. Sidebar text is compressed in the same manner as normal
+text records
+
+E.G.
+
+sidebar section 1 = 'notice1\x00notice2\x00notice3\x00'
+sidebar section 2 = 'Text for notice 1'
+sidebar section 3 = 'Text for notice 2'
+sidebar section 4 = 'Text for notice 3'
+
+Starting with Dropbook 1.5.2 the first record looks a bit different. It is a
+sequence of \x00\x01 then 1 byte of sidebar id length, then sidebar id then
+\x00.
+
+E.G.
+
+sidebar section 1 = '\x00\x01\x07notice1\x00\x00\x01\x09sidebar10\x00'
+
+
+Metadata Record
+---------------
+
+\x00 separated list of strings.
+
+Metadata takes the form:
+
+  title\x00
+  author\x00
+  copyright\x00
+  publisher\x00
+  isbn\x00
+
+E.G.
+
+Gibraltar Earth\x00Michael McCollum\x001999\x00Sci Fi Arizona\x001929381255\x00
+
+The metadata record is always followed by a record which contains 'MeTaInFo\x00'
+
+Note: Starting with DropBook 1.5.2 'MeTaInFo\x00' is not following Metadata
+Record. It is a separate record that ends the file and there are some more
+records between Metadata record and 'MeTaInFo\x00' record.
+
+
+Text Sizes Record
+-----------------
+
+There is a special record that contains the initial size of all text blocks
+before compression. It is just a sequence of 2-byte blocks which are containing
+the sizes.
+
+E.G.
+
+\x1F\xFB\x20\x00\x20\x00\x1F\xFE\x1F\xFD\x09\x46
+
+Note: From this we can judge that the theoretical maximum initial block size
+is 65535 bytes.
+
--- a/format_docs/pdb/mbp.txt
+++ b/format_docs/pdb/mbp.txt
@ -0,0 +1,414 @@
+// BEGINNING OF FILE
+//   NOTES:
+//   1* Numeric data stored as big endian, 32 bits.
+//   2* Data padded to 16 bits limits. (Sometimes to 32 bits limits?)
+//   3* Text stored seems to be an 8 bit encoding padded to 16 bits
+//    (may be "ISO-8859-1"?, or may be just a local machine character set?)
+//   4* I initially used the term "MARK" where I should have used "HIGHLIGHT",
+//     bear that in mind (it was a bad choice of name when I started reversing)
+
+<0x 31 bytes = book_title_PAR + 0x00 PAD if (book_title_PAR < 31) >
+<0x 00>
+<0x 00 00 00 00>
+...4
+...4
+<0x 00 00 00 00>
+<0x 00 00 00 00>
+<0x 00 00 00 00>
+<0x 00 00 00 00>
+BPAR
+MOBI
+<0x 4 bytes = Next free pointer identifier>
+	// Note: pointer identifiers aren't always consecutive,
+	// so this number is usually bigger than the # of index entries
+<0x 00 00>
+<0x 4 bytes = Number of index entries>
+<0x 4 bytes = Position of BPAR>
+<0x 00 00 00 00>	// BPAR pointer identifier = 0x0
+
+
+// INDEXES:
+// Order of Indexes: from the beginning of this MBP file, 
+// forward to the end of the file.
+// Nevertheless, see these comments for order relative to: 
+//   "BEGINING OF USER DATA": order of Data marks.
+//   "FINAL GROUP OF MARKS": order of final marks.
+[for each {NOTE,MARK,CORRECTION,DRAWING,BOOKMARK,
+		AUTHOR,TITLE,CATEGORY,GENRE,ABSTRACT,COVER,PUBLISHER,
+		...} 
+	  || "last DATA"]
+// Note: Pointer identifiers to DATA's assigned so the number
+// shrinks as the table grows down.
+[if NOTE || CORRECTION]
+	<0x 4 bytes = Position of DATA....EBVS>
+	<0x 4 bytes = Pointer identifier, used by BKMK blocks>
+[fi NOTE || CORRECTION]
+<0x 4 bytes = Position of DATA>
+<0x 4 bytes = Pointer identifier, used by BKMK blocks>
+[if NOTE || CORRECTION]
+	<0x 4 bytes = Position of DATA>
+	<0x 4 bytes = Pointer identifier, used by BKMK blocks>
+[fi NOTE || CORRECTION]
+[if MARK || DRAWING || BOOKMARK]
+	<0x 4 bytes = Position of DATA....EBVS>
+	<0x 4 bytes = Pointer identifier, used by BKMK blocks>
+[fi MARK || DRAWING || BOOKMARK]
+[if AUTHOR || TITLE || CATEGORY || GENRE || ABSTRACT || COVER || PUBLISHER]
+	<0x 4 bytes = Position of [AUTH || TITL || CATE || GENR || ABST || COVE || PUBL] >
+	<0x 4 bytes = Pointer identifier>
+[fi AUTHOR || TITLE || CATEGORY || GENRE || ABSTRACT || COVER || PUBLISHER]
+[if last DATA] // there's always a last piece of DATA (not user data?)
+	<0x 4 bytes = Position of last DATA>
+	<0x 4 bytes = Pointer identifier>	// usually <0x 00 00 00 01>
+[fi last DATA]
+[next {NOTE,MARK,CORRECTION,DRAWING,BOOKMARK,
+		AUTHOR,TITLE,CATEGORY,GENRE,ABSTRACT,COVER,PUBLISHER,
+		...} 
+      || "last DATA"]
+
+
+[for each {NOTE,MARK,CORRECTION,DRAWING}]
+<0x 4 bytes = Position of BKMK>
+<0x 4 bytes = Pointer identifier>
+	// Note: pointer identifiers for BKMK's are usually the smallest
+	// of all the identifiers associated to an annotation. All
+	// other DATA references in INDEXES table associated to this
+	// BKMK, have bigger pointer identifiers.
+	// Note: Pointer identifiers to BKMK's assigned so the number
+	// grows as the table grows down.
+[next {NOTE,MARK,CORRECTION,DRAWING}]
+
+
+<0x 2 bytes random PAD>
+BPAR
+<0x 4 bytes = size of BPAR block>
+<0x FF FF FF FF>
+...4	<-- 'position of last read' related
+...4	<-- 'position of last read' related
+...4
+<0x FF FF FF FF>
+...4
+...4
+...4	<-- 'position of last read' related
+...(rest of size of BPAR block, if bigger than 0x20)
+[if (size of BPAR block) mod 32 != 0]
+<0x FF FF FF FF>
+[fi]
+
+// BEGINING OF USER DATA:
+// Order of {NOTE,MARK,CORRECTION,DRAWING} : 
+// starts with user data at the end of the file, 
+// going backwards to the beginning of the file:
+//--------------------------------------------------------------------
+[for each {NOTE,MARK,CORRECTION,DRAWING}]
+//-------------------------------
+[if NOTE]
+DATA
+<0x 4 bytes = size of DATA block>
+[if EBAR]	// this block can appear, or not... ???
+	EBAR
+	...various {4 x byte} ???
+[fi EBAR]
+EBVS
+<0x 00 00 00 03> ???
+<0x 4 bytes = IDENTIFIER> ???
+[<0x 00 00 00 01>, or nothing at all] ???
+<0x 00 00 00 08>
+<0x FF FF FF FF>
+<0x 00 00 00 00>
+<0x 00 00 00 10>
+...(rest of size of DATA block)
+<0x FD EA = PAD? (ýê)>
+DATA
+<0x 4 bytes = size of <marked text (see 3rd note)> >
+<marked text (see 3rd note)>
+[if (size of <marked text (see 3rd note)>) mod 4 !=0]
+<0x random PAD until (size of <marked text (see 3rd note)>) mod 4 ==0>
+[fi]
+DATA
+<0x 4 bytes = size of <note text (see 3rd note)> >
+<note text (see 3rd note)>
+[if (size of <note text (see 3rd note)>) mod 4 !=0]
+<0x random PAD until (size of <note text (see 3rd note)>) mod 4 ==0>
+[fi]
+[fi NOTE]
+//-------------------------------
+[if MARK || BOOKMARK]
+DATA
+<0x 4 bytes = size of <marked text (see 3rd note)> >
+<marked text (see 3rd note)>
+[if (size of <marked text (see 3rd note)>) mod 4 !=0]
+<0x random PAD until (size of <marked text (see 3rd note)>) mod 4 ==0>
+[fi]
+DATA
+<0x 4 bytes = size of DATA block>
+[if EBAR]	// this block can appear, or not... ???
+	EBAR
+	...various {4 x byte} ???
+[fi EBAR]
+EBVS
+<0x 00 00 00 03> ???
+<0x 4 bytes = IDENTIFIER> ???
+[<0x 00 00 00 01>, or nothing at all] ???
+<0x 00 00 00 08>
+<0x FF FF FF FF>
+<0x 00 00 00 00>
+<0x 00 00 00 10>
+...(rest of size of DATA block)
+<0x FD EA = PAD? (ýê)>
+[fi MARK || BOOKMARK]
+//-------------------------------
+[if CORRECTION]
+DATA
+<0x 4 bytes = size of DATA block>
+[if EBAR]	// this block can appear, or not... ???
+	EBAR
+	...various {4 x byte} ???
+[fi EBAR]
+EBVS
+<0x 00 00 00 03> ???
+<0x 4 bytes = IDENTIFIER> ???
+[<0x 00 00 00 01>, or nothing at all] ???
+<0x 00 00 00 08>
+<0x FF FF FF FF>
+<0x 00 00 00 00>
+<0x 00 00 00 10>
+...(rest of size of DATA block)
+<0x FD EA = PAD? (ýê)>
+DATA
+<0x 4 bytes = size of <marked text (see 3rd note)> >
+<marked text (see 3rd note)>
+[if (size of <marked text (see 3rd note)>) mod 4 !=0]
+<0x random PAD until (size of <marked text (see 3rd note)>) mod 4 ==0>
+[fi]
+DATA
+<0x 4 bytes = size of <note text (see 3rd note)> >
+<note text (see 3rd note)>
+[if (size of <note text (see 3rd note)>) mod 4 !=0]
+<0x random PAD until (size of <note text (see 3rd note)>) mod 4 ==0>
+[fi]
+[fi CORRECTION]
+//-------------------------------
+[if DRAWING]
+DATA
+<0x 4 bytes = size of raw data>
+ADQM
+	// NOTE: background color is stored in corresponding BKMK.
+	[begin DRAWING format]
+		...4 = <0x 00 00 00 01> ???
+		<0x 4 bytes = X POSITION OF UPPER LEFT CORNER??? > 
+		<0x 4 bytes = Y POSITION OF UPPER LEFT CORNER??? > 
+		<0x 4 bytes = X SIZE in pixels > 
+		<0x 4 bytes = Y SIZE in pixels > 
+		...4 = <0x 00 00 00 00> ???
+		<0x 4 bytes = number of STROKES>
+		[if "number of STROKES" == 0]
+			<0x 00 00 00 00>
+			[end DRAWING format]	
+		[fi]
+		[for each STROKE]
+			<0x 00 00 00 01> ???
+			<0x 4 bytes> = 
+				Stroke's beginning position in list of coordinates.
+			<0x 4 bytes> = 
+				Stroke's ending position in list of coordinates.
+			<0x 00 RR GG BB> = RRGGBB color of stroke.
+		[next STROKE]
+		<0x 4 bytes> = number of coordinate pairs in array of coordinates.
+		// NOTE: each stroke is formed out of at least three 
+		// coordinate pairs: begin, {next point}(1-n), end point.
+		[for each COORDINATE]
+			<0x 4 bytes> = X coordinate
+			<0x 4 bytes> = Y coordinate
+		[next COORDINATE]
+	[end DRAWING format]
+[if (size of <marked text (see 3rd note)>) mod 4 !=0]
+<0x random PAD until (size of <marked text (see 3rd note)>) mod 4 ==0>
+[fi]
+DATA
+<0x 4 bytes = size of <marked text (see 3rd note)> >
+<marked text (see 3rd note)>
+[if (size of <marked text (see 3rd note)>) mod 4 !=0]
+<0x random PAD until (size of <marked text (see 3rd note)>) mod 4 ==0>
+[fi]
+DATA
+<0x 4 bytes = size of DATA block>
+[if EBAR]	// this block can appear, or not... ???
+	EBAR
+	...various {4 x byte} ???
+[fi EBAR]
+EBVS
+<0x 00 00 00 03>
+<0x 4 bytes = IDENTIFIER>
+[<0x 00 00 00 01>, or nothing at all] ???
+<0x 00 00 00 08>
+<0x FF FF FF FF>
+<0x 00 00 00 00>
+<0x 00 00 00 10>
+...(size of DATA block - 30)
+<0x FD EA = PAD? (ýê)>
+[fi DRAWING]
+//-------------------------------
+[next {NOTE,MARK,CORRECTION,DRAWING}]
+
+// AUTHOR (if any)
+//--------------------------------------------------------------------
+[if AUTHOR]
+AUTH
+<0x 4 bytes = size of AUTHOR block>
+<text (see 3rd note)>
+[fi AUTHOR]
+//--------------------------------------------------------------------
+// TITLE (if any)
+//--------------------------------------------------------------------
+[if TITLE]
+TITL
+<0x 4 bytes = size of TITLE block>
+<text (see 3rd note)>
+[fi TITLE]
+//--------------------------------------------------------------------
+// GENRE (if any)
+//--------------------------------------------------------------------
+[if GENRE]
+GENR
+<0x 4 bytes = size of GENRE block>
+<text (see 3rd note)>
+[fi GENRE]
+//--------------------------------------------------------------------
+// ABSTRACT (if any)
+//--------------------------------------------------------------------
+[if ABSTRACT]
+ABST
+<0x 4 bytes = size of ABSTRACT block>
+<text (see 3rd note)>
+[fi ABSTRACT]
+//--------------------------------------------------------------------
+
+// FINAL DATA
+// Note: 'FINAL DATA' can occur anytime between these marks: 
+//   AUTHOR,TITLE,CATEGORY,GENRE,ABSTRACT,COVER,PUBLISHER,...
+//--------------------------------------------------------------------
+DATA
+<0x 4 bytes = size of EBVS block>
+[if EBAR]	// this block can appear, or not... ???
+	EBAR
+	...various {4 x byte} ???
+[fi EBAR]
+EBVS
+<0x 00 00 00 03> || <0x 00 00 00 04> 
+<0x 4 bytes || 8 bytes = IDENTIFIER>
+<0x 00 00 00 08>
+<0x FF FF FF FF>
+<0x 00 00 00 00>
+<0x 00 00 00 10>
+...(size of EBVS block - 30) :
+	...4	<-- 'position of last read' related
+	...various {4 x byte} ???
+	...4	<-- 'position of last read' related
+	...4
+	...4
+	...4
+<0x FD EA = PAD? (ýê)>
+//--------------------------------------------------------------------
+
+// CATEGORY (if any)
+//--------------------------------------------------------------------
+[if CATEGORY]
+CATE
+<0x 4 bytes = size of CATEGORY block>
+<text (see 3rd note)>
+[fi CATEGORY]
+//--------------------------------------------------------------------
+// COVER (if any)
+//--------------------------------------------------------------------
+[if COVER]
+COVE
+<0x 4 bytes = size of COVER block>
+<text (see 3rd note)>
+[fi COVER]
+//--------------------------------------------------------------------
+// PUBLISHER (if any)
+//--------------------------------------------------------------------
+[if PUBLISHER]
+PUBL
+<0x 4 bytes = size of PUBLISHER block>
+<text (see 3rd note)>
+[fi PUBLISHER]
+//--------------------------------------------------------------------
+
+
+// FINAL GROUP OF MARKS
+// Order of {NOTE,MARK,CORRECTION} : 
+// starts with user data at the beginning of the file,
+// going forwards to the end:
+//--------------------------------------------------------------------
+[for each {NOTE,MARK,CORRECTION,DRAWING,BOOKMARK}]
+BKMK
+<0x 4 bytes = size of BKMK>
+<0x 4 bytes = TEXT position of the beginning of {NOTE,MARK,CORRECTION,DRAWING,BOOKMARK}>
+//-------------------------------
+[if DRAWING]
+<0x FF FF FF FF>
+[else]
+<0x 4 bytes = TEXT position of the end of {NOTE,MARK,CORRECTION,BOOKMARK}>
+[fi DRAWING]
+...4
+...4
+//-------------------------------
+[if NOTE]
+	<0x xx xx xx (20)?>, xxxxxx=>RRGGBB color ???
+	<0x 00 00 00 02>
+[fi NOTE]
+[if MARK]
+	<0x xx xx xx (0F/00)??>, xxxxxx=>RRGGBB color ???
+	<0x 00 00 00 04>
+[fi MARK]
+[if CORRECTION]
+	<0x xx xx xx (6F)?>, xxxxxx=>RRGGBB color ???
+	<0x 00 00 00 02>
+[fi CORRECTION]
+[if DRAWING]
+	<0x xx xx xx (0F)?>, xxxxxx=>RRGGBB DRAWING's background color.
+	<0x 00 00 00 08>
+[fi DRAWING]
+[if BOOKMARK]
+	<0x xx xx xx 00>
+	<0x 00 00 00 01>
+[fi BOOKMARK]
+	// this one is a strange type of mark, whose use is not yet identified:
+	[if UNKNOWN_TYPE_YET_1]
+		<0x xx xx xx 00>
+		<0x 00 00 40 00>
+	[fi UNKNOWN_TYPE_YET_1]
+
+//-------------------------------
+[if BOOKMARK || (NOTE "without stored marked text")]
+	<0x FF FF FF FF>
+[else]
+	<0x 4 bytes = DATA pointer in INDEXES>
+[fi BOOKMARK]
+[if DRAWING || MARK]
+	<0x FF FF FF FF>
+[else]
+	<0x 4 bytes = DATA pointer in INDEXES>
+[fi]
+<0x 4 bytes = DATA pointer in INDEXES>
+[if DRAWING]
+	<0x 4 bytes = DATA pointer in INDEXES>
+[else]
+	<0x FF FF FF FF>
+[fi]
+//-------------------------------
+<0x FF FF FF FF>
+<0x FF FF FF FF>
+[next {NOTE,MARK,CORRECTION,DRAWING,BOOKMARK}]
+//--------------------------------------------------------------------
+
+[if length % 32 bit != 0] ???
+	<0x FF FF FF FF>
+[fi]
+
+// END OF FILE
+
+// by idleloop@yahoo.com, v0.2.e, 12/2009
+// http://www.angelfire.com/ego2/idleloop
--- a/format_docs/pdb/mobi.txt
+++ b/format_docs/pdb/mobi.txt
@ -0,0 +1,341 @@
+from (http://wiki.mobileread.com/wiki/MOBI)
+
+About
+-----
+
+MOBI is the format used by the MobiPocket Reader. It may have a .mobi
+extension or it may have a .prc extension. The extension can be changed by the
+user to either of the accepted forms. In either case it may be DRM protected or
+non-DRM. The .prc extension is used because the PalmOS doesn't support any file
+extensions except .prc or .pdb. Note that Mobipocket prohibits their DRM format
+to be used on dedicated eBook readers that support other DRM formats.
+
+
+Description
+-----------
+
+MOBI format was originally an extension of the PalmDOC  format by adding
+certain HTML like tags to the data. Many MOBI formatted documents still use
+this form. However there is also a high compression version of this file format
+that compresses data to a larger degree in a proprietary manner. There are some
+third party programs that can read the eBooks in the original MOBI format but
+there are only a few third party programs that can read the eBooks in the new
+compressed form. The higher compression mode is using a huffman coding scheme
+that has been called the Huff/cdic algorithm.
+
+From time to time features have been added to the format so new files may have
+problems if you try and read them with a down level reader. Currently the
+source files follow the guidelines in the Open eBook format.
+
+Note that AZW for the Amazon Kindle is the same format as MOBI except that it
+uses a slightly different DRM scheme.
+
+
+Format
+------
+
+Like PalmDOC, the Mobipocket file format is that of a standard Palm Database
+Format file. The header of that format includes the name of the database
+(usually the book title and sometimes a portion of the author's name) which is
+up to 31 bytes of data. The files are identified as Creator ID of MOBI and a
+Type of BOOK.
+
+
+PalmDOC Header
+--------------
+
+The first record in the Palm Database Format gives more information about the
+Mobipocket file. The first 16 bytes are almost identical to the first sixteen
+bytes of a PalmDOC format file.
+
+bytes   content             comments
+2       Compression         1 == no compression, 2 = PalmDOC compression,
+                            17480 = HUFF/CDIC compression.
+2       Unused              Always zero
+4       text length         Uncompressed length of the entire text of the book
+2       record count        Number of PDB records used for the text of the book.
+2       record size         Maximum size of each record containing text, always
+                            4096.
+4       Current Position    Current reading position, as an offset into the
+                            uncompressed text
+
+There are two differences from a Palm DOC file. There's an additional
+compression type (17480), and the Current Position bytes are used for a
+different purpose:
+
+bytes   content             comments
+2       Encryption Type     0 == no encryption, 1 = Old Mobipocket Encryption,
+                            2 = Mobipocket Encryption.
+2       Unknown             Usually zero
+
+The old Mobipocket Encryption scheme only allows the file to be registered
+with one PID, unlike the current encryption scheme that allows multiple PIDs to
+be used in a single file. Unless specifically mentioned, all the encryption
+information on this page refers to the current scheme.
+
+
+MOBI Header
+-----------
+
+Most Mobipocket files also have a MOBI header in record 0 that follows these
+16 bytes, and newer formats also have an EXTH header following the MOBI header,
+again all in record 0 of the PDB file format.
+
+The MOBI header is of variable length and is not documented. Some fields have
+been tentatively identified as follows:
+
+offset  bytes   content                 comments
+16      4       identifier              The characters M O B I
+20      4       header length           The length of the MOBI header, including
+                                        the previous 4 bytes
+24      4       Mobi type               The kind of Mobipocket file this is
+                                            2 Mobipocket Book
+                                            3 PalmDoc Book
+                                            4 Audio
+                                            257 News
+                                            258 News_Feed
+                                            259 News_Magazine
+                                            513 PICS
+                                            514 WORD
+                                            515 XLS
+                                            516 PPT
+                                            517 TEXT
+                                            518 HTML
+28      4       text Encoding           1252 = CP1252 (WinLatin1); 65001 = UTF-8
+32      4       Unique-ID               Some kind of unique ID number (random?)
+36      4       Generator version       Potentially the version of the
+                                        Mobipocket-generation tool. Always >=
+                                        the value of the "format version" field
+                                        and <= the version of mobigen used to
+                                        produce the file.
+40      40      Reserved                All 0xFF. In case of a dictionary, or
+                                        some newer file formats, a few bytes are
+                                        used from this range of 40 0xFFs
+80      4       First Non-book index?   First record number (starting with 0)
+                                        that's not the book's text
+84      4       Full Name Offset        Offset in record 0 (not from start of
+                                        file) of the full name of the book
+88      4       Full Name Length        Length in bytes of the full name of the
+                                        book
+92      4       Language                Book language code. Low byte is main
+                                        language 09= English, next byte is
+                                        dialect, 08 = British, 04 = US
+96      4       Input Language          Input language for a dictionary
+100     4       Output Language         Output language for a dictionary
+104     4       Format version          Potentially the version of the
+                                        Mobipocket format used in this file.
+                                        Always >= 1 and <= the value of the
+                                        "generator version" field.
+108     4       First Image record      First record number (starting with 0)
+                                        that contains an image. Image records
+                                        should be sequential. If there are
+                                        no images this will be 0xffffffff.
+112     4       HUFF record             Record containing Huff information
+                                        used in HUFF/CDIC decompression.
+116     4       HUFF count              Number of Huff records.
+120     4       DATP record             Unknown: Record starts with DATP.
+124     4       DATP count              Number of DATP records.
+128     4       EXTH flags              Bitfield. if bit 6, 0x40 is set, then
+                                        there's an EXTH record
+The following records are only present if the mobi header is long enough.
+132     36      ?                       36 unknown bytes, if MOBI is long enough
+168     4       DRM Offset              Offset to DRM key info in DRMed files.
+                                        0xFFFFFFFF if no DRM
+172     4       DRM Count               Number of entries in DRM info.
+174     4       DRM Size                Number of bytes in DRM info.
+176     4       DRM Flags               Some flags concerning the DRM info.
+180     6       ?
+186     2       Last Image record       Possibly the number of the last image
+                                        record. If there are no images in the
+                                        book this will be 0xffff.
+188     4       ?
+192     4       FCIS record             Unknown. Record starts with FCIS.
+196     4       ?
+200     4       FLIS record             Unknown. Record starts with FLIS.
+204     ?       ?                       Bytes to the end of the MOBI header,
+                                        including the following if the header
+                                        length >= 228. ( 244 from start of
+                                        record)
+242     2       Extra Data Flags        A set of binary flags, some of which
+                                        indicate extra data at the end of each
+                                        text block. This only seems to be valid
+                                        for Mobipocket format version 5 and 6
+                                        (and higher?), when the header length
+                                        is 228 (0xE4) or 232 (0xE8).
+
+
+EXTH Header
+-----------
+
+If the MOBI header indicates that there's an EXTH header, it follows immediately
+after the MOBI header. Since the MOBI header is of variable length, this isn't
+at any fixed offset in record 0. Note that some readers will ignore any EXTH
+header info if the mobipocket version number specified in the MOBI header is 2
+or less (perhaps 3 or less).
+
+The EXTH header is also undocumented, so some of this is guesswork.
+
+bytes   content             comments
+4       identifier          the characters E X T H
+4       header length       the length of the EXTH header, including the previous 4 bytes
+4       record Count        The number of records in the EXTH header. the rest of the EXTH header consists of repeated EXTH records to the end of the EXTH length.
+        EXTH record start   Repeat until done.
+4       record type         Exth Record type. Just a number identifying what's stored in the record
+4       record length       length of EXTH record = L , including the 8 bytes in the type and length fields
+L-8     record data         Data.
+        EXTH record end     Repeat until done.
+
+There are lots of different EXTH Records types. Ones found so far in Mobipocket
+files are listed here, with possible meanings. Hopefully the table will be
+filled in as more information comes to light.
+
+record type    usual length     name             comments
+1                               drm_server_id
+2                               drm_commerce_id
+3                               drm_ebookbase_book_id
+100                             author
+101                             publisher
+102                             imprint
+103                             description
+104                             isbn
+105                             subject
+106                             publishingdate
+107                             review
+108                             contributor
+109                             rights
+110                             subjectcode
+111                             type
+112                             source
+113                             asin
+114                             versionnumber
+115                             sample
+116                             startreading
+118                             retail price (as text)
+119                             retail price currency (as text)
+201                             coveroffset
+202                             thumboffset
+203                             hasfakecover
+204                             204 Unknown
+205                             205 Unknown
+206                             206 Unknown
+207                             207 Unknown
+208                             208 Unknown
+300                             300 Unknown
+401                             clippinglimit
+402                             publisherlimit
+403                             403 Unknown
+404                             404 ttsflag
+501            4                cdetype          PDOC - Personal Doc;
+                                                 EBOK - ebook;
+502                             lastupdatetime
+503                             updatedtitle
+
+And now, at the end of Record 0 of the PDB file format, we usually get the full
+file name, the offset of which is given in the MOBI header.
+
+
+Variable-width integers
+-----------------------
+
+Some parts of the Mobipocket format encode data as variable-width integers.
+These integers are represented big-endian with 7 bits per byte in bits 1-7. They
+may be either forward-encoded, in which case only the LSB has bit 8 set, or
+backward-encoded, in which case only the MSB has bit 8 set. For example, the
+number 0x11111 would be represented forward-encoded as:
+
+    0x04 0x22 0x91
+
+And backward-encoded as: 
+
+    0x84 0x22 0x11
+
+
+Trailing entries
+----------------
+
+The Extra Data Flags field of the MOBI header indicates which, if any, trailing
+entries are appended to the end of each text record. Each set bit in the field
+indicates a trailing entry. The entries appear to occur in bit-order; e.g.,
+trailing entry 1 immediately follows the text content and entry 16 occurs at
+the very end of the record. The effect and exact details of most of these
+entries is unknown. The trailing entries indicated by bits 2-16 appear to
+follow a common format. That format is:
+
+    <data><size>
+
+Where <size> is the size of the entire trailing entry (including the size of
+<size>) as a backward-encoded Mobipocket variable-width integer.
+
+Only a few bits have been identified:
+
+bit     Data at end of records
+0x0001  Multi-byte character overlaps
+0x0002  Some data to help with indexing
+0x0004  Some data about uncrossable breaks
+
+
+Multibyte character overlap
+---------------------------
+
+When bit 1 of the Extra Data Flags field is set, each record is followed by a
+trailing entry containing any extra bytes necessary to complete a multibyte
+character which crosses the record boundary. The bytes do not participate in
+compression regardless which compression scheme is used for the file. However,
+unlike the trailing data bytes, the multibytes (including the count byte) do
+get included in any encryption. The overlapping bytes then re-appear as normal
+content at the beginning of the following record. The trailing entry ends with
+a byte containing a count of the overlapping bytes plus additional flags.
+
+offset  bytes   content         comments
+0       0-3     N terminal bytes
+                of a multibyte
+                character
+N       1       Size & flags    bits 1-2 encode N, use of bits 3-8 is unknown 
+
+
+PalmDOC Compression
+-------------------
+
+PalmDOC uses LZ77 compression techniques. DOC files can contain only compressed
+text. The format does not allow for any text formatting. This keeps files small,
+in keeping with the Palm philosophy. However, extensions to the format can use
+tags, such as HTML or PML, to include formatting within text. These extensions
+to PalmDoc are not interchangeable and are the basis for most eBook Reader
+formats on Palm devices.
+
+LZ77 algorithms achieve compression by replacing portions of the data with
+references to matching data that has already passed through both encoder and
+decoder. A match is encoded by a pair of numbers called a length-distance pair,
+which is equivalent to the statement "each of the next length characters is
+equal to the character exactly distance characters behind it in the uncompressed
+stream." (The "distance" is sometimes called the "offset" instead.)
+
+In the PalmDoc format, a length-distance pair is always encoded by a two-byte
+sequence. Of the 16 bits that make up these two bytes, 11 bits go to encoding
+the distance, 3 go to encoding the length, and the remaining two are used to
+make sure the decoder can identify the first byte as the beginning of such a
+two-byte sequence. The exact algorithm needed to decode the compressed text can
+be found on the PalmDOC page.
+
+PalmDOC data is always divided into 4096 byte blocks and the blocks are acted
+upon independently.
+
+PalmDOC does have support for bookmarks. These pointers are named and refer to
+an offset location in a file. If the file is edited these locations may no
+longer refer to the correct locations. Some reading programs allow the user to
+enter or edit these bookmarks while others treat them as a TOC. Some reading
+programs may ignore them entirely. They are stored at the end of the file itself
+so the full file needs to be scanned when loaded to find them. 
+
+
+MBP
+---
+
+This is the extension used on a side file (auxiliary) for MOBI formatted eBooks.
+It is used to store metadata  used by the library software and also to store
+user entered data like bookmarks, annotations, last read position. This file is
+created automatically by the reader program when the eBook is first opened and
+has a .mbp extension. The Library management software in MobiPocket uses this
+file to get information displayed in the library window such as title and author
+so that it won't have to open the larger eBook file.
+
--- a/format_docs/pdb/palmdoc.txt
+++ b/format_docs/pdb/palmdoc.txt
@ -0,0 +1,25 @@
+PalmDoc Format
+--------------
+
+The format is that of a standard Palm Database Format file. The header of that
+format includes the name of the database (usually the book title and sometimes
+a portion of the author's name) which is up to 31 bytes of data. This string of
+characters is terminated with a 0 in the C style. The files are identified as
+Creator ID of REAd and a Type of TEXt. 
+
+
+Record 0
+--------
+
+The first record in the Palm Database Format gives more information about the
+PalmDOC file, and contains 16 bytes.
+
+bytes   content             comments 
+
+2       Compression         1 == no compression, 2 = PalmDOC compression (see below)
+2       Unused              Always zero
+4       text length         Uncompressed length of the entire text of the book
+2       record count        Number of PDB records used for the text of the book.
+2       record size         Maximum size of each record containing text, always 4096
+4       Current Position    Current reading position, as an offset into the uncompressed text
+
--- a/format_docs/pdb/pdb_format.txt
+++ b/format_docs/pdb/pdb_format.txt
@ -0,0 +1,104 @@
+Format
+------
+
+A PDB file can be broken into multiple parts: the header, record 0 and data.
+Values stored within the various parts are in big-endian byte order. The data
+part is broken down into multiple sections. The section count and offsets
+are referenced in the PDB header. Sections can be no more than 65505 bytes in
+length.
+
+
+Layout
+------
+
+PDB files take the format: DB header followed by record 0, which contains
+format specific information, followed by data.
+
+    DB Header
+0   Record 0
+.
+.   Data (broken down into sections)
+.
+
+
+Palm Database Header Format
+
+bytes   content             comments 
+
+32      name                database name. This name is 0 terminated in the
+                            field and will be used as the file name on a
+                            computer. For eBooks this usually contains the
+                            title and may have the author depending on the
+                            length available.
+
+2       attributes          bit field.
+                            0x0002 Read-Only
+                            0x0004 Dirty AppInfoArea
+                            0x0008 Backup this database (i.e. no conduit exists)
+                            0x0010 (16 decimal) Okay to install newer over
+                                    existing copy, if present on PalmPilot
+                            0x0020 (32 decimal) Force the PalmPilot to reset
+                                    after this database is installed
+                            0x0040 (64 decimal) Don't allow copy of file to be
+                                    beamed to other Pilot.
+
+2       version             file version
+
+4       creation date       No. of seconds since start of January 1, 1904.
+
+4       modification date   No. of seconds since start of January 1, 1904.
+
+4       last backup date    No. of seconds since start of January 1, 1904.
+
+4       modificationNumber
+
+4       appInfoID           offset to start of Application Info (if present)
+                            or null
+
+4       sortInfoID          offset to start of Sort Info (if present) or null
+
+4       type                See the table in pdb_types.txt. (For Applications
+                            this data will be 'appl')
+
+4       creator             See the table in pdb_types.txt. This program will
+                            be launched if the file is tapped
+
+4       uniqueIDseed        used internally to identify record
+
+4       nextRecordListID    Only used when in-memory on Palm OS. Always set to
+                            zero in stored files.
+
+2       number of Records   number of records in the file - N
+
+8N      record Info List
+
+        start of record
+        info entry          Repeat N times to end of record info entry
+
+4       record Data Offset  the offset from the start of the PDB of this record
+
+1       record Attributes   bit field. The least significant four bits are used
+                            to represent the category values. These are the
+                            categories used to split the databases for viewing
+                            on the screen. A few of the 16 categories are
+                            pre-defined but the user can add their own. There
+                            is an undefined category for use if the user or
+                            programmer hasn't set this.
+                            0x10 (16 decimal) Secret record bit.
+                            0x20 (32 decimal) Record in use (busy bit).
+                            0x40 (64 decimal) Dirty record bit.
+                            0x80 (128, unsigned decimal) Delete record on
+                                  next HotSync.
+
+3       UniqueID            The unique ID for this record. Often just a
+                            sequential count from 0
+
+        end of record
+        info entry
+
+2?      Gap to data        traditionally 2 zero bytes to Info or raw data
+
+?       Records            The actual data in the file. AppInfoArea (if
+                           present), SortInfoArea (if present) and then
+                           records sequentially
+
--- a/format_docs/pdb/pdb_types.txt
+++ b/format_docs/pdb/pdb_types.txt
@ -0,0 +1,34 @@
+Palm Database File Code
+-----------------------
+
+Reader                      Type Code
+
+Adobe Reader                .pdfADBE
+PalmDOC                     TEXtREAd
+BDicty                      BVokBDIC
+DB (Database program)       DB99DBOS
+eReader                     PNRdPPrs
+eReader                     DataPPrs
+FireViewer (ImageViewer)    vIMGView
+HanDBase                    PmDBPmDB
+InfoView                    InfoINDB
+iSilo                       ToGoToGo
+iSilo 3                     SDocSilX
+JFile                       JbDbJBas
+JFile Pro                   JfDbJFil
+LIST                        DATALSdb
+MobileDB                    Mdb1Mdb1
+MobiPocket                  BOOKMOBI
+Plucker                     DataPlkr
+QuickSheet                  DataSprd
+SuperMemo                   SM01SMem
+TealDoc                     TEXtTlDc
+TealInfo                    InfoTlIf
+TealMeal                    DataTlMl
+TealPaint                   DataTlPt
+ThinkDB                     dataTDBP
+Tides                       TdatTide
+TomeRaider                  ToRaTRPW
+Weasel                      zTXTGPlm
+WordSmith                   BDOCWrdS 
+
--- a/format_docs/pdb/plucker.html
+++ b/format_docs/pdb/plucker.html
--- a/format_docs/pdb/pml.txt
+++ b/format_docs/pdb/pml.txt
@ -0,0 +1,936 @@
+Palm Markup Language
+--------------------
+
+This page explains how to use the Palm Markup Language (PML) to specify
+formatting and other information in a text file for later reading using the
+eReader.
+
+PML commands start with a backslash, "\", and usually consist of a single
+character after that. Some PML commands are paired, such as those that specify
+italicized text. Other commands are directives, such as the "\p", which
+specifies a page break. PML is not meant to be an industrial-strength markup
+language, but it is easy to understand, easy to parse, and creates high-quality
+electronic books.
+
+Since PML and Palm DropBook are not without flaws, there is a page of Tips and
+Pitfalls.
+
+
+Let's Dive Right In
+-------------------
+
+palmsample.txt contains examples of formatting text, specifying chapters, etc.
+Use it to start from, or just as an example when making your own books.
+
+The following table specifies the Palm Markup Language commands, and what
+they do.
+
+\p                              New page
+\x                              New chapter; also causes a new page break.
+                                Enclose chapter title (and any style codes)
+                                with \x and \x
+\Xn                             New chapter, indented n levels (n between 0 and
+                                4 inclusive) in the Chapter dialog; doesn't
+                                cause a page break. Enclose chapter title (and
+                                any style codes) with \Xn and \Xn
+\Cn="Chapter title"             Insert "Chapter title" into the chapter
+                                listing, with level n (like \Xn). The text is
+                                not shown on the page and does not force a page
+                                break. This can sometimes be useful to insert a
+                                chapter mark at the beginning of an
+                                introduction to the chapter, for example.
+\c                              Center this block of text; close with \c on
+                                beginning of line
+\r                              Right justify text block; close with \r on
+                                beginning of line
+\i                              Italicize block; close with \i
+\u                              Underline block; close with \u
+\o                              Overstrike block; close with \o
+\v                              Invisible text; close with \v (can be used for
+                                comments)
+\t                              Indent block. Start at beginning of a line,
+                                close with \t at end of a line
+\T="50%"                        Indents the specified percentage of the screen
+                                width, 50% in this case. If the current drawing
+                                position is already past the specified screen
+                                location, this tag is ignored.
+\w="50%"                        Embed a horizontal rule of a given percentage
+                                width of the screen, in this case 50%. This tag
+                                causes a line break before and after it. The
+                                rule is centered. The percent sign is mandatory.
+\n                              Switch to the "normal" font, which is specified
+                                by the user
+\s                              Switch to stdFont; close with \s to revert to
+                                normal font
+\b                              Switch to boldFont; close with \b to revert to
+                                normal font (deprecated; use \B instead)
+\l                              Switch to largeFont; close with \l to revert to
+                                normal font
+\B                              Mark text as bold. Unlike the \b tag, \B
+                                doesn't change the font, so you can have large
+                                bold text. You cannot mix \b and \B in the same
+                                PML file.
+\Sp                             Mark text as superscript. Should not be mixed
+                                with other styles such as bold, italic, etc.
+                                Enclose superscripted text with \Sp.
+\Sb                             Mark text as subscript. Should not be mixed
+                                with other styles such as bold, italic, etc.
+                                Enclose subscripted text with \Sb.
+\k                              Make enclosed text into small-caps; close with
+                                \k. Any characters enclosed in \k tags
+                                (including those with accents) are made
+                                uppercase and are rendered at a smaller point
+                                size than a regular uppercase character.
+\\                              Represents a single backslash
+\aXXX                           Insert non-ASCII character whose Windows 1252
+                                code is decimal XXX. See the PML character
+                                table for details.
+\UXXXX                          Insert non-ASCII character whose Unicode code
+                                is hexadecimal XXXX. See the Extended PML
+                                character table for details.
+\m="imagename.png"              Insert the named image. See the section on
+                                Images below.
+\q="#linkanchor"Some text\q     Reference a link anchor which is at another
+                                spot in the document. The string after the
+                                anchor specification and before the trailing \q
+                                is underlined or otherwise shown to be a link
+                                when viewing the document.
+\Q="linkanchor"                 Specify a link anchor in the document.
+\-                              Insert a soft hyphen. A soft hyphen shows up
+                                only if it is necessary to break a word across
+                                a line.
+\Fn="footnote1"1\Fn             Link the "1" to a footnote whose name is
+                                footnote1, tagged at the end of the PML
+                                document. See the section on Footnotes and
+                                Sidebars below.
+\Sd="sidebar1"Sidebar\Sd        Link the "Sidebar" text to a sidebar whose name
+                                is sidebar1, tagged at the end of the PML
+                                document. See the section on Footnotes and
+                                Sidebars below.
+\I                              Mark as a reference index item. Enclose index
+                                item (and any style codes) with \I and \I. See
+                                Creating Dictionaries for more information.
+
+
+Examples
+--------
+
+\pThis is a new page
+
+\xChapter III\x
+
+\X1Chapter III, part A\X1
+
+\p\C1="Introduction"The following story is one of my favorites...
+
+\cProperty of
+Gateway Senior High School
+\c
+
+\rJustify my love
+\r
+
+This stuff is \ireally\i cool.
+
+I just read \uMoby Dick.\u
+
+This is a \obig\o mistake.
+
+Copyright 1917\v Date of magazine serialization \v
+
+\tOnce upon a time
+there was a wicked queen
+called Esmerelda.\t
+
+Mammals:\T="40%"Lions
+\T="40%"Tigers
+\T="40%"Bears
+
+He walked away.
+\w="80%"
+Later that day, he ran into an old friend.
+
+\nIn the normal ways...
+
+The \stitle page\s should be formatted...
+
+I just \bcan't\b believe that you...
+
+This \lREALLY\l is a large tiger...
+
+This \Bbold\B text can be either \l\Blarge bold\B\l or \s\Bsmall bold\B\s.
+
+e\Spx + 2\Sp = 9
+
+C\Sb2\SbH\Sb3\SbO\Sb2\Sb should be used in moderation.
+
+See also \kanteater\k.
+
+The DOS prompt said "C:\\windows\\"
+
+The man said \a147Yeah.\a148
+
+Arrows can point \U2190 left or right \U2192.
+
+A Yield sign looks like this: \m="yieldsign.png".
+
+See the \q="#detailedinstructions"Detailed Instructions\q for how to install your eBook.
+
+\Q="detailedinstructions"\bDetailed Instructions\b - This section
+describes how to install an eBook to your handheld device.
+
+Very long words like anti\-dis\-establish\-ment\-arian\-ism may benefit from
+the use of soft hyphens.
+
+The Emerson case\Fn="emerson"[1]\Fn will be very important...
+
+For more information, see the \Sd="moreinfo"sidebar\Sd.
+
+\I\Baardvark\B\I \in.\i a large burrowing nocturnal mammal that feeds especially on termites and ants
+
+
+Footnotes and Sidebars
+----------------------
+
+Footnotes and Sidebars are specified with an XML-like syntax at the end of the
+PML document. For example,
+
+<sidebar id="sidebar1">
+Here's some \itext\i for a sidebar.
+</sidebar>
+
+would specify the sidebar to be displayed when the user taps on a sidebar link
+in the text that was specified using the \Sd tag.
+
+Any text or PML placed after the first footnote or sidebar is ignored as part
+of the book text.
+
+Sidebars and footnotes can include most PML features, but there are some PML
+tags that cannot be used inside of a sidebar or footnote.
+
+These include:
+Chapters    \x, \X, \C
+Links       \q, \Q
+Footnotes   \Fn
+Sidebars    \Sd
+
+See the palmsample.txt file for examples of how to use many of the PML tags.
+
+
+Images
+------
+
+The following rules are intended to guarantee that images in your eBook will be
+viewable on all platforms that eReader runs on.
+
+On low-resolution Palm OS handhelds, an image wider than 158 pixels or taller
+than 148 pixels will be represented in the text by a thumbnail that the user
+can tap to view the entire image. Images smaller than 158 x 148 will be
+presented in-line with the text.
+
+On high-resolution Palm OS handhelds (those having screens of 320x320 pixels or
+more), images smaller than 158 by 148 pixels will be pixel-doubled. Images
+larger than 158x148 may be shown in-line with the text, if they will fit on
+the screen.
+
+On non-Palm OS platforms, small images will be scaled up appropriately. Large
+images will be scaled down to fit on the page; in this case the user can tap on
+the image to view the entire image and zoom in or out.
+
+For DropBook to find the image, it must be present in a directory whose name
+matches that of the PML text file. For example, if "pmlsample.txt" contains a
+reference to an image called "intro.png", then there must be a directory called
+"pmlsample_img" that contains intro.png. The directory's name is the name of
+the PML file (without the .txt extension) with "_img" appended.
+
+Images must be in PNG format and cannot be filtered or interlaced. Image depth
+must be 8 bits or less. Any color table may be used for color images.
+
+Image files must be less than or equal to 65505 bytes in size, since they are
+embedded into the .pdb format of the book; Palm database records are limited to
+65505 bytes in length. Since images are compressed, the actual image displayed
+by the reader may be much larger than 64K.
+
+Any or all of these restrictions may eventually be removed.
+
+
+Adding a Title, Cover Art, and Other Meta-information to Your eBook
+-------------------------------------------------------------------
+
+DropBook normally presents a dialog in which the title and other information
+for the eBook may be specified. This information may be embedded in the PML
+file instead.
+
+To specify the eBook title as it will appear in the Open dialog on the
+handheld, place a block of invisible comment text at the beginning of the file
+using \v tags. Inside this comment block, put the string TITLE="My eBook",
+where "My eBook" is replaced with the name of your eBook. It should look
+something like this:
+
+\vTITLE="Palm Sample Document"\v
+
+You can also specify the author using the AUTHOR meta-tag, the publisher with
+PUBLISHER, copyright information with COPYRIGHT, and the eBook ISBN with EISBN.
+A fully-specified set of meta-information might appear in PML as:
+
+\vTITLE="Palm Sample Document" AUTHOR="Sam Morgenstern" PUBLISHER="eReader.com"
+EISBN="X-XXXX-XXXX" COPYRIGHT="Copyright \a169 2004 by Sam Morgenstern"\v
+
+Cover art: If an image named "cover.png" is present in the eBook, it is assumed
+to be the cover art for the eBook. See the rules for images for sizing and
+other information.
+
+Some or all of this information may appear in the book information dialog in
+eReader, and may be used for other purposes in future products.
+
+
+Creating Dictionaries
+---------------------
+
+The \I PML tag is used to delimit an index item. Example: \Iaardvark\I
+
+Each entry must start in the normal font. If DropBook shows an error beginning
+with "No styles permitted before...", there is probably a missing end style tag
+before the text shown in the error message.
+
+Links, chapters and other PML structures are not permitted in dictionaries.
+Images, however, are.
+
+A special dictionary entry, "(Front matter)" is shown before other entries in
+the list of entries, and should be used to include pronunciation symbols and
+other front matter.
+
+Note that use of dictionaries requires eReader Pro.
+
+
+Tips and Pitfalls
+-----------------
+
+This page explains some common mistakes, some bugs in DropBook and/or the
+eReader, and some techniques that will allow you to create quality electronic
+books for the eReader.
+
+    * Check out the Converting to Palm eBooks page for some pointers on
+      converting text from various formats into the Palm Markup Language.
+    * Use a return at the end of each paragraph, not each line.
+    * Using an extra return between paragraphs reads easier than paragraph
+      indentation.
+    * The eReader doesn't display empty lines at the top of a page. If you need
+      to have some "empty" lines at the top of a page, put a space on each line.
+    * Don't use tables if you can possibly avoid it.
+
+      None of the fonts that the eReader supports are monospaced, so tables can
+      be difficult to represent. Break out the information in another way, or
+      use the \T tag, but beware of tables that look great on a Palm OS
+      handheld but not on a Pocket PC or vice versa.
+
+    * The Reader breaks lines on spaces, dashes or underscores. This has
+      several implications.
+
+         1. Don't fill more than a line with spaces, dashes or underscores.
+            There's a bug (which will be fixed in a future release) which
+            causes MakeBook to hang on such a line. Note that in the large
+            font, the number of spaces, dashes or underscores will be much
+            smaller than in the small font.
+         2. A string such as He shouted "Wait!--" may place the last quote on
+            the beginning of a line, since the line would break after the
+            second dash. Prevent this by using the PML string: He shouted
+            "Wait!\a150\a150". The non-breaking dash, code 150, will not break
+            a line. Use \a160 for a non-breaking space. Even better: use \a151,
+            a long dash, instead of two short dashes.
+
+    * The justification codes \c and \r (center and right justification) must
+      have closing codes on the beginning of the line following the justified
+      text.
+    * The indentation tag \t must have a closing tag at the end of a line of
+      the indented text.
+    * Use \s (small font) in the title page(s) of books to force the page(s) to
+      format nicely. Other than that, \n, \s and \l should rarely be necessary;
+      the font size used for most text display should be chosen by the user.
+
+
+Converting Uncommon Characters to PML
+-------------------------------------
+
+Use this chart to convert uncommon characters to their Palm Markup Language
+(PML) equivalent. Most characters are simply represented as themselves in PML
+and don't require this chart. But some uncommon characters can only be
+represented in PML by their "\aXXX" syntax. Use this chart to look up that
+"\aXXX" syntax.
+
+For example, if you wanted to write the following phrase in PML:
+
+    Copyright © 1999 by Samuel Morgenstern
+
+In PML, you would write it as:
+
+    Copyright \a169 1999 by Samuel Morgenstern
+
+Char    HTML # Code HTML Char Code  PML Char Code  Description
+
+        &#32;       -               Normal space
+!       &#33;       -       !       Exclamation
+"       &#34;       &quot;  "       Double quote
+#       &#35;       -       #       Hash
+$       &#36;       -       $       Dollar
+%       &#37;       -       %       Percent
+&       &#38;       &amp;   &       Ampersand
+'       &#39;       -       '       Apostrophe
+(       &#40;       -       (       Open bracket
+)       &#41;       -       )       Close bracket
+*       &#42;       -       *       Asterisk
++       &#43;       -       +       Plus sign
+,       &#44;       -       ,       Comma
+-       &#45;       -       -       Minus sign
+.       &#46;       -       .       Period
+/       &#47;       -       /       Forward slash
+0       &#48;       -       0       Digit 0
+1       &#49;       -       1       Digit 1
+2       &#50;       -       2       Digit 2
+3       &#51;       -       3       Digit 3
+4       &#52;       -       4       Digit 4
+5       &#53;       -       5       Digit 5
+6       &#54;       -       6       Digit 6
+7       &#55;       -       7       Digit 7
+8       &#56;       -       8       Digit 8
+9       &#57;       -       9       Digit 9
+:       &#58;       -       :       Colon
+;       &#59;       -       ;       Semicolon
+<       &#60;       &lt;    <       Less than
+=       &#61;       -       =       Equals
+>       &#62;       &gt;    >       Greater than
+?       &#63;       -       ?       Question mark
+@       &#64;       -       @       At sign
+A       &#65;       -       A       A
+B       &#66;       -       B       B
+C       &#67;       -       C       C
+D       &#68;       -       D       D
+E       &#69;       -       E       E
+F       &#70;       -       F       F
+G       &#71;       -       G       G
+H       &#72;       -       H       H
+I       &#73;       -       I       I
+J       &#74;       -       J       J
+K       &#75;       -       K       K
+L       &#76;       -       L       L
+M       &#77;       -       M       M
+N       &#78;       -       N       N
+O       &#79;       -       O       O
+P       &#80;       -       P       P
+Q       &#81;       -       Q       Q
+R       &#82;       -       R       R
+S       &#83;       -       S       S
+T       &#84;       -       T       T
+U       &#85;       -       U       U
+V       &#86;       -       V       V
+W       &#87;       -       W       W
+X       &#88;       -       X       X
+Y       &#89;       -       Y       Y
+Z       &#90;       -       Z       Z
+[       &#91;       -       [       Open square bracket
+\       &#92;       -       \\       Backslash
+]       &#93;       -       ]       Close square bracket
+^       &#94;       -       ^       Caret
+_       &#95;       -       _       Underscore
+`       &#96;       -       `       Grave accent
+a       &#97;       -       a       a
+b       &#98;       -       b       b
+c       &#99;       -       c       c
+d       &#100;       -       d       d
+e       &#101;       -       e       e
+f       &#102;       -       f       f
+g       &#103;       -       g       g
+h       &#104;       -       h       h
+i       &#105;       -       i       i
+j       &#106;       -       j       j
+k       &#107;       -       k       k
+l       &#108;       -       l       l
+m       &#109;       -       m       m
+n       &#110;       -       n       n
+o       &#111;       -       o       o
+p       &#112;       -       p       p
+q       &#113;       -       q       q
+r       &#114;       -       r       r
+s       &#115;       -       s       s
+t       &#116;       -       t       t
+u       &#117;       -       u       u
+v       &#118;       -       v       v
+w       &#119;       -       w       w
+x       &#120;       -       x       x
+y       &#121;       -       y       y
+z       &#122;       -       z       z
+{       &#123;       -       {       Left brace
+|       &#124;       -       |       Vertical bar
+}       &#125;       -       }       Right brace
+~       &#126;       -       ~       Tilde
+
+        &#160;       &nbsp;     \a160       Non-breaking space
+        &#161;       &iexcl;    \a161       Inverted exclamation
+        &#162;       &cent;     \a162       Cent sign
+        &#163;       &pound;    \a163       Pound sign
+        &#164;       &curren;   \a164       Currency sign
+        &#165;       &yen;      \a165       Yen sign
+        &#166;       &brvbar;   \a166       Broken bar
+        &#167;       &sect;     \a167       Section sign
+        &#168;       &uml;      \a168       Umlaut or diaeresis
+        &#169;       &copy;     \a169       Copyright sign
+        &#170;       &ordf;     \a170       Feminine ordinal
+        &#171;       &laquo;    \a171       Left angle quotes
+        &#172;       &not;      \a172       Logical not sign
+        &#173;       &shy;      \a173       Soft hyphen
+        &#174;       &reg;      \a174       Registered trademark
+        &#175;       &macr;     \a175       Spacing macron
+        &#176;       &deg;      \a176       Degree sign
+        &#177;       &plusmn;   \a177       Plus-minus sign
+        &#178;       &sup2;     \a178       Superscript 2
+        &#179;       &sup3;     \a179       Superscript 3
+        &#180;       &acute;    \a180       Spacing acute
+        &#181;       &micro;    \a181       Micro sign
+        &#182;       &para;     \a182       Paragraph sign
+        &#183;       &middot;   \a183       Middle dot
+        &#184;       &cedil;    \a184       Spacing cedilla
+        &#185;       &sup1;     \a185       Superscript 1
+        &#186;       &ordm;     \a186       Masculine ordinal
+        &#187;       &raquo;    \a187       Right angle quotes
+        &#188;       &frac14;   \a188       One quarter
+        &#189;       &frac12;   \a189       One half
+        &#190;       &frac34;   \a190       Three quarters
+        &#191;       &iquest;   \a191       Inverted question mark
+        &#192;       &Agrave;   \a192       A grave
+        &#193;       &Aacute;   \a193       A acute
+        &#194;       &Acirc;    \a194       A circumflex
+        &#195;       &Atilde;   \a195       A tilde
+        &#196;       &Auml;     \a196       A diaeresis
+        &#197;       &Aring;    \a197       A ring
+        &#198;       &Aelig;    \a198       AE ligature
+        &#199;       &Ccedil;   \a199       C cedilla
+        &#200;       &Egrave;   \a200       E grave
+        &#201;       &Eacute;   \a201       E acute
+        &#202;       &Ecirc;    \a202       E circumflex
+        &#203;       &Euml;     \a203       E diaeresis
+        &#204;       &Igrave;   \a204       I grave
+        &#205;       &Iacute;   \a205       I acute
+        &#206;       &Icirc;    \a206       I circumflex
+        &#207;       &Iuml;     \a207       I diaeresis
+        &#208;       &ETH;      \a208       Eth
+        &#209;       &Ntilde;   \a209       N tilde
+        &#210;       &Ograve;   \a210       O grave
+        &#211;       &Oacute;   \a211       O acute
+        &#212;       &Ocirc;    \a212       O circumflex
+        &#213;       &Otilde;   \a213       O tilde
+        &#214;       &Ouml;     \a214       O diaeresis
+        &#215;       &times;    \a215       Multiplication sign
+        &#216;       &Oslash;   \a216       O slash
+        &#217;       &Ugrave;   \a217       U grave
+        &#218;       &Uacute;   \a218       U acute
+        &#219;       &Ucirc;    \a219       U circumflex
+        &#220;       &Uuml;     \a220       U diaeresis
+        &#221;       &Yacute;   \a221       Y acute
+        &#222;       &THORN;    \a222       THORN
+        &#223;       &szlig;    \a223       sharp s
+        &#224;       &agrave;   \a224       a grave
+        &#225;       &aacute;   \a225       a acute
+        &#226;       &acirc;    \a226       a circumflex
+        &#227;       &atilde;   \a227       a tilde
+        &#228;       &auml;     \a228       a diaeresis
+        &#229;       &aring;    \a229       a ring
+        &#230;       &aelig;    \a230       ae ligature
+        &#231;       &ccedil;   \a231       c cedilla
+        &#232;       &egrave;   \a232       e grave
+        &#233;       &eacute;   \a233       e acute
+        &#234;       &ecirc;    \a234       e circumflex
+        &#235;       &euml;     \a235       e diaeresis
+        &#236;       &igrave;   \a236       i grave
+        &#237;       &iacute;   \a237       i acute
+        &#238;       &icirc;    \a238       i circumflex
+        &#239;       &iuml;     \a239       i diaeresis
+        &#240;       &eth;      \a240       eth
+        &#241;       &ntilde;   \a241       n tilde
+        &#242;       &ograve;   \a242       o grave
+        &#243;       &oacute;   \a243       o acute
+        &#244;       &ocirc;    \a244       o circumflex
+        &#245;       &otilde;   \a245       o tilde
+        &#246;       &ouml;     \a246       o diaeresis
+        &#247;       &divide;   \a247       division sign
+        &#248;       &oslash;   \a248       o slash
+        &#249;       &ugrave;   \a249       u grave
+        &#250;       &uacute;   \a250       u acute
+        &#251;       &ucirc;    \a251       u circumflex
+        &#252;       &uuml;     \a252       u diaeresis
+        &#253;       &yacute;   \a253       y acute
+        &#254;       &thorn;    \a254       thorn
+        &#255;       &yuml;     \a255       y diaeresis
+,       &#8218;      &sbquo;    \a130       single low quote
+        &#402;       &fnof;     \a131       Scripted f
+        &#8222;      &bdquo;    \a132       low quote
+        &#8230;      &hellip;   \a133       Ellipsis
+        &#8224;      &dagger;   \a134       Dagger
+        &#8225;      &Dagger;   \a135       Double dagger
+        &#352;       &Scaron;   \a138       Large S w/inverted caret
+<       &#8249;      &lsaquo;   \a139       single left angle quote
+        &#338;       &OElig;    \a140       Large combined oe
+        &#8216;      &lsquo;    \a145       Open single smart quote
+        &#8217;      &rsquo;    \a146       Close single smart quote
+        &#8220;      &ldquo;    \a147       Open double smart quote
+        &#8221;      &rdquo;    \a148       Close double smart quote
+        &#8226;      &bull;     \a149       Bullet
+        &#8211;      &ndash;    \a150       Small dash (en dash)
+        &#8212;      &mdash;    \a151       Large dash (em dash)
+        &#8482;      &trade;    \a153       Trademark
+        &#353;       &scaron;   \a154       Small S w/inverted caret
+>       &#8250;      &rsaquo;   \a155       single right angle quote
+        &#339;       &oelig;    \a156       Small combined oe
+        &#376;       &Yuml;     \a159       Large Y with diaeresis
+
+
+Extended Character Set
+----------------------
+
+In addition to the special characters supported by earlier versions of eReader
+(which can be accessed using the \a### tag), all versions of eReader Pro and
+eReader version 2.4 and later include support for additional special characters
+and symbols. These symbols can be accessed using the \U#### tag, where #### are
+four hexadecimal digits giving the Unicode encoding of the special character.
+
+Only the limited subset of Unicode characters given in the table below are
+supported. In addition, some of the characters that are included in the table
+are not present in eReader Pro versions prior to 2.4. To ensure that the
+characters are displayed correctly, books using these tags should be read using
+eReader or eReader Pro version 2.4 or later.
+
+On Palm OS handhelds these special symbols are only available in one size,
+matching the "Small" font. For best results on Palm OS handhelds the \U tag
+should only be used inside blocks set to the "Small" font by way of \s tags.
+On Palm OS handhelds these special characters are not affected by the font tags
+(\s, \l, \b and \n), the bold style tag (\B), or the small caps style tag (\k).
+
+If the \U characters are not showing up correctly using eReader on your Windows
+desktop or laptop this problem is a result of the fonts for eReader not being
+installed properly. The solution is to go to the directory C:\Windows\Fonts\
+and "double click" on each font that starts with "Maynard". This will open each
+font and allow the system to register it. Close the windows that were opened
+as a result of the mouse clicks and the problem should be resolved.
+
+Char     HTML Code     PML Code     Description
+
+Latin Extended-A
+Ā     &#256;     \U0100     LATIN CAPITAL LETTER A WITH MACRON
+ā     &#257;     \U0101     LATIN SMALL LETTER A WITH MACRON
+Ă     &#258;     \U0102     LATIN CAPITAL LETTER A WITH BREVE
+ă     &#259;     \U0103     LATIN SMALL LETTER A WITH BREVE
+ą     &#261;     \U0105     LATIN SMALL LETTER A WITH OGONEK
+ć     &#263;     \U0107     LATIN SMALL LETTER C WITH ACUTE
+Č     &#268;     \U010C     LATIN CAPITAL LETTER C WITH CARON
+č     &#269;     \U010D     LATIN SMALL LETTER C WITH CARON
+Ē     &#274;     \U0112     LATIN CAPITAL LETTER E WITH MACRON
+ē     &#275;     \U0113     LATIN SMALL LETTER E WITH MACRON
+ĕ     &#277;     \U0115     LATIN SMALL LETTER E WITH BREVE
+ė     &#279;     \U0117     LATIN SMALL LETTER E WITH DOT ABOVE
+ę     &#281;     \U0119     LATIN SMALL LETTER E WITH OGONEK
+ě     &#283;     \U011B     LATIN SMALL LETTER E WITH CARON
+ĝ     &#285;     \U011D     LATIN SMALL LETTER G WITH CIRCUMFLEX
+ğ     &#287;     \U011F     LATIN SMALL LETTER G WITH BREVE
+Ī     &#298;     \U012A     LATIN CAPITAL LETTER I WITH MACRON
+ī     &#299;     \U012B     LATIN SMALL LETTER I WITH MACRON
+ĭ     &#301;     \U012D     LATIN SMALL LETTER I WITH BREVE
+į     &#303;     \U012F     LATIN SMALL LETTER I WITH OGONEK
+ı     &#305;     \U0131     LATIN SMALL LETTER DOTLESS I
+Ł     &#321;     \U0141     LATIN CAPITAL LETTER L WITH STROKE
+ł     &#322;     \U0142     LATIN SMALL LETTER L WITH STROKE
+ń     &#324;     \U0144     LATIN SMALL LETTER N WITH ACUTE
+ň     &#328;     \U0148     LATIN SMALL LETTER N WITH CARON
+ŋ     &#331;     \U014B     LATIN SMALL LETTER ENG
+Ō     &#332;     \U014C     LATIN CAPITAL LETTER O WITH MACRON
+ō     &#333;     \U014D     LATIN SMALL LETTER O WITH MACRON
+ŏ     &#335;     \U014F     LATIN SMALL LETTER O WITH BREVE
+ő     &#337;     \U0151     LATIN SMALL LETTER O WITH DOUBLE ACUTE
+ŕ     &#341;     \U0155     LATIN SMALL LETTER R WITH ACUTE
+ř     &#345;     \U0159     LATIN SMALL LETTER R WITH CARON
+Ś     &#346;     \U015A     LATIN CAPITAL LETTER S WITH ACUTE
+ś     &#347;     \U015B     LATIN SMALL LETTER S WITH ACUTE
+ş     &#351;     \U015F     LATIN SMALL LETTER S WITH CEDILLA
+ţ     &#355;     \U0163     LATIN SMALL LETTER T WITH CEDILLA
+ũ     &#361;     \U0169     LATIN SMALL LETTER U WITH TILDE
+ū     &#363;     \U016B     LATIN SMALL LETTER U WITH MACRON
+ŭ     &#365;     \U016D     LATIN SMALL LETTER U WITH BREVE
+ŷ     &#375;     \U0177     LATIN SMALL LETTER Y WITH CIRCUMFLEX
+ź     &#378;     \U017A     LATIN SMALL LETTER Z WITH ACUTE
+Ž     &#381;     \U017D     LATIN CAPITAL LETTER Z WITH CARON
+ž     &#382;     \U017E     LATIN SMALL LETTER Z WITH CARON
+Latin Extended-B
+    &#447;     \U01BF     LATIN LETTER WYNN
+    &#462;     \U01CE     LATIN SMALL LETTER A WITH CARON
+    &#464;     \U01D0     LATIN SMALL LETTER I WITH CARON
+    &#466;     \U01D2     LATIN SMALL LETTER O WITH CARON
+    &#468;     \U01D4     LATIN SMALL LETTER U WITH CARON
+    &#481;     \U01E1     LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
+    &#483;     \U01E3     LATIN SMALL LETTER AE WITH MACRON
+    &#487;     \U01E7     LATIN SMALL LETTER G WITH CARON
+    &#491;     \U01EB     LATIN SMALL LETTER O WITH OGONEK
+    &#496;     \U01F0     LATIN SMALL LETTER J WITH CARON
+    &#519;     \U0207     LATIN SMALL LETTER E WITH INVERTED BREVE
+    &#541;     \U021D     LATIN SMALL LETTER YOGH
+    &#551;     \U0227     LATIN SMALL LETTER A WITH DOT ABOVE
+    &#559;     \U022F     LATIN SMALL LETTER O WITH DOT ABOVE
+    &#563;     \U0233     LATIN SMALL LETTER Y WITH MACRON
+IPA Extensions
+    &#593;     \U0251     LATIN SMALL LETTER SCRIPT A
+    &#594;     \U0252     LATIN SMALL LETTER TURNED SCRIPT A
+    &#596;     \U0254     LATIN SMALL LETTER OPEN O
+    &#601;     \U0259     LATIN SMALL LETTER SCHWA
+    &#604;     \U025C     LATIN SMALL LETTER REVERSED OPEN E
+    &#613;     \U0265     LATIN SMALL LETTER TURNED H
+    &#618;     \U026A     LATIN LETTER SMALL CAPITAL I
+    &#626;     \U0272     LATIN SMALL LETTER N WITH LEFT HOOK
+    &#643;     \U0283     LATIN SMALL LETTER ESH
+    &#649;     \U0289     LATIN SMALL LETTER U BAR
+    &#650;     \U028A     LATIN SMALL LETTER UPSILON
+    &#652;     \U028C     LATIN SMALL LETTER TURNED V
+    &#655;     \U028F     LATIN LETTER SMALL CAPITAL Y
+    &#658;     \U0292     LATIN SMALL LETTER EZH
+    &#660;     \U0294     LATIN LETTER GLOTTAL STOP
+    &#668;     \U029C     LATIN LETTER SMALL CAPITAL H
+Spacing Modifier Letters
+    &#702;     \U02BE     MODIFIER LETTER RIGHT HALF RING
+    &#703;     \U02BF     MODIFIER LETTER LEFT HALF RING
+ˇ   &#711;     \U02C7     CARON
+    &#712;     \U02C8     MODIFIER LETTER VERTICAL LINE
+    &#716;     \U02CC     MODIFIER LETTER LOW VERTICAL LINE
+    &#720;     \U02D0     MODIFIER LETTER TRIANGULAR COLON
+˘   &#728;     \U02D8     BREVE
+˙   &#729;     \U02D9     DOT ABOVE
+Greek and Coptic
+Α     &#913;     \U0391     GREEK CAPITAL LETTER ALPHA
+Β     &#914;     \U0392     GREEK CAPITAL LETTER BETA
+Γ     &#915;     \U0393     GREEK CAPITAL LETTER GAMMA
+Δ     &#916;     \U0394     GREEK CAPITAL LETTER DELTA
+Ε     &#917;     \U0395     GREEK CAPITAL LETTER EPSILON
+Ζ     &#918;     \U0396     GREEK CAPITAL LETTER ZETA
+Η     &#919;     \U0397     GREEK CAPITAL LETTER ETA
+Θ     &#920;     \U0398     GREEK CAPITAL LETTER THETA
+Ι     &#921;     \U0399     GREEK CAPITAL LETTER IOTA
+Κ     &#922;     \U039A     GREEK CAPITAL LETTER KAPPA
+Λ     &#923;     \U039B     GREEK CAPITAL LETTER LAMBDA
+Μ     &#924;     \U039C     GREEK CAPITAL LETTER MU
+Ν     &#925;     \U039D     GREEK CAPITAL LETTER NU
+Ξ     &#926;     \U039E     GREEK CAPITAL LETTER XI
+Ο     &#927;     \U039F     GREEK CAPITAL LETTER OMICRON
+Π     &#928;     \U03A0     GREEK CAPITAL LETTER PI
+Ρ     &#929;     \U03A1     GREEK CAPITAL LETTER RHO
+Σ     &#931;     \U03A3     GREEK CAPITAL LETTER SIGMA
+Τ     &#932;     \U03A4     GREEK CAPITAL LETTER TAU
+Υ     &#933;     \U03A5     GREEK CAPITAL LETTER UPSILON
+Φ     &#934;     \U03A6     GREEK CAPITAL LETTER PHI
+Χ     &#935;     \U03A7     GREEK CAPITAL LETTER CHI
+Ψ     &#936;     \U03A8     GREEK CAPITAL LETTER PSI
+Ω     &#937;     \U03A9     GREEK CAPITAL LETTER OMEGA
+α     &#945;     \U03B1     GREEK SMALL LETTER ALPHA
+β     &#946;     \U03B2     GREEK SMALL LETTER BETA
+γ     &#947;     \U03B3     GREEK SMALL LETTER GAMMA
+δ     &#948;     \U03B4     GREEK SMALL LETTER DELTA
+ε     &#949;     \U03B5     GREEK SMALL LETTER EPSILON
+ζ     &#950;     \U03B6     GREEK SMALL LETTER ZETA
+η     &#951;     \U03B7     GREEK SMALL LETTER ETA
+θ     &#952;     \U03B8     GREEK SMALL LETTER THETA
+ι     &#953;     \U03B9     GREEK SMALL LETTER IOTA
+κ     &#954;     \U03BA     GREEK SMALL LETTER KAPPA
+λ     &#955;     \U03BB     GREEK SMALL LETTER LAMBDA
+μ     &#956;     \U03BC     GREEK SMALL LETTER MU
+ν     &#957;     \U03BD     GREEK SMALL LETTER NU
+ξ     &#958;     \U03BE     GREEK SMALL LETTER XI
+ο     &#959;     \U03BF     GREEK SMALL LETTER OMICRON
+π     &#960;     \U03C0     GREEK SMALL LETTER PI
+ρ     &#961;     \U03C1     GREEK SMALL LETTER RHO
+ς     &#962;     \U03C2     GREEK SMALL LETTER FINAL SIGMA
+σ     &#963;     \U03C3     GREEK SMALL LETTER SIGMA
+τ     &#964;     \U03C4     GREEK SMALL LETTER TAU
+υ     &#965;     \U03C5     GREEK SMALL LETTER UPSILON
+φ     &#966;     \U03C6     GREEK SMALL LETTER PHI
+χ     &#967;     \U03C7     GREEK SMALL LETTER CHI
+ψ     &#968;     \U03C8     GREEK SMALL LETTER PSI
+ω     &#969;     \U03C9     GREEK SMALL LETTER OMEGA
+      &#977;     \U03D1     GREEK THETA SYMBOL
+      &#989;     \U03DD     GREEK SMALL LETTER DIGAMMA
+Hebrew
+א     &#1488;     \U05D0     HEBREW LETTER ALEPH
+ב     &#1489;     \U05D1     HEBREW LETTER BET
+ג     &#1490;     \U05D2     HEBREW LETTER GIMEL
+ד     &#1491;     \U05D3     HEBREW LETTER DALET
+ה     &#1492;     \U05D4     HEBREW LETTER HE
+ו     &#1493;     \U05D5     HEBREW LETTER VAV
+ז     &#1494;     \U05D6     HEBREW LETTER ZAYIN
+ח     &#1495;     \U05D7     HEBREW LETTER HET
+ט     &#1496;     \U05D8     HEBREW LETTER TET
+י     &#1497;     \U05D9     HEBREW LETTER YOD
+ך     &#1498;     \U05DA     HEBREW LETTER FINAL KAF
+כ     &#1499;     \U05DB     HEBREW LETTER KAF
+ל     &#1500;     \U05DC     HEBREW LETTER LAMED
+ם     &#1501;     \U05DD     HEBREW LETTER FINAL MEM
+מ     &#1502;     \U05DE     HEBREW LETTER MEM
+ן     &#1503;     \U05DF     HEBREW LETTER FINAL NUN
+נ     &#1504;     \U05E0     HEBREW LETTER NUN
+ס     &#1505;     \U05E1     HEBREW LETTER SAMEKH
+ע     &#1506;     \U05E2     HEBREW LETTER AYIN
+ף     &#1507;     \U05E3     HEBREW LETTER FINAL PE
+פ     &#1508;     \U05E4     HEBREW LETTER PE
+ץ     &#1509;     \U05E5     HEBREW LETTER FINAL TSADI
+צ     &#1510;     \U05E6     HEBREW LETTER TSADI
+ק     &#1511;     \U05E7     HEBREW LETTER QOF
+ר     &#1512;     \U05E8     HEBREW LETTER RESH
+ת     &#1514;     \U05EA     HEBREW LETTER TAV
+Latin Extended Additional
+    &#7691;     \U1E0B     LATIN SMALL LETTER D WITH DOT ABOVE
+    &#7693;     \U1E0D     LATIN SMALL LETTER D WITH DOT BELOW
+    &#7703;     \U1E17     LATIN SMALL LETTER E WITH MACRON AND ACUTE
+    &#7714;     \U1E22     LATIN CAPITAL LETTER H WITH DOT ABOVE
+    &#7716;     \U1E24     LATIN CAPITAL LETTER H WITH DOT BELOW
+    &#7717;     \U1E25     LATIN SMALL LETTER H WITH DOT BELOW
+    &#7723;     \U1E2B     LATIN SMALL LETTER H WITH BREVE BELOW
+    &#7731;     \U1E33     LATIN SMALL LETTER K WITH DOT BELOW
+    &#7735;     \U1E37     LATIN SMALL LETTER L WITH DOT BELOW
+    &#7745;     \U1E41     LATIN SMALL LETTER M WITH DOT ABOVE
+    &#7747;     \U1E43     LATIN SMALL LETTER M WITH DOT BELOW
+    &#7749;     \U1E45     LATIN SMALL LETTER N WITH DOT ABOVE
+    &#7751;     \U1E47     LATIN SMALL LETTER N WITH DOT BELOW
+    &#7763;     \U1E53     LATIN SMALL LETTER O WITH MACRON AND ACUTE
+    &#7769;     \U1E59     LATIN SMALL LETTER R WITH DOT ABOVE
+    &#7770;     \U1E5A     LATIN CAPITAL LETTER R WITH DOT BELOW
+    &#7771;     \U1E5B     LATIN SMALL LETTER R WITH DOT BELOW
+    &#7777;     \U1E61     LATIN SMALL LETTER S WITH DOT ABOVE
+    &#7779;     \U1E63     LATIN SMALL LETTER S WITH DOT BELOW
+    &#7787;     \U1E6B     LATIN SMALL LETTER T WITH DOT ABOVE
+    &#7789;     \U1E6D     LATIN SMALL LETTER T WITH DOT BELOW
+    &#7791;     \U1E6F     LATIN SMALL LETTER T WITH LINE BELOW
+    &#7825;     \U1E91     LATIN SMALL LETTER Z WITH CIRCUMFLEX
+    &#7827;     \U1E93     LATIN SMALL LETTER Z WITH DOT BELOW
+    &#7830;     \U1E96     LATIN SMALL LETTER H WITH LINE BELOW
+    &#7841;     \U1EA1     LATIN SMALL LETTER A WITH DOT BELOW
+    &#7885;     \U1ECD     LATIN SMALL LETTER O WITH DOT BELOW
+    &#7929;     \U1EF9     LATIN SMALL LETTER Y WITH TILDE
+General Punctuation
+-   &#8209;     \U2011     NON-BREAKING HYPHEN
+    &#8248;     \U2038     CARET
+    &#8253;     \U203D     INTERROBANG
+    &#8258;     \U2042     ASTERISM
+Arrows
+←   &#8592;     \U2190     LEFTWARDS ARROW
+→   &#8594;     \U2192     RIGHTWARDS ARROW
+Mathematical Operators
+∂   &#8706;     \U2202     PARTIAL DIFFERENTIAL
+√   &#8730;     \U221A     SQUARE ROOT
+∞   &#8734;     \U221E     INFINITY
+∥   &#8741;     \U2225     PARALLEL TO
+∫   &#8747;     \U222B     INTEGRAL
+≠   &#8800;     \U2260     NOT EQUAL TO
+    &#8852;     \U2294     SQUARE CUP
+    &#8853;     \U2295     CIRCLED PLUS
+    &#8942;     \U22EE     VERTICAL ELLIPSIS
+Enclosed Alphanumerics
+    &#9418;     \U24CA     CIRCLED LATIN CAPITAL LETTER U
+Miscellaneous Symbols
+☜   &#9756;     \U261C     WHITE LEFT POINTING INDEX
+☞   &#9758;     \U261E     WHITE RIGHT POINTING INDEX
+    &#9791;     \U263F     MERCURY
+    &#9792;     \U2640     FEMALE SIGN
+    &#9794;     \U2642     MALE SIGN
+    &#9795;     \U2643     JUPITER
+    &#9796;     \U2644     SATURN
+    &#9797;     \U2645     URANUS
+    &#9798;     \U2646     NEPTUNE
+    &#9799;     \U2647     PLUTO
+    &#9824;     \U2660     BLACK SPADE SUIT
+    &#9825;     \U2661     WHITE HEART SUIT
+    &#9826;     \U2662     WHITE DIAMOND SUIT
+    &#9827;     \U2663     BLACK CLUB SUIT
+    &#9837;     \U266D     MUSIC FLAT SIGN
+    &#9838;     \U266E     MUSIC NATURAL SIGN
+    &#9839;     \U266F     MUSIC SHARP SIGN
+Dingbats
+    &#10003;     \U2713     CHECK MARK
+    &#10016;     \U2720     MALTESE CROSS
+Private Use Area
+    -     \UE000     LATIN SMALL LETTER A WITH MACRON AND ACUTE
+    -     \UE001     LATIN SMALL LETTER A WITH MACRON AND TILDE
+    -     \UE002     LATIN SMALL LETTER A WITH VERTICAL LINE ABOVE
+    -     \UE003     LATIN CAPITAL LETTER C WITH MACRON
+    -     \UE004     LATIN SMALL LETTER C WITH MACRON
+    -     \UE005     LATIN SMALL LETTER C WITH BREVE
+    -     \UE006     LATIN SMALL LETTER C WITH DOT BELOW
+    -     \UE007     LATIN SMALL LIGATURE CH
+    -     \UE008     LATIN CAPITAL LETTER D WITH MACRON
+    -     \UE009     LATIN SMALL LETTER E WITH BAR BELOW
+    -     \UE00A     LATIN SMALL LETTER E WITH TILDE
+    -     \UE00B     LATIN SMALL LETTER E WITH MACRON AND BREVE
+    -     \UE00C     LATIN SMALL LETTER E WITH TILDE AND DOT ABOVE
+    -     \UE00D     LATIN SMALL LETTER E WITH HOOK RIGHT BELOW
+    -     \UE00E     LATIN SMALL LETTER G WITH INVERTED BREVE
+    -     \UE00F     LATIN SMALL LETTER I WITH INVERTED BREVE BELOW
+    -     \UE010     LATIN SMALL LETTER I WITH MACRON AND ACUTE
+    -     \UE011     LATIN SMALL LETTER K WITH CIRCUMFLEX
+    -     \UE012     LATIN SMALL LETTER K WITH BREVE
+    -     \UE013     LATIN SMALL LETTER K WITH INVERTED BREVE
+    -     \UE014     LATIN SMALL LIGATURE KH
+    -     \UE015     LATIN CAPITAL LETTER L WITH MACRON
+    -     \UE016     LATIN SMALL LETTER L WITH TILDE
+    -     \UE017     LATIN SMALL LETTER L WITH INVERTED BREVE
+    -     \UE018     LATIN CAPITAL LETTER M WITH MACRON
+    -     \UE019     LATIN SMALL LETTER M WITH MACRON
+    -     \UE01A     LATIN SMALL LETTER M WITH TILDE
+    -     \UE01B     LATIN SMALL LETTER O WITH CEDILLA
+    -     \UE01C     LATIN SMALL LETTER O WITH MACRON AND CIRCUMFLEX
+    -     \UE01E     LATIN SMALL LIGATURE OI
+    -     \UE01F     LATIN SMALL LIGATURE OO
+    -     \UE020     LATIN SMALL LIGATURE OO WITH MACRON
+    -     \UE021     LATIN SMALL LIGATURE OU
+    -     \UE022     LATIN SMALL LETTER OPEN O WITH ACUTE
+    -     \UE023     LATIN SMALL LETTER R WITH DIAERESIS
+    -     \UE024     LATIN SMALL LETTER R WITH CIRCUMFLEX
+    -     \UE025     LATIN SMALL LETTER R WITH RING BELOW
+    -     \UE026     LATIN SMALL LETTER S WITH VERTICAL LINE ABOVE
+    -     \UE027     LATIN SMALL LETTER S WITH OGONEK
+    -     \UE028     LATIN SMALL LETTER S WITH COMMA
+    -     \UE02A     LATIN SMALL LETTER S WITH BREVE
+    -     \UE02B     LATIN SMALL LIGATURE SH
+    -     \UE02C     LATIN SMALL LIGATURE TH
+    -     \UE02D     LATIN SMALL LETTER U WITH MACRON AND ACUTE
+    -     \UE02E     LATIN CAPITAL LETTER V WITH MACRON
+    -     \UE02F     LATIN CAPITAL LETTER X WITH MACRON
+    -     \UE030     LATIN SMALL LETTER X WITH CIRCUMFLEX
+    -     \UE031     LATIN SMALL LETTER Y WITH BREVE
+    -     \UE032     LATIN SMALL LIGATURE ZH
+    -     \UE033     LATIN SMALL LETTER TURNED E WITH ACUTE
+    -     \UE034     LATIN SMALL LETTER TURNED E WITH CIRCUMFLEX
+    -     \UE035     GREEK SMALL LETTER ALPHA WITH GRAVE
+    -     \UE036     MUSICAL SYMBOL SEGNO
+    -     \UE037     MUSICAL SYMBOL FERMATA
+    -     \UE038     MUSICAL SYMBOL CRESCENDO
+    -     \UE039     MUSICAL SYMBOL DECRESCENDO
+    -     \UE03A     MUSICAL SYMBOL DOUBLE SHARP
+    -     \UE03B     MUSICAL SYMBOL BREVE
+    -     \UE03C     MUSICAL SYMBOL DOWN BOW
+    -     \UE03D     MUSICAL SYMBOL UP BOW
+    -     \UE03E     MUSICAL SYMBOL BREVE ALTERNATE
+    -     \UE03F     PRINTING SYMBOL DELE
+    -     \UE040     PRINTING SYMBOL FRACTIONAL EM
+    -     \UE041     INVERTED ASTERISM
+    -     \UE042     LATIN SMALL LETTER SCHWA SUPERSCRIPT
+    -     \UE043     LATIN SMALL LETTER TURNED Y
+    -     \UE044     LATIN SMALL LIGATURE OE WITH MACRON
+    -     \UE045     SQUARE ROOT WITH BAR
+    -     \UE046     LATIN SMALL LETTER U WITH DOT ABOVE
+    -     \UE047     LATIN SMALL LIGATURE UE
+    -     \UE048     LATIN SMALL LIGATURE UE WITH MACRON
+    -     \UE049     LATIN SMALL LETTER OPEN O WITH TILDE
+    -     \UE04A     LATIN SMALL LETTER T WITH CARON BELOW
+    -     \UE04B     LATIN SMALL LETTER SCRIPT A WITH TILDE
+    -     \UE04C     GREEK SMALL LETTER EPSILON WITH TILDE
+    -     \UE04D     LATIN SMALL LIGATURE OE WITH TILDE
+    -     \UE04E     MODIFIER LETTER DOUBLE VERTICAL LINE
+    -     \UE04F     DOUBLE HYPHEN
+    -     \UE050     LATIN SMALL LETTER SCHWA WITH DOT ABOVE
+    -     \UE051     LATIN SMALL LETTER SCHWA WITH MACRON
+Alphabetic Presentation Forms
+ﬂ     &#64258;     \UFB02     LATIN SMALL LIGATURE FL
+שׁ     &#64298;     \UFB2A     HEBREW LETTER SHIN WITH SHIN DOT
+שׂ     &#64299;     \UFB2B     HEBREW LETTER SHIN WITH SIN DOT
+
--- a/format_docs/pdb/ztxt.txt
+++ b/format_docs/pdb/ztxt.txt
@ -0,0 +1,226 @@
+The zTXT Format
+---------------
+
+The zTXT format is relatively straightforward. The simplest zTXT contains a
+Palm database header, followed by zTXT record #0, followed by the compressed
+data. The compressed data can be in one of two formats: one long data stream,
+or split into chunks for random access. If there are any bookmarks, they occupy
+the record immediately after the compressed data. If there are any annotations,
+the annotation index occupies the record immediately after the bookmarks with
+each annotation in the index having a record immediately after the annotation
+index. Here are diagrams of a simple zTXT and a full featured zTXT:
+
+    DB Header
+0   Record 0
+1
+2
+3
+... Compressed Data
+36
+37
+38
+
+    DB Header
+0   Record 0
+1
+2
+3
+... Compressed Data
+36
+37
+38
+39  Bookmarks
+40  Annotation Index
+41  Annotation 1
+42  Annotation 2
+43  Annotation 3
+
+
+Compression Modes
+-----------------
+
+zTXT version 1.40 and later supports two modes of compression. Mode 1 is a
+random access mode, and mode 2 consists of one long data stream. Both modes
+work on 8K (the default record size) blocks of text.
+
+Please note, however, that as of Weasel Reader version 1.60 the old style
+(mode 2) zTXT format is no longer supported. makeztxt and libztxt still support
+creating these documents for backwards compatibility, but you should not use
+mode 2 if possible.
+
+
+Mode 1
+------
+
+In mode one, 8K blocks of text are compressed into an equal number of blocks of
+compressed data. Using the Z_FULL_FLUSH flush mode with zLib allows for random
+access among the blocks of data. In order for this to function, the first block
+must be decompressed first, and after that any block in the file may be
+decompressed in any order. In mode 1, the blocks of compressed data will likely
+not all have the same size.
+
+
+Mode 2
+------
+
+In zTXT versions before 1.40, this was the only method of compression. This
+mode involves compressing the entire input buffer into a single output buffer
+and then splitting the resulting buffer into 8K segments. This mode requires
+that all of the compressed data be decompressed in one pass. Since there are no
+real 'blocks' of data, the resulting output can be of any blocksize, though
+typically the default of 8K should be fine. The advantage to mode 2 is that it
+will give about 10% - 15% more compression.
+
+
+zTXT Record #0 Definition (version 1.44)
+----------------------------------------
+
+Record 0 provides all of the information about the zTXT contents. Be sure it is
+correct, lest fiery death rain down upon your program.
+
+typedef struct zTXT_record0Type {
+  UInt16        version;
+  UInt16        numRecords;
+  UInt32        size;
+  UInt16        recordSize;
+  UInt16        numBookmarks;
+  UInt16        bookmarkRecord;
+  UInt16        numAnnotations;
+  UInt16        annotationRecord;
+  UInt8         flags;
+  UInt8         reserved;
+  UInt32        crc32;
+  UInt8         padding[0x20 - 24];
+} zTXT_record0;
+
+
+Structure Elements
+------------------
+
+UInt16        version;
+
+This is mostly just informational. Your program can figure out what features
+might be available from the version. However, the remaining parts of the
+structure are designed such that their value will be 0 if that particular
+feature is not present, so that is the correct way to test. The version is
+stored as two 8 bit integers. For example, version 1.42 is 0x012A.
+
+UInt16        numRecords;
+
+This is the number of DATA records only and does not include record 0,
+bookmarks, or annotations. With compression mode 1, this is also the number of
+uncompressed text records. With mode 2, you must decompress the file to figure
+out how many text records there will be.
+
+UInt32        size;
+
+The size in bytes of the uncompressed data in the zTXT. Check this value with
+the amount of free storage memory on the Palm to make sure there's enough room
+to decompress the data in full or in part.
+
+UInt16        recordSize;
+
+recordSize is the size in bytes of a text record. This field is important, as
+the size of text and decompression buffers is based on this value. It is used
+by Weasel to navigate through the text so it can map absolute offsets to record
+numbers. 8192 is the default. With compression mode 1, this is the amount of
+data inside each compressed record (except maybe the last one), but the actual
+compressed records will likely have varying sizes. In mode 2, both compressed
+records and the resulting text records are all of this size (except, again, the
+last record).
+
+UInt16        numBookmarks;
+
+The definitive count of how many bookmarks are stored in the bookmark index
+record. See the section on bookmarks below.
+
+UInt16        bookmarkRecord;
+
+If there are any bookmarks, this is set to the record index number that
+contains the bookmark listing, otherwise it is 0.
+
+UInt16        numAnnotations;
+
+Like the bookmark count, this is the definitive count of how many annotations
+are in the annotation index and how many annotation records follow it. See the
+section on annotations below.
+
+UInt16        annotationRecord;
+
+If there are any annotations, this is set to the record index number that
+contains the annotation index, otherwise it is 0.
+
+UInt8         flags;
+
+These flags indicate various features of the zTXT database. flags is a bitmask
+and at present the only two defined bits are:
+
+ZTXT_RANDOMACCESS (0x01)
+    If the zTXT was compressed according to the method in mode 1, then it
+    supports random access and this should be set.
+ZTXT_NONUNIFORM (0x02)
+    Setting this bit indicates that the text records within the zTXT database
+    are not of uniform length. That is, when the blocks of text are
+    decompressed they will not have identical block sizes. If this is not set,
+    the compressed blocks are assumed to all have the same size when
+    decompressed (typically 8K) except for the last block which can be smaller.
+
+UInt32        crc32;
+
+A CRC32 value for checking data integrity. This value is computed over all text
+data records only and does not include record 0 nor any bookmark/annotation
+records. The current implementation in makeztxt/Weasel computes this value
+using the crc32 function in zLib which should be the standard CRC32 definition.
+
+UInt8         padding[0x20 - 24];
+
+zTXT record zero is 32 bytes in length, so the unused portion is padded.
+
+
+zTXT Bookmarks
+--------------
+
+zTXT bookmarks are stored in a simple array in a record at the end of a zTXT.
+The format is as follows:
+
+#define MAX_BMRK_LENGTH         20
+
+typedef struct GPlmMarkType {
+  UInt32        offset;
+  Char          title[MAX_BMRK_LENGTH];
+} GPlmMark;
+
+In the structure, offset is counted as an absolute offset into the text. The
+bookmarks must be sorted in ascending order.
+
+If there are no bookmarks, then the bookmark index does not exist. When the
+user creates the first bookmark, the record containing the index will then be
+created. If there are annotations, when the bookmark record is created it must
+go before the annotation index. This will require incrementing annotationRecord
+in record 0 to point to the new record index.
+
+Similarly, when all bookmarks are deleted the bookmark index record is also
+deleted. If there are annotations, annotationRecord in record 0 must be
+decremented to point to the new index.
+
+
+zTXT Annotations
+----------------
+
+zTXT annotations have a format almost identical to that of the bookmark index:
+
+typedef struct GPlmAnnotationType {
+  UInt32        offset;
+  Char          title[MAX_BMRK_LENGTH];
+} GPlmAnnotation;
+
+Like the bookmarks, offset is an absolute offset into the text. The annotation
+index is organized just as the bookmarks are, as a single array in a record.
+Note that this structure does NOT store the actual annotation text.
+
+The text of each annotation is stored in its own record immediately following
+the index. So, the first annotation in the index will occupy the first record
+following the index, and the second annotation will be in the second record
+following the index, and so on. The text of each annotation is limited to
+4096 bytes.
+
--- a/format_docs/rb.txt
+++ b/format_docs/rb.txt
@ -0,0 +1,303 @@
+Rocket eBook File Format
+------------------------
+
+from http://rbmake.sourceforge.net/rb_format.html
+
+
+Overview
+--------
+
+This document attempts to describe the format of a .rb file -- the book
+format that is downloaded into NuvoMedia's <http://www.nuvomedia.com>
+hand-held wonder, the Rocket eBook
+<http://www.rocket-ebook.com/enter.html>.
+
+*Note:* All multi-byte integers are stored in Vax/Intel order (the
+opposite of network byte order). Most integers are 4 bytes (an int32),
+but there are some minor exceptions (as detailed below).
+
+Also, the following document refers to the .rb file sections as "pages".
+
+
+Details
+-------
+
+The first 4 bytes of the file seem to be a magic number (in hex): B0 0C
+B0 0C. I like to think of this as a hexadecimal pun on the word "book"
+(repeated). [Matt Greenwood has reported seeing a magic number of "B0 0C
+F0 0D" in another type of ReB-related file -- i.e. "book food".]
+
+The next two bytes appear to be a version number, currently "02 00". I
+assume this means major version 2, minor version 0.
+
+The next 4 bytes are the string "NUVO", followed by 4 bytes of 00h. (I
+have also seen an old title that had 0s in place of the "NUVO".)
+
+This brings us up to offset 0Eh, at which point we have a 4-byte
+representation of the date the book was created (Matt Greenwood pointed
+this out to me -- thanks!). The year is encoded as an int16. An older
+version of the RocketLibrary was encoding the year's full value (e.g.
+1999 was "CF 07" and 2000 was "D0 07"), but a more recent version is now
+using the tm_year value verbatim -- i.e. it's storing 100 for the year
+2000 ("64 00"). The year is followed by an int8 for the 1-relative month
+number, and an int8 for the day of the month.
+
+After that is 6 bytes of 00h. These may be reserved for setting the time
+of creation (at a guess).
+
+Then, at offset 18h, we have an int32 that contains the absolute offset
+of the "Table of Contents" (the directory of the pages contained within
+this .rb file). In all of the .rb file's I've seen, this remains
+constant with a value of 128h. However, I have tested an atypical .rb
+file where I placed the ToC at the end of the file (after all the file
+contents), and it worked fine. (I've chosen not to build any books in
+such a non-standard format, however.)
+
+Immediately following this is an int32 with the length of the .rb file
+(so we can check if the file is complete or not).
+
+All the bytes from here (offset 20h) up to offset 128h appear to only be
+used by an encrypted title. In a non-encrypted title, they are always 0.
+
+The table of contents typically comes next (at offset 128h). It starts
+with an int32 count of the number of "page" entries (.rb-file sections)
+in the ToC. Each entry consists of a name (zero-padded to 32 bytes),
+followed by 3 int32s: the length of this entry's data segment, the
+absolute offset of the data in the .rb file, and a flag. The known flag
+values are: 1 (encrypted), 2 (info page), and 8 (deflated). The names
+are tweaked as needed to ensure that they are all unique. The current
+RocketWriter software uses a unique 6-digit number, a dash, up to 8
+characters from the filename, and then the re-mapped suffix for the data
+(.html, .hidx, .png, .info, etc.). My rbmake library simply ensures that
+the names are no longer than 15 characters (not counting the suffix) and
+are all unique.
+
+Often the first item in the ToC is the info page, but it doesn't have to
+be. This page of information contains NAME=VALUE pairs that note the
+author, title, what the root-page's name is, etc. (See appendix A). This
+data is never encrypted nor compressed, so this entry's flag value is
+always "2".
+
+An image page is always stored as a B&W image in PNG format. Since it
+has its own compression, it is stored without any additional attempt at
+deflation. I have also never seen an encrypted image, so its flag value
+is always 0.
+
+An HTML page contains the tags and text that were re-written into a
+consistent syntax (this presumably makes the HTML renderer in the ReB
+itself simpler). HTML pages are typically compressed (See appendix B).
+Every HTML page appears to use the suffix .html no matter what the file
+name was on import (but I have seen older files with .htm used as the
+suffix, so the rocket appears to support both).
+
+For every HTML page there is a corresponding .hidx page that contains a
+summary of the paragraph formatting and the position of the anchor names
+in the associated .html page (See appendix C). This page is sometimes
+compressed, depending on length (See appendix B).
+
+There are also reference titles that have a .hkey page that contains a
+list of words that can be looked up in the associated .html page (See
+appendix D).
+
+Immediately following the ToC is the data for each piece mentioned in
+the ToC, in the same order as it appeared in the ToC.
+
+Finally, the end of the file appears to be padded with 20 bytes of 01h.
+
+
+Appendix A: Info Page Format
+----------------------------
+
+The info page consists of a series of lines that contain "NAME=VALUE"
+strings. Each line is terminated by a single newline. Here are the
+values that the RocketWriter generates:
+
+    COMMENT=Info file for <title>
+    TYPE=2
+    TITLE=<title>
+    AUTHOR=<author>
+    URL=ebook:<long, unique string used for the file's name by the librarian>
+    GENERATOR=<e.g. RocketLibrarian 1.3.216>
+    PARSE=1
+    OUTPUT=1
+    BODY=<name of root HTML page (as it appears in the ToC)>
+    MENUMARK=menumark.html
+    SuggestedRetailPrice=<usually empty>
+
+Encrypted titles have a few more entries (including those listed above):
+
+    ISBN=<ISBN number, including dashes>
+    REVISION=<digits>
+    TITLE_LANGUAGE=<en-us>
+    PUB_NAME=<Publisher's name>
+    PUBSERVER_ID=<digits>
+    GENERATOR=<e.g. RocketPress 1.3.121>
+    VERSION=<digits>
+    USERNAME=<rocket-ID>
+    COPY_ID=<digits>
+    COPYRIGHT=<copyright>
+    COPYTITLE=<another copyright?>
+
+A reference title also has an indication that there is a .hkey page
+present, and may also have a GENRE of "Reference":
+
+    HKEY=1
+    GENRE=Reference
+
+
+Appendix B: The format of compressed data
+-----------------------------------------
+
+Compressed pages have a data section in the .rb file with the following
+format:
+
+The first int32 is a count of the number of 4096-byte chunks of data we
+broke the uncompressed page into (the last chunk can be shorter than
+4096 bytes, of course).
+
+This is immediately followed by an int32 with the length of the entire
+uncompressed data.
+
+After this there are <count> int32s that indicate the size of each
+chunk's compressed data.
+
+Following these length int32s is the output from a deflation (the
+algorithm used in gzip) for each 4096-byte chunk of the original data.
+It appears that you must use a window-bit size of 13 and a compression
+level of "best" to be compatible with the Rocket eBook's system software.
+
+
+Appendix C: HTML-index Page Format
+----------------------------------
+
+The .hidx page's purpose is to allow the renderer to quickly look up the
+format of each paragraph (useful for random access to the data), and the
+position of the anchor names.
+
+The first section lists the various paragraph-producing tags. It is
+headed by a line of "[tags <count>]", where <count> is the number of
+tags that follow this header. The tags are listed one per line, and have
+an implied enumeration from 0 to N-1 (which the other tags and the
+upcoming paragraph sections reference).
+
+The first tag is typically (always?) "<HTML> -1". The number trailing
+the tag indicates what other tag (or sequence of tags, one per line) in
+which we are nested. So, if we have a <BR> nested inside a <P
+ALIGN="center">, it would be listed separately from a <BR> that was
+nested inside a normal paragraph, and each one would have a different
+trailing index number.
+
+Following the tag section is the paragraph section. The heading is
+"[paragraphs <count>]", and is followed by a line for each paragraph.
+These lines consist of a character offset into the .html page for the
+start of the paragraph followed by a 0-relative offset into the tag
+section (indicating what kind of formatting to use for the indicated
+paragraph).
+
+The paragraph-section character offsets point to the first bit of text
+after the associated tag.
+
+The last section details the anchor names. The heading is
+"[names <count>]", and each item that follows is a quoted string of the
+anchor name, followed by a character offset into the .html page where
+we'll find that name. If there are no names in the associated HTML
+section, the heading is included with a 0 count (i.e. "[names 0]").
+
+The name-section character offsets point to the start of the anchor tag
+(not after the tag, like the offsets in the "paragraphs" section).
+
+The lines are terminated by newlines (in standard unix fashion).
+
+For example:
+
+    [tags 10]
+    <HTML> -1
+    <BODY> 0
+    <P ALIGN="right"> 1
+    <P ALIGN="left"> 1
+    <P> 1
+    <H3 ALIGN="center"> 1
+    <P ALIGN="center"> 1
+    <BR> 6
+    <H2 ALIGN="center"> 1
+    <BR> 1
+
+    [paragraphs 42]
+    160 9
+    164 9
+    184 8
+    220 8
+    261 6
+    316 5
+    359 1
+    379 6
+    410 6
+    460 7
+    511 7
+    564 7
+    616 7
+    668 7
+    720 7
+    773 7
+    827 7
+    880 7
+    933 7
+    988 7
+    1043 7
+    1100 7
+    1157 7
+    1214 7
+    1270 7
+    1328 7
+    1385 7
+    1442 7
+    1497 7
+    1556 7
+    1561 7
+    1635 1
+    1656 5
+    1690 6
+    1737 7
+    1773 5
+    1798 4
+    1826 3
+    2663 1
+    2668 4
+    2689 2
+    2730 8
+
+    [names 1]
+    "ch1" 2689
+
+
+Appendix D: HTML-key Page Format
+--------------------------------
+
+The .hkey page contains a list of words, one per line, sorted in a
+strict ASCII sequence, each one followed by a tab and the offset in the
+.html page of the word's data. I presume that the .hkey page must share
+the same name prefix as its related .html page.
+
+If the names contain high-bit characters, they are translated into
+regular ASCII in the .hkey file, since this allows the user to search
+for the words using unaccented characters.
+
+The lines are terminated with a newline (in standard unix fashion).
+
+An example:
+
+    a	5
+    apple	38
+    b	84
+    book	104
+
+Each of these offsets points to a paragraph tag in the associated .html
+page. I have only seen this sequence of tags used so far:
+
+    <P><BIG><B>word</B></BIG> other stuff</P>
+
+I have seen multiple <B>...</B> tags in the middle of the single set of
+<BIG>...</BIG> tags, but this is the basic tag format.
+
+The offset in the .hkey page points to the start of the <P> tag.
+
--- a/format_docs/tcr.txt
+++ b/format_docs/tcr.txt
@ -0,0 +1,56 @@
+About
+-----
+
+Text compression format that can be decompressed starting at any point.
+Little-endian byte ordering is used.
+
+
+Header
+------
+
+TCR files always start with:
+
+!!8-Bit!!
+
+
+Layout
+------
+
+Header
+256 key dictionary
+compressed text
+
+
+Dictionary
+----------
+
+A dictionary of keys and replacement strings. There are a total of 256 keys,
+0 - 255. Each string is preceded by one byte that represents the length of
+the string.
+
+
+Compressed text
+---------------
+
+The compressed text is a series of values 0-255 which correspond to a key and
+thus a string. Reassembling is replacing each key in the compressed text with
+its corresponding string.
+
+
+Compressor
+-----------------
+
+From Andrew Giddings TCR.c (http://www.cix.co.uk/~gidds/Software/TCR.html):
+
+The TCR compression format is easy to describe: after the fixed header is a
+dictionary of 256 strings, each preceded by a length byte.  The rest of the
+file is a list of codes from this dictionary.
+
+The compressor works by starting with each code defined as itself.  While
+there's an unused code, it finds the most common two-code combination, and
+creates a new code for it, replacing all occurrences in the text with the
+new code.
+
+It also searches for codes that are always followed by another, which it can
+merge, possibly freeing up some.
+
--- a/resources/calibre-portable.bat
+++ b/resources/calibre-portable.bat
@ -1,6 +1,4 @@
@echo OFF
-REM			CalibreRun.bat
-REM			~~~~~~~~~~~~~~
 REM Batch File to start a Calibre configuration on Windows
 REM giving explicit control of the location of:
 REM  - Calibe Program Files
@ -8,23 +6,38 @@ REM  - Calibre Library Files
 REM  - Calibre Config Files
 REM  - Calibre Metadata database
 REM  - Calibre Source files
+REM  - Calibre Temp Files
 REM By setting the paths correctly it can be used to run:
 REM  - A "portable calibre" off a USB stick.
 REM  - A network installation with local metadata database
 REM    (for performance) and books stored on a network share 
+REM  - A local installation using customised settings
 REM
-REM If trying to run off a USB stick then the following 
-REM folder structure is recommended:
+REM If trying to run off a USB stick then the folder structure
+REM shown below is recommended (relative to the location of 
+REM this batch file).  This structure can also be used
+REM when running off a local hard disk if you want to get the
+REM level of control this batch file provides.
 REM  - Calibre2			Location of program files
 REM  - CalibreConfig		Location of Configuration files
 REM  - CalibreLibrary		Location of Books and metadata
+REM  - CalibreSource 		Location of Calibre Source files (Optional)
+REM
+REM This batch file is designed so that if you create the recommended
+REM folder structure then it can be used 'as is' without modification.


 REM -------------------------------------
 REM Set up Calibre Config folder
+REM
+REM This is where user specific settings
+REM are stored.
 REM -------------------------------------

-If EXIST CalibreConfig SET CALIBRE_CONFIG_DIRECTORY=%cd%\CalibreConfig
+IF EXIST CalibreConfig (
+	SET CALIBRE_CONFIG_DIRECTORY=%cd%\CalibreConfig
+	ECHO CONFIG FILES:    %cd%\CalibreConfig
+)


 REM --------------------------------------------------------------
@ -34,28 +47,45 @@ REM Location where Book files are located
 REM Either set explicit path, or if running from a USB stick
 REM a relative path can be used to avoid need to know the
 REM drive letter of the USB stick.
-
+REM
 REM Comment out any of the following that are not to be used
+REM (although leaving them in does not really matter)
 REM --------------------------------------------------------------

-SET CALIBRE_LIBRARY_DIRECTORY=U:\eBOOKS\CalibreLibrary
-IF EXIST CalibreLibrary SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreLibrary
-IF EXIST CalibreBooks SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreBooks
+IF EXIST U:\eBooks\CalibreLibrary (
+	SET CALIBRE_LIBRARY_DIRECTORY=U:\eBOOKS\CalibreLibrary
+	ECHO LIBRARY FILES:   U:\eBOOKS\CalibreLibrary
+)
+IF EXIST CalibreLibrary (
+	SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreLibrary
+	ECHO LIBRARY FILES:   %cd%\CalibreLibrary
+)


 REM --------------------------------------------------------------
-REM Specify Location of metadata database  (optional)
+REM Specify Location of metadata database (optional)
 REM
 REM Location where the metadata.db file is located.  If not set
 REM the same location as Books files will be assumed.  This.
 REM options is used to get better performance when the Library is
 REM on a (slow) network drive.  Putting the metadata.db file 
-REM locally gives a big performance improvement.
+REM locally then gives a big performance improvement.
+REM
+REM NOTE.  If you use this option, then the ability to switch
+REM        libraries within Calibre will be disabled.  Therefore
+REM        you do not want to set it if the metadata.db file
+REM        is at the same location as the book files.
 REM --------------------------------------------------------------

-IF EXIST CalibreBooks SET SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreBooks\metadata.db
-IF EXIST CalibreMetadata SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreMetadata\metadata.db
-
+IF EXIST %cd%\CalibreMetadata\metadata.db (
+	IF NOT "%CALIBRE_LIBRARY_DIRECTORY%" == "%cd%\CalibreMetadata" (
+		SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreMetadata\metadata.db
+		ECHO DATABASE:        %cd%\CalibreMetadata\metadata.db
+		ECHO '
+		ECHO ***CAUTION*** Library Switching will be disabled 
+		ECHO '
+	)
+)

 REM --------------------------------------------------------------
 REM Specify Location of source (optional)
@ -63,39 +93,63 @@ REM
 REM It is easy to run Calibre from source
 REM Just set the environment variable to where the source is located
 REM When running from source the GUI will have a '*' after the version.
+REM number that is displayed at the bottom of the Calibre main screen.
 REM --------------------------------------------------------------

-IF EXIST Calibre\src SET CALIBRE_DEVELOP_FROM=%cd%\Calibre\src
+IF EXIST CalibreSource\src (
+	SET CALIBRE_DEVELOP_FROM=%cd%\CalibreSource\src
+	ECHO SOURCE FILES:    %cd%\CalibreSource\src
+)


 REM --------------------------------------------------------------
-REM Specify Location of calibre binaries (optinal)
+REM Specify Location of calibre binaries (optional)
 REM
 REM To avoid needing Calibre to be set in the search path, ensure
 REM that Calibre Program Files is current directory when starting.
 REM The following test falls back to using search path .
-REM This folder can be populated by cpying the Calibre2 folder from
-REM an existing isntallation or by isntalling direct to here.
+REM This folder can be populated by copying the Calibre2 folder from
+REM an existing installation or by installing direct to here.
 REM --------------------------------------------------------------

-IF EXIST Calibre2 CD Calibre2
+IF EXIST %cd%\Calibre2 (
+	CD %cd%\Calibre2
+	ECHO PROGRAM FILES:   %cd%\Calibre2
+)


-REM --------------------------------------------
-REM Display settings that will be used
-REM --------------------------------------------
+REM --------------------------------------------------------------
+REM Location of Calibre Temporary files  (optional)
+REM
+REM Calibre creates a lot of temporary files while running.
+REM In theory these are removed when Calibre finishes, but
+REM in practice files can be left behind (particularly if
+REM any errors occur).  Using this option allows some
+REM explicit clean-up of these files.
+REM If not set Calibre uses the normal system TEMP location
+REM --------------------------------------------------------------

-echo PROGRAMS=%cd%
-echo SOURCE=%CALIBRE_DEVELOP_FROM%
-echo CONFIG=%CALIBRE_CONFIG_DIRECTORY%
-echo LIBRARY=%CALIBRE_LIBRARY_DIRECTORY%
-echo DATABASE=%CALIBRE_OVERRIDE_DATABASE_PATH%
+SET CALIBRE_TEMP_DIR=%TEMP%\CALIBRE_TEMP
+ECHO TEMPORARY FILES: %CALIBRE_TEMP_DIR%

+IF NOT "%CALIBRE_TEMP_DIR%" == "" (
+	IF EXIST "%CALIBRE_TEMP_DIR%" RMDIR /s /q "%CALIBRE_TEMP_DIR%"
+	MKDIR "%CALIBRE_TEMP_DIR%"
+	REM set the following for any components that do
+	REM not obey the CALIBRE_TEMP_DIR setting
+	SET TMP=%CALIBRE_TEMP_DIR%
+	SET TEMP=%CALIBRE_TEMP_DIR%
+)
+
+
+REM ----------------------------------------------------------
 REM  The following gives a chance to check the settings before
 REM  starting Calibre.  It can be commented out if not wanted.
+REM ----------------------------------------------------------

-echo "Press CTRL-C if you do not want to continue"
-pause
+ECHO '
+ECHO "Press CTRL-C if you do not want to continue"
+PAUSE


 REM --------------------------------------------------------
@ -110,5 +164,7 @@ REM If used without /WAIT opotion launches Calibre and contines batch file.
 REM Use with /WAIT to wait until Calibre completes to run a task on exit
 REM --------------------------------------------------------

-echo "Starting up Calibre"
-START /belownormal Calibre --with-library %CALIBRE_LIBRARY_DIRECTORY%
+ECHO "Starting up Calibre"
+ECHO OFF
+ECHO %cd%
+START /belownormal Calibre --with-library "%CALIBRE_LIBRARY_DIRECTORY%"
--- a/resources/catalog/section_list_templates.py
+++ b/resources/catalog/section_list_templates.py
@ -6,6 +6,8 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 '''
+    These templates control the content of titles displayed in the various sections
+
    Available fields:
    {title}          Title of the book
    {series}         Series name
@ -14,6 +16,7 @@ __docformat__ = 'restructuredtext en'
    {rating_parens}  Rating, in parentheses
    {pubyear}        Year the book was published
    {pubyear_parens} Year the book was published, in parentheses
+
 '''
 # Books by Author
 by_authors_normal_title_template = '{title} {pubyear_parens}'
--- a/resources/catalog/stylesheet.css
+++ b/resources/catalog/stylesheet.css
@ -1,55 +1,19 @@
 body { background-color: white; }

-p.title  {
-	margin-top:0em;
-	margin-bottom:0em;
-	text-align:center;
-	font-style:italic;
-	font-size:xx-large;
-	}
-
-p.series_id {
-	margin-top:0em;
-	margin-bottom:0em;
-	text-align:center;
-	}
-
 a.series_id {
 	font-style:normal;
 	font-size:large;
 	}

-p.author {
-	font-size:large;
-	margin-top:0em;
-	margin-bottom:0em;
-	text-align: center;
-	text-indent: 0em;
-  	}
-
-p.author_index {
-	font-size:large;
-	font-weight:bold;
-	text-align:left;
-	margin-top:0px;
-	margin-bottom:-2px;
-	text-indent: 0em;
-	}
-
-p.genres {
-	font-style:normal;
-	margin-top:0.5em;
-	margin-bottom:0em;
-	text-align: left;
-	text-indent: 0.0in;
-	}
-
-p.formats {
-	font-size:90%;
-	margin-top:0em;
-	margin-bottom:0.5em;
-	text-align: left;
-	text-indent: 0.0in;
+/*
+* 	Minimize widows and orphans by logically grouping chunks
+*   Some reports of problems with Sony (ADE) ereaders
+*	   ADE: page-break-inside:avoid;
+*	iBooks: display:inline-block;
+*		    width:100%;
+*/
+div.author_logical_group {
+	page-break-inside:avoid;
 	}

 div.description > p:first-child {
@ -62,61 +26,8 @@ div.description {
 	text-indent: 1em;
 	}

-p.date_index {
-	font-size:x-large;
-	text-align:center;
-	font-weight:bold;
-	margin-top:1em;
-	margin-bottom:0px;
-	}
-
-p.letter_index {
-	font-size:x-large;
-	text-align:center;
-	font-weight:bold;
-	margin-top:1em;
-	margin-bottom:0px;
-	}
-
-p.series {
-	font-style:italic;
-	margin-top:2px;
-	margin-bottom:0px;
-	margin-left:2em;
-	text-align:left;
-	text-indent:-2em;
-	}
-
-p.read_book {
-	text-align:left;
-	margin-top:0px;
-	margin-bottom:0px;
-	margin-left:2em;
-	text-indent:-2em;
-	}
-
-p.unread_book {
-	text-align:left;
-	margin-top:0px;
-	margin-bottom:0px;
-	margin-left:2em;
-	text-indent:-2em;
-	}
-
-p.wishlist_item {
-	text-align:left;
-	margin-top:0px;
-	margin-bottom:0px;
-	margin-left:2em;
-	text-indent:-2em;
-	}
-
-p.date_read {
-	text-align:left;
-	margin-top:0px;
-	margin-bottom:0px;
-	margin-left:6em;
-	text-indent:-6em;
+div.initial_letter {
+	page-break-before:always;
 	}

 hr.annotations_divider {
@ -152,6 +63,102 @@ hr.merged_comments_divider {
    border-left: solid white 0px;
 	}

+p.date_read {
+	text-align:left;
+	margin-top:0px;
+	margin-bottom:0px;
+	margin-left:6em;
+	text-indent:-6em;
+	}
+
+p.author {
+	font-size:large;
+	margin-top:0em;
+	margin-bottom:0em;
+	text-align: center;
+	text-indent: 0em;
+  	}
+
+p.author_index {
+	font-size:large;
+	font-weight:bold;
+	text-align:left;
+	margin-top:0px;
+	margin-bottom:-2px;
+	text-indent: 0em;
+	}
+
+p.author_title_letter_index {
+	font-size:x-large;
+	text-align:center;
+	font-weight:bold;
+	margin-top:0px;
+	margin-bottom:0px;
+	}
+
+p.date_index {
+	font-size:x-large;
+	text-align:center;
+	font-weight:bold;
+	margin-top:1em;
+	margin-bottom:0px;
+	}
+
+p.formats {
+	font-size:90%;
+	margin-top:0em;
+	margin-bottom:0.5em;
+	text-align: left;
+	text-indent: 0.0in;
+	}
+
+p.genres {
+	font-style:normal;
+	margin-top:0.5em;
+	margin-bottom:0em;
+	text-align: left;
+	text-indent: 0.0in;
+	}
+
+p.series {
+	font-style:italic;
+	margin-top:0.25em;
+	margin-bottom:0em;
+	margin-left:2em;
+	text-align:left;
+	text-indent:-2em;
+	}
+
+p.series_id {
+	margin-top:0em;
+	margin-bottom:0em;
+	text-align:center;
+	}
+
+p.series_letter_index {
+	font-size:x-large;
+	text-align:center;
+	font-weight:bold;
+	margin-top:1em;
+	margin-bottom:0px;
+	}
+
+p.title  {
+	margin-top:0em;
+	margin-bottom:0em;
+	text-align:center;
+	font-style:italic;
+	font-size:xx-large;
+	}
+
+p.wishlist_item, p.unread_book, p.read_book {
+	text-align:left;
+	margin-top:0px;
+	margin-bottom:0px;
+	margin-left:2em;
+	text-indent:-2em;
+	}
+
 td.publisher, td.date {
 	font-weight:bold;
 	text-align:center;
--- a/resources/images/heuristics.png
+++ b/resources/images/heuristics.png
--- a/resources/images/news/dailytportal.png
+++ b/resources/images/news/dailytportal.png
--- a/resources/images/news/latimes.png
+++ b/resources/images/news/latimes.png
--- a/resources/mime.types
+++ b/resources/mime.types
@ -585,7 +585,6 @@ application/vnd.osa.netdeploy
 application/vnd.osgi.bundle				
 application/vnd.osgi.dp				dp
 application/vnd.otps.ct-kip+xml				
-application/vnd.palm				oprc pdb pqa
 application/vnd.paos.xml				
 application/vnd.pg.format				str
 application/vnd.pg.osasli				ei6
@ -1082,7 +1081,6 @@ chemical/x-ncbi-asn1				asn
 chemical/x-ncbi-asn1-ascii				ent prt
 chemical/x-ncbi-asn1-binary				aso val
 chemical/x-ncbi-asn1-spec				asn
-chemical/x-pdb				ent pdb
 chemical/x-rosdal				ros
 chemical/x-swissprot				sw
 chemical/x-vamas-iso14976				vms
@ -1379,3 +1377,5 @@ application/x-cbr                          cbr
 application/x-cb7                          cb7
 application/x-koboreader-ebook             kobo
 image/wmf                                  wmf
+application/ereader                        pdb
+
--- a/resources/recipes/180.recipe
+++ b/resources/recipes/180.recipe
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
    title                 = '180.com.uy'
    __author__            = 'Gustavo Azambuja'
    description           = 'Noticias de Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/20_minutos.recipe
+++ b/resources/recipes/20_minutos.recipe
@ -0,0 +1,79 @@
+__license__   = 'GPL v3'
+__author__    = 'Luis Hernandez'
+__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
+__version__     = 'v0.85'
+__date__        = '31 January 2011'
+
+'''
+www.20minutos.es
+'''
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1294946868(BasicNewsRecipe):
+
+    title          = u'20 Minutos new'
+    publisher      = u'Grupo 20 Minutos'
+
+    __author__            = 'Luis Hernandez'
+    description           = 'Free spanish newspaper'
+    cover_url     = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+
+    remove_javascript = True
+    no_stylesheets        = True
+    use_embedded_content  = False
+
+    encoding              = 'ISO-8859-1'
+    language              = 'es'
+    timefmt        = '[%a, %d %b, %Y]'
+    remove_empty_feeds    = True
+
+    keep_only_tags     = [
+                                   dict(name='div', attrs={'id':['content','vinetas',]})
+                                  ,dict(name='div', attrs={'class':['boxed','description','lead','article-content','cuerpo estirar']})
+                                  ,dict(name='span', attrs={'class':['photo-bar']})
+                                  ,dict(name='ul', attrs={'class':['article-author']})
+                                ]
+
+    remove_tags_before = dict(name='ul' , attrs={'class':['servicios-sub']})
+    remove_tags_after  = dict(name='div' , attrs={'class':['related-news','col']})
+
+    remove_tags = [
+                     dict(name='ol', attrs={'class':['navigation',]})
+                    ,dict(name='span', attrs={'class':['action']})
+                    ,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col','photo-gallery','photo-gallery side-art-block','calendario','article-comment','postto estirar','otras_vinetas estirar','kment','user-actions']})
+                    ,dict(name='div', attrs={'id':['twitter-destacados','eco-tabs','inner','vineta_calendario','vinetistas clearfix','otras_vinetas estirar','MIN1','main','SUP1','INT']})
+                    ,dict(name='ul', attrs={'class':['article-user-actions','stripped-list']})
+                    ,dict(name='ul', attrs={'id':['site-links']})
+                    ,dict(name='li', attrs={'class':['puntuacion','enviar','compartir']})
+                       ]
+
+    extra_css             = """
+                               p{text-align: justify; font-size: 100%}
+                               body{ text-align: left; font-size:100% }
+                               h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
+                                 """
+
+    preprocess_regexps = [(re.compile(r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]
+
+    feeds = [
+              (u'Portada'              , u'http://www.20minutos.es/rss/')
+             ,(u'Nacional'             , u'http://www.20minutos.es/rss/nacional/')
+             ,(u'Internacional'       , u'http://www.20minutos.es/rss/internacional/')
+             ,(u'Economia'           , u'http://www.20minutos.es/rss/economia/')
+             ,(u'Deportes'            , u'http://www.20minutos.es/rss/deportes/')
+             ,(u'Tecnologia'          , u'http://www.20minutos.es/rss/tecnologia/')
+             ,(u'Gente - TV'         , u'http://www.20minutos.es/rss/gente-television/')
+             ,(u'Motor'                 , u'http://www.20minutos.es/rss/motor/')
+             ,(u'Salud'                 , u'http://www.20minutos.es/rss/belleza-y-salud/')
+             ,(u'Viajes'                , u'http://www.20minutos.es/rss/viajes/')
+             ,(u'Vivienda'             , u'http://www.20minutos.es/rss/vivienda/')
+             ,(u'Empleo'              , u'http://www.20minutos.es/rss/empleo/')
+             ,(u'Cine'                  , u'http://www.20minutos.es/rss/cine/')
+             ,(u'Musica'               , u'http://www.20minutos.es/rss/musica/')
+             ,(u'Vinetas'          , u'http://www.20minutos.es/rss/vinetas/')
+             ,(u'Comunidad20'     , u'http://www.20minutos.es/rss/zona20/')
+            ]
--- a/resources/recipes/7dias.recipe
+++ b/resources/recipes/7dias.recipe
@ -20,7 +20,7 @@ class SieteDias(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'

    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/resources/recipes/abc.recipe
+++ b/resources/recipes/abc.recipe
@ -0,0 +1,43 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ABCRecipe(BasicNewsRecipe):
+  title          = u'ABC Linuxu'
+  oldest_article = 5
+  max_articles_per_feed = 3#5
+  __author__ = 'Funthomas'
+  language = 'cs'
+
+  feeds = [
+    #(u'Blogy', u'http://www.abclinuxu.cz/auto/blogDigest.rss'),
+    (u'Články', u'http://www.abclinuxu.cz/auto/abc.rss'),
+    (u'Zprávičky','http://www.abclinuxu.cz/auto/zpravicky.rss')
+  ]
+
+  remove_javascript = True
+  no_stylesheets    = True
+  remove_attributes = ['width','height']
+
+  remove_tags_before = dict(name='h1')
+  remove_tags = [
+    dict(attrs={'class':['meta-vypis','page_tools','cl_perex']}),
+    dict(attrs={'class':['cl_nadpis-link','komix-nav']})
+  ]
+
+  remove_tags_after = [
+    dict(name='div',attrs={'class':['cl_perex','komix-nav']}),
+    dict(attrs={'class':['meta-vypis','page_tools']}),
+    dict(name='',attrs={'':''}),
+  ]
+
+
+  preprocess_regexps = [
+    (re.compile(r'</div>.*<p class="perex">', re.DOTALL),lambda match: '</div><p class="perex">')
+  ]
+  def print_version(self, url):
+    return url + '?varianta=print&noDiz'
+
+  extra_css = '''
+            h1 {font-size:130%; font-weight:bold}
+            h3 {font-size:111%; font-weight:bold}
+        '''
--- a/resources/recipes/ambito.recipe
+++ b/resources/recipes/ambito.recipe
@ -58,4 +58,4 @@ class Ambito(BasicNewsRecipe):
            del item['style']
        return soup

-    language = 'es'
+    language = 'es_AR'
--- a/resources/recipes/animal_politico.recipe
+++ b/resources/recipes/animal_politico.recipe
@ -12,7 +12,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
    masthead_url   = 'http://www.animalpolitico.com/wp-content/themes/animal_mu/images/logo.png'
    oldest_article = 1
    max_articles_per_feed = 100
-    language       = 'es'
+    language       = 'es_MX'

    #feeds          = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')]

--- a/resources/recipes/axxon_magazine.recipe
+++ b/resources/recipes/axxon_magazine.recipe
@ -17,7 +17,7 @@ class Axxon_news(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = False
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_AR'
    encoding              = 'utf-8'
    publication_type      = 'magazine'
    INDEX                 = 'http://axxon.com.ar/rev/'
--- a/resources/recipes/axxon_news.recipe
+++ b/resources/recipes/axxon_news.recipe
@ -18,7 +18,7 @@ class Axxon_news(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = False
    use_embedded_content  = False
-    language = 'es'
+    language = 'es_AR'

    lang                  = 'es-AR'

--- a/resources/recipes/bbc_es.recipe
+++ b/resources/recipes/bbc_es.recipe
@ -0,0 +1,53 @@
+__license__   = 'GPL v3'
+__author__    = 'Luis Hernandez'
+__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
+__version__     = 'v1.0'
+__date__        = '29 January 2011'
+
+'''
+http://www.bbc.co.uk/mundo/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1294946868(BasicNewsRecipe):
+
+    title          = u'BBC Mundo'
+    publisher      = u'BBC'
+
+    __author__            = 'Luis Hernandez'
+    description           = 'BBC World for spanish readers'
+
+    cover_url     = 'http://1.bp.blogspot.com/_NHiOjk_uZwU/TEYy7IJAdAI/AAAAAAAABP8/coAE-pJ7_5E/s1600/bbcmundo_h.png'
+    oldest_article = 2
+    max_articles_per_feed = 100
+
+    remove_javascript = True
+    no_stylesheets        = True
+    use_embedded_content  = False
+
+    language              = 'es'
+    remove_empty_feeds    = True
+    encoding               = 'UTF-8'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    remove_tags_before = dict(name='div' , attrs={'class':['g-group']})
+    remove_tags_after = dict(name='div' , attrs={'class':[' g-w8']})
+
+    remove_tags = [
+                             dict(name='ul', attrs={'class':['document-tools blq-clearfix','blq-clearfix']})
+                            ,dict(name='div', attrs={'class':['box bx-quote-bubble','socialmedia-links','list li-carousel','list li-plain rolling-news','list li-plain','box bx-livestats','li-tab content','list li-relatedlinks','list li-relatedinternetlinks']})
+                         ]
+
+    feeds = [
+                  (u'Portada'               , u'http://www.bbc.co.uk/mundo/index.xml')
+                 ,(u'Ultimas Noticias'   , u'http://www.bbc.co.uk/mundo/ultimas_noticias/index.xml')
+                 ,(u'Internacional'       , u'http://www.bbc.co.uk/mundo/temas/internacional/index.xml')
+                 ,(u'Economia'            , u'http://www.bbc.co.uk/mundo/temas/economia/index.xml')
+                 ,(u'America Latina'     , u'http://www.bbc.co.uk/mundo/temas/america_latina/index.xml')
+                 ,(u'Ciencia'                , u'http://www.bbc.co.uk/mundo/temas/ciencia/index.xml')
+                 ,(u'Salud'                  , u'http://www.bbc.co.uk/mundo/temas/salud/index.xml')
+                 ,(u'Tecnologia'           , u'http://www.bbc.co.uk/mundo/temas/tecnologia/index.xml')
+                 ,(u'Cultura'                , u'http://www.bbc.co.uk/mundo/temas/cultura/index.xml')
+             ]
+
--- a/resources/recipes/bitacora.recipe
+++ b/resources/recipes/bitacora.recipe
@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
    title                 = 'bitacora.com.uy'
    __author__            = 'Gustavo Azambuja'
    description           = 'Noticias de Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/blic.recipe
+++ b/resources/recipes/blic.recipe
@ -1,6 +1,6 @@

 __license__   = 'GPL v3'
-__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 blic.rs
 '''
@ -21,21 +21,53 @@ class Blic(BasicNewsRecipe):
    masthead_url          = 'http://www.blic.rs/resources/images/header/header_back.png'
    language              = 'sr'
    publication_type      = 'newspaper'
-    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Georgia, serif1, serif} .article_description{font-family: Arial, sans1, sans-serif} .img_full{float: none} img{margin-bottom: 0.8em} '
+    extra_css             = """ 
+                               @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} 
+                               @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} 
+                               body{font-family: Georgia, serif1, serif} 
+                               .articledescription,#nadnaslov,.article_info{font-family: Arial, sans1, sans-serif} 
+                               .img_full{float: none}
+                               #nadnaslov{font-size: small}
+                               #article_lead{font-size: 1.5em}
+                               h1{color: red}
+                               .potpis{font-size: x-small; color: gray}
+                               .article_info{font-size: small}
+                               img{margin-bottom: 0.8em; margin-top: 0.8em; display: block} 
+                            """

    conversion_options = {
                          'comment'  : description
                        , 'tags'     : category
                        , 'publisher': publisher
                        , 'language' : language
+                        , 'linearize_tables' : True
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    remove_tags_before = dict(name='div', attrs={'id':'article_info'})
-    remove_tags        = [dict(name=['object','link'])]
-    remove_attributes  = ['width','height']
+    remove_tags        = [dict(name=['object','link','meta','base','object','embed'])]
+    remove_attributes  = ['width','height','m_id','m_ext','mlg_id','poll_id','v_id']

-    feeds              = [(u'Danasnje Vesti', u'http://www.blic.rs/rss/danasnje-vesti')]
+    feeds              = [
+                            (u'Politika'         , u'http://www.blic.rs/rss/Vesti/Politika')
+                           ,(u'Tema Dana'        , u'http://www.blic.rs/rss/Vesti/Tema-Dana')
+                           ,(u'Svet'             , u'http://www.blic.rs/rss/Vesti/Svet')
+                           ,(u'Drustvo'          , u'http://www.blic.rs/rss/Vesti/Drustvo')
+                           ,(u'Ekonomija'        , u'http://www.blic.rs/rss/Vesti/Ekonomija')
+                           ,(u'Hronika'          , u'http://www.blic.rs/rss/Vesti/Hronika')
+                           ,(u'Beograd'          , u'http://www.blic.rs/rss/Vesti/Beograd')
+                           ,(u'Srbija'           , u'http://www.blic.rs/rss/Vesti/Srbija')
+                           ,(u'Vojvodina'        , u'http://www.blic.rs/rss/Vesti/Vojvodina')
+                           ,(u'Republika Srpska' , u'http://www.blic.rs/rss/Vesti/Republika-Srpska')
+                           ,(u'Reportaza'        , u'http://www.blic.rs/rss/Vesti/Reportaza')
+                           ,(u'Dodatak'          , u'http://www.blic.rs/rss/Vesti/Dodatak')
+                           ,(u'Zabava'           , u'http://www.blic.rs/rss/Zabava')
+                           ,(u'Kultura'          , u'http://www.blic.rs/rss/Kultura')
+                           ,(u'Slobodno Vreme'   , u'http://www.blic.rs/rss/Slobodno-vreme')
+                           ,(u'IT'               , u'http://www.blic.rs/rss/IT')
+                           ,(u'Komentar'         , u'http://www.blic.rs/rss/Komentar')
+                           ,(u'Intervju'         , u'http://www.blic.rs/rss/Intervju')
+                         ]


    def print_version(self, url):
@ -44,4 +76,4 @@ class Blic(BasicNewsRecipe):
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']    
-        return self.adeify_images(soup)
+        return soup
--- a/resources/recipes/buenosaireseconomico.recipe
+++ b/resources/recipes/buenosaireseconomico.recipe
@ -20,7 +20,7 @@ class BsAsEconomico(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'

    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/resources/recipes/calgary_herald.recipe
+++ b/resources/recipes/calgary_herald.recipe
@ -44,6 +44,7 @@ class CanWestPaper(BasicNewsRecipe):

    language = 'en_CA'
    __author__ = 'Nick Redding'
+    encoding = 'latin1'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
@ -97,7 +98,9 @@ class CanWestPaper(BasicNewsRecipe):
                atag = h1tag.find('a',href=True)
                if not atag:
                    continue
-                url = self.url_prefix+'/news/todays-paper/'+atag['href']
+                url = atag['href']
+                if not url.startswith('http:'):
+                    url = self.url_prefix+'/news/todays-paper/'+atag['href']
                #self.log("Section %s" % key)
                #self.log("url %s" % url)
                title = self.tag_to_string(atag,False)
--- a/resources/recipes/capes_n_babes.recipe
+++ b/resources/recipes/capes_n_babes.recipe
@ -0,0 +1,11 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CapesnBabesRecipe(BasicNewsRecipe):
+    title          = u'Capes n Babes'
+    language       = 'en'
+    description = 'The Capes n Babes comic Blog'
+    __author__     = 'skyhawker'
+    oldest_article = 31
+    max_articles_per_feed = 100
+    use_embedded_content = True
+    feeds          = [(u'Capes & Babes', u'feed://www.capesnbabes.com/feed/')]
--- a/resources/recipes/cinco_dias.recipe
+++ b/resources/recipes/cinco_dias.recipe
@ -0,0 +1,71 @@
+__license__   = 'GPL v3'
+__author__    = 'Luis Hernandez'
+__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
+__version__     = 'v1.2'
+__date__        = '31 January 2011'
+
+'''
+http://www.cincodias.com/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1294946868(BasicNewsRecipe):
+
+    title          = u'Cinco Dias'
+    publisher      = u'Grupo Prisa'
+
+    __author__            = 'Luis Hernandez'
+    description           = 'spanish web about money and bussiness, free edition'
+
+    cover_url     = 'http://www.prisa.com/images/logos/logo_cinco_dias.gif'
+    oldest_article = 2
+    max_articles_per_feed = 100
+
+    remove_javascript = True
+    no_stylesheets        = True
+    use_embedded_content  = False
+
+    language              = 'es'
+    remove_empty_feeds    = True
+    encoding               = 'ISO-8859-1'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    keep_only_tags     = [
+                                    dict(name='div', attrs={'class':['cab_articulo cab_noticia','pos_3','txt_noticia','mod_despiece']})
+                                   ,dict(name='p', attrs={'class':['cintillo']})
+                                ]
+
+    remove_tags_before = dict(name='div' , attrs={'class':['publi_h']})
+    remove_tags_after = dict(name='div' , attrs={'class':['tab_util util_estadisticas']})
+
+    remove_tags = [
+                             dict(name='div', attrs={'class':['util-1','util-2','util-3','inner estirar','inner1','inner2','inner3','cont','tab_util util_estadisticas','tab_util util_enviar','mod_list_inf','mod_similares','mod_divisas','mod_sectores','mod_termometro','mod post','mod_img','mod_txt','nivel estirar','barra estirar','info_brujula btnBrujula','utilidad_brujula estirar']})
+                            ,dict(name='li', attrs={'class':['lnk-fcbook','lnk-retweet','lnk-meneame','desplegable','comentarios','list-options','estirar']})
+                            ,dict(name='ul', attrs={'class':['lista-izquierda','list-options','estirar']})
+                            ,dict(name='p', attrs={'class':['autor']})
+                         ]
+
+    extra_css             = """
+                               p{text-align: justify; font-size: 100%}
+                               body{ text-align: left; font-size:100% }
+                               h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
+                               h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
+                                 """
+
+    feeds = [
+                  (u'Ultima Hora'              , u'http://www.cincodias.com/rss/feed.html?feedId=17029')
+                 ,(u'Empresas'                 , u'http://www.cincodias.com/rss/feed.html?feedId=19')
+                 ,(u'Mercados'                 , u'http://www.cincodias.com/rss/feed.html?feedId=20')
+                 ,(u'Economia'                 , u'http://www.cincodias.com/rss/feed.html?feedId=21')
+                 ,(u'Tecnorama'               , u'http://www.cincodias.com/rss/feed.html?feedId=17230')
+                 ,(u'Tecnologia'                , u'http://www.cincodias.com/rss/feed.html?feedId=17106')
+                 ,(u'Finanzas Personales'  , u'http://www.cincodias.com/rss/feed.html?feedId=22')
+                 ,(u'Fiscalidad'                 , u'http://www.cincodias.com/rss/feed.html?feedId=17107')
+                 ,(u'Vivienda'                   , u'http://www.cincodias.com/rss/feed.html?feedId=17108')
+                 ,(u'Tendencias'               , u'http://www.cincodias.com/rss/feed.html?feedId=17109')
+                 ,(u'Empleo'                    , u'http://www.cincodias.com/rss/feed.html?feedId=17110')
+                 ,(u'IBEX 35'                    , u'http://www.cincodias.com/rss/feed.html?feedId=17125')
+                 ,(u'Sectores'                  , u'http://www.cincodias.com/rss/feed.html?feedId=17126')
+                 ,(u'Opinion'                    , u'http://www.cincodias.com/rss/feed.html?feedId=17105')
+              ]
--- a/resources/recipes/clarin.recipe
+++ b/resources/recipes/clarin.recipe
@ -18,7 +18,7 @@ class Clarin(BasicNewsRecipe):
    use_embedded_content  = False
    no_stylesheets        = True
    encoding              = 'utf8'
-    language              = 'es'
+    language              = 'es_AR'
    publication_type      = 'newspaper'
    INDEX                 = 'http://www.clarin.com'
    masthead_url          = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'
--- a/resources/recipes/criticadigital.recipe
+++ b/resources/recipes/criticadigital.recipe
@ -14,7 +14,7 @@ class CriticaDigital(BasicNewsRecipe):
    description           = 'Noticias de Argentina'
    oldest_article        = 2
    max_articles_per_feed = 100
-    language = 'es'
+    language = 'es_AR'

    no_stylesheets        = True
    use_embedded_content  = False
--- a/resources/recipes/cubadebate.recipe
+++ b/resources/recipes/cubadebate.recipe
@ -11,7 +11,7 @@ class CubaDebate(BasicNewsRecipe):
    __author__            = 'Darko Miletic'
    description           = 'Contra el Terorismo Mediatico'
    oldest_article        = 15
-    language              = 'es'
+    language              = 'es_CU'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
@ -20,8 +20,8 @@ class CubaDebate(BasicNewsRecipe):
    encoding              = 'utf-8'
    masthead_url          = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif'
    publication_type      = 'newsportal'
-    extra_css             = """ 
-                               #BlogTitle{font-size: xx-large; font-weight: bold} 
+    extra_css             = """
+                               #BlogTitle{font-size: xx-large; font-weight: bold}
                               body{font-family: Verdana, Arial, Tahoma, sans-serif}
                            """

@ -41,7 +41,7 @@ class CubaDebate(BasicNewsRecipe):

    feeds          = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]
    remove_attributes=['width','height','lang']
-    
+
    def print_version(self, url):
        return url + 'print/'

@ -50,5 +50,5 @@ class CubaDebate(BasicNewsRecipe):
            del item['style']
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
-               item['alt'] = 'image'                
+               item['alt'] = 'image'
        return soup
--- a/resources/recipes/dailytportal.recipe
+++ b/resources/recipes/dailytportal.recipe
@ -0,0 +1,66 @@
+__license__   = 'GPL v3'
+__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
+'''
+daily.tportal.hr
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Pagina12(BasicNewsRecipe):
+    title                 = 'Daily tportal.h'
+    __author__            = 'Darko Miletic'
+    description           = 'News from Croatia'
+    publisher             = 'tportal.hr'
+    category              = 'news, politics, Croatia'
+    oldest_article        = 2
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+    use_embedded_content  = False
+    language              = 'en_HR'
+    remove_empty_feeds    = True
+    publication_type      = 'newsportal'
+    extra_css             = """
+                               body{font-family: Verdana,sans-serif }
+                               img{margin-bottom: 0.4em; display:block}
+                               h1,h2{color: #2D648A; font-family: Georgia,serif}
+                               .artAbstract{font-size: 1.2em; font-family: Georgia,serif}
+                            """
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    remove_tags = [
+                     dict(name=['meta','link','embed','object','iframe','base'])
+                    ,dict(name='div', attrs={'class':'artInfo'})
+                  ]
+    remove_attributes=['lang']
+
+    keep_only_tags=dict(attrs={'class':'articleDetails'})
+
+    feeds = [(u'News', u'http://daily.tportal.hr/rss/dailynaslovnicarss.xml')]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+               str = item.string
+               item.replaceWith(str)
+            else:
+               if limg:
+                  item.name = 'div'
+                  item.attrs = []
+               else:
+                   str = self.tag_to_string(item)
+                   item.replaceWith(str)
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+               item['alt'] = 'image'
+        return soup
+
--- a/resources/recipes/dallas.recipe
+++ b/resources/recipes/dallas.recipe
@ -7,22 +7,29 @@ class DallasNews(BasicNewsRecipe):
    max_articles_per_feed = 25

    no_stylesheets = True
-    remove_tags_before = dict(name='h2', attrs={'class':'vitstoryheadline'})
-    remove_tags_after  = dict(name='div', attrs={'style':'width: 100%; clear: right'})
-    remove_tags_after  = dict(name='div', attrs={'id':'article_tools_bottom'})
+    use_embedded_content = False
+    remove_tags_before = dict(name='h1')
+    keep_only_tags = {'class':lambda x: x and 'article' in x}
    remove_tags = [
-       dict(name='iframe'),
-       dict(name='div', attrs={'class':'biblockmore'}),
-       dict(name='div', attrs={'style':'width: 100%; clear: right'}),
-       dict(name='div', attrs={'id':'article_tools_bottom'}),
-       #dict(name='ul', attrs={'class':'articleTools'}),
+            {'class':['DMNSocialTools', 'article ', 'article first ', 'article premium']},
    ]

    feeds          = [
-                      ('Latest News', 'http://www.dallasnews.com/newskiosk/rss/dallasnewslatestnews.xml'),
-                      ('Local News', 'http://www.dallasnews.com/newskiosk/rss/dallasnewslocalnews.xml'),
-		      ('Nation and World', 'http://www.dallasnews.com/newskiosk/rss/dallasnewsnationworld.xml'),
-		      ('Politics', 'http://www.dallasnews.com/newskiosk/rss/dallasnewsnationalpolitics.xml'),
-		      ('Science', 'http://www.dallasnews.com/newskiosk/rss/dallasnewsscience.xml'),
+                      ('Local News',
+                          'http://www.dallasnews.com/news/politics/local-politics/?rss'),
+                      ('National Politics',
+                          'http://www.dallasnews.com/news/politics/national-politic/?rss'),
+                      ('State Politics',
+                          'http://www.dallasnews.com/news/politics/state-politics/?rss'),
+                      ('Religion',
+                         'http://www.dallasnews.com/news/religion/?rss'),
+                      ('Crime',
+                          'http://www.dallasnews.com/news/crime/headlines/?rss'),
+                      ('Celebrity News',
+                          'http://www.dallasnews.com/entertainment/celebrity-news/?rss&listname=TopStories'),
+                      ('Nation',
+                          'http://www.dallasnews.com/news/nation-world/nation/?rss'),
+                      ('World',
+                          'http://www.dallasnews.com/news/nation-world/world/?rss'),
                    ]

--- a/resources/recipes/dbb.recipe
+++ b/resources/recipes/dbb.recipe
@ -0,0 +1,45 @@
+# -*- coding: utf-8
+
+__license__   = 'GPL v3'
+__author__    = 'Luis Hernandez'
+__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
+
+'''
+http://www.filmica.com/david_bravo/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1294946868(BasicNewsRecipe):
+
+    title             = u'Blog de David Bravo'
+    publisher      = u'Filmica'
+
+    __author__  = 'Luis Hernández'
+    description   = 'blog sobre leyes, p2p y copyright'
+    cover_url     = 'http://www.elpais.es/edigitales/image.php?foto=par/portada/1551.jpg'
+
+    oldest_article = 365
+    max_articles_per_feed = 100
+
+    remove_javascript = True
+    no_stylesheets        = True
+    use_embedded_content  = False
+
+    encoding              = 'ISO-8859-1'
+    language              = 'es'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    keep_only_tags     = [
+                                    dict(name='div', attrs={'class':['blog','date','blogbody','comments-head','comments-body']})
+                                   ,dict(name='span', attrs={'class':['comments-post']})
+                                ]
+
+    remove_tags_before = dict(name='div' , attrs={'id':['bitacoras']})
+    remove_tags_after  = dict(name='div' , attrs={'id':['comments-body']})
+
+    extra_css             = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h2{ font-family: sans-serif; font-size:75%; font-weight: 800; text-align: justify } h3{ font-family: sans-serif; font-size:150%; font-weight: 600; text-align: left } img{margin-bottom: 0.4em} '
+
+
+
+    feeds          = [(u'Blog', u'http://www.filmica.com/david_bravo/index.rdf')]
--- a/resources/recipes/deutsche_welle_es.recipe
+++ b/resources/recipes/deutsche_welle_es.recipe
@ -16,7 +16,7 @@ class DeutscheWelle_es(BasicNewsRecipe):
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
-    language              = 'es'
+    language              = 'de_ES'
    publication_type      = 'newsportal'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
--- a/resources/recipes/diagonales.recipe
+++ b/resources/recipes/diagonales.recipe
@ -20,7 +20,7 @@ class Diagonales(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'

    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/resources/recipes/economist.recipe
+++ b/resources/recipes/economist.recipe
@ -22,8 +22,11 @@ class Economist(BasicNewsRecipe):

    oldest_article = 7.0
    cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
-    remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
-            dict(attrs={'class':['dblClkTrk', 'ec-article-info']})]
+    remove_tags = [
+            dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
+            dict(attrs={'class':['dblClkTrk', 'ec-article-info']}),
+            {'class': lambda x: x and 'share-links-header' in x},
+    ]
    keep_only_tags = [dict(id='ec-article-body')]
    needs_subscription = False
    no_stylesheets = True
--- a/resources/recipes/economist_free.recipe
+++ b/resources/recipes/economist_free.recipe
@ -16,8 +16,11 @@ class Economist(BasicNewsRecipe):

    oldest_article = 7.0
    cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
-    remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
-            dict(attrs={'class':['dblClkTrk', 'ec-article-info']})]
+    remove_tags = [
+            dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
+            dict(attrs={'class':['dblClkTrk', 'ec-article-info']}),
+            {'class': lambda x: x and 'share-links-header' in x},
+    ]
    keep_only_tags = [dict(id='ec-article-body')]
    no_stylesheets = True
    preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
--- a/resources/recipes/el_mercurio_chile.recipe
+++ b/resources/recipes/el_mercurio_chile.recipe
@ -20,8 +20,8 @@ class ElMercurio(BasicNewsRecipe):
    masthead_url          = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
    remove_javascript     = True
    use_embedded_content  = False
-    language              = 'es'
-    
+    language              = 'es_CL'
+

    conversion_options = {
                          'comment'   : description
@ -33,7 +33,7 @@ class ElMercurio(BasicNewsRecipe):
    keep_only_tags = [dict(name='div', attrs={'id':['cont_iz_titulobajada','cont_iz_creditos_1_a','cont_iz_cuerpo']})]
    remove_tags = [dict(name='div', attrs={'id':'cont_iz_cuerpo_relacionados'})]
    remove_attributes = ['height','width']
-    
+
    feeds = [
               (u'Noticias de ultima hora', u'http://rss.emol.com/rss.asp?canal=0')
              ,(u'Nacional', u'http://rss.emol.com/rss.asp?canal=1')
--- a/resources/recipes/el_observador.recipe
+++ b/resources/recipes/el_observador.recipe
@ -13,7 +13,7 @@ class ObservaDigital(BasicNewsRecipe):
    title                 = 'Observa Digital'
    __author__            = 'yrvn'
    description           = 'Noticias de Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/el_pais.recipe
+++ b/resources/recipes/el_pais.recipe
@ -9,13 +9,14 @@ __docformat__ = 'restructuredtext en'
 elpais.es
 '''

+from time import strftime
+
 from calibre.web.feeds.news import BasicNewsRecipe

 class ElPais(BasicNewsRecipe):
    __author__        = 'Kovid Goyal & Lorenzo Vigentini & Jordi Balcells'
    description   = 'Main daily newspaper from Spain'

-    cover_url      = 'http://www.elpais.com/im/tit_logo_global.gif'
    title          = u'El Pais'
    publisher      = u'Ediciones El Pa\xeds SL'
    category       = 'News, politics, culture, economy, general interest'
@ -62,6 +63,6 @@ class ElPais(BasicNewsRecipe):
                        (u'Vi\xf1etas', u'http://www.elpais.com/rss/feed.html?feedId=17058')
                        ]

-def print_version(self, url):
-    url = url+'?print=1'
-    return url
+    def get_cover_url(self):
+        return 'http://img5.kiosko.net/' + strftime("%Y/%m/%d") + '/es/elpais.750.jpg'
+
--- a/resources/recipes/el_pais_uy.recipe
+++ b/resources/recipes/el_pais_uy.recipe
@ -14,7 +14,7 @@ class General(BasicNewsRecipe):
    description           = 'Noticias de Uruguay y el resto del mundo'
    publisher             = 'EL PAIS S.A.'
    category              = 'news, politics, Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 2
--- a/resources/recipes/el_universal.recipe
+++ b/resources/recipes/el_universal.recipe
@ -20,7 +20,7 @@ class ElUniversal(BasicNewsRecipe):
    remove_javascript     = True
    remove_empty_feeds    = True
    publication_type      = 'newspaper'
-    language              = 'es'
+    language              = 'es_MX'

    extra_css = '''
                    body{font-family:Arial,Helvetica,sans-serif}
--- a/resources/recipes/elargentino.recipe
+++ b/resources/recipes/elargentino.recipe
@ -12,7 +12,7 @@ class ElArgentino(BasicNewsRecipe):
    __author__            = 'Darko Miletic'
    description           = 'Informacion Libre las 24 horas'
    publisher             = 'ElArgentino.com'
-    category              = 'news, politics, Argentina'    
+    category              = 'news, politics, Argentina'
    oldest_article        = 2
    max_articles_per_feed = 100
    remove_javascript     = True
@ -20,7 +20,7 @@ class ElArgentino(BasicNewsRecipe):
    use_embedded_content  = False
    encoding              = 'utf8'
    cover_url             = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
-    language = 'es'
+    language = 'es_AR'


    html2lrf_options = [
@ -28,16 +28,16 @@ class ElArgentino(BasicNewsRecipe):
                        , '--category', category
                        , '--publisher', publisher
                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    remove_tags = [
                     dict(name='div', attrs={'id':'noprint'              })
                    ,dict(name='div', attrs={'class':'encabezadoImprimir'})
                    ,dict(name='a'  , attrs={'target':'_blank'           })
                  ]
-    
-    feeds = [ 
+
+    feeds = [
              (u'Portada'     , u'http://www.elargentino.com/Highlights.aspx?Content-Type=text/xml&ChannelDesc=Home'                                             )
             ,(u'Pais'        , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs'        )
             ,(u'Economia'    , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa'    )
@ -51,12 +51,12 @@ class ElArgentino(BasicNewsRecipe):

    def print_version(self, url):
        main, sep, article_part = url.partition('/nota-')
-        article_id, rsep, rrest = article_part.partition('-')    
+        article_id, rsep, rrest = article_part.partition('-')
        return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
-            del item['style']        
+            del item['style']
        return soup
--- a/resources/recipes/elcomercio.recipe
+++ b/resources/recipes/elcomercio.recipe
@ -18,7 +18,7 @@ class ElComercio(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = True
-    language              = 'es'
+    language              = 'es_EC'
    masthead_url          = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
    extra_css             = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '

--- a/resources/recipes/elcronista.recipe
+++ b/resources/recipes/elcronista.recipe
@ -13,7 +13,7 @@ class ElCronista(BasicNewsRecipe):
    __author__            = 'Darko Miletic'
    description           = 'Noticias de Argentina'
    oldest_article        = 2
-    language = 'es'
+    language = 'es_AR'

    max_articles_per_feed = 100
    no_stylesheets        = True
@ -25,14 +25,14 @@ class ElCronista(BasicNewsRecipe):
                        , '--category'      , 'news, Argentina'
                        , '--publisher'     , title
                        ]
-    
+
    keep_only_tags = [
                        dict(name='table', attrs={'width':'100%'             })
                       ,dict(name='h1'   , attrs={'class':'Arialgris16normal'})
                     ]

    remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})]
-                     
+
    feeds = [
               (u'Economia'                , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml'             )
              ,(u'Negocios'                , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml'             )
@ -69,4 +69,4 @@ class ElCronista(BasicNewsRecipe):
        if link_item:
           cover_url = index + link_item.img['src']
        return cover_url
-        
+
--- a/resources/recipes/eltiempo_hn.recipe
+++ b/resources/recipes/eltiempo_hn.recipe
@ -21,7 +21,7 @@ class ElTiempoHn(BasicNewsRecipe):
    no_stylesheets        = True
    remove_javascript     = True
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_HN'

    lang                  = 'es-HN'
    direction             = 'ltr'
--- a/resources/recipes/eluniversal_ve.recipe
+++ b/resources/recipes/eluniversal_ve.recipe
@ -18,7 +18,7 @@ class ElUniversal(BasicNewsRecipe):
    encoding               = 'cp1252'
    publisher              = 'El Universal'
    category               = 'news, Caracas, Venezuela, world'
-    language               = 'es'
+    language               = 'es_VE'
    cover_url              = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')

    conversion_options = {
--- a/resources/recipes/eluniversalimpresa.recipe
+++ b/resources/recipes/eluniversalimpresa.recipe
@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class ElUniversalImpresaRecipe(BasicNewsRecipe):
    __license__  = 'GPL v3'
    __author__ = 'kwetal'
-    language = 'es'
+    language = 'es_MX'
    version = 1

    title = u'El Universal (Edici\u00F3n Impresa)'
--- a/resources/recipes/eluniverso_ec.recipe
+++ b/resources/recipes/eluniverso_ec.recipe
@ -17,7 +17,7 @@ class ElUniverso_Ecuador(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_EC'
    remove_empty_feeds    = True
    publication_type      = 'newspaper'
    masthead_url          = 'http://servicios2.eluniverso.com/versiones/v1/img/Hd/lg_ElUniverso.gif'
--- a/resources/recipes/endgadget_ja.recipe
+++ b/resources/recipes/endgadget_ja.recipe
@ -18,3 +18,6 @@ class EndgadgetJapan(BasicNewsRecipe):
    language = 'ja'
    encoding = 'utf-8'
    feeds          = [(u'engadget', u'http://japanese.engadget.com/rss.xml')]
+
+    remove_tags_before = dict(name="div", attrs={'id':"content_wrap"})
+    remove_tags_after = dict(name='h3', attrs={'id':'addcomments'})
--- a/resources/recipes/everett_herald.recipe
+++ b/resources/recipes/everett_herald.recipe
@ -0,0 +1,38 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+# Recipe for the Everett Herald, built from the heraldnet.com RSS section feeds.
+class AdvancedUserRecipe1295088390(BasicNewsRecipe):
+    title          = u'Everett Herald'
+    language = 'en'
+    __author__ = '77ja65'
+    oldest_article = 4
+    max_articles_per_feed = 50
+    no_stylesheets = True
+    masthead_url          = 'http://heraldnet.com/images/hnet/jQueryComponents/jQueryNavigation/heraldnet_logo.png'
+
+    # All rules live in a single extra_css value: a second assignment in the
+    # class body would silently replace the first one. Property names must stay
+    # on one line ("font-weight"); a wrap inside the name yields invalid CSS.
+    extra_css = '''
+                    .headline {font-size: x-large;}
+                    .fact { padding-top: 10pt  }
+                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+                '''
+
+    # (section title, feed URL) pairs.
+    feeds          = [
+        (u'Local News', u'http://heraldnet.com/section/RSS02&mime=xml'),
+        (u'Sports', u'http://heraldnet.com/section/RSS04&mime=xml'),
+        (u'Entertainment', u'http://heraldnet.com/section/RSS07&mime=xml'),
+        (u'Life', u'http://heraldnet.com/section/RSS03&mime=xml'),
+        (u'Breaking News', u'http://heraldnet.com/section/RSS34&mime=xml'),
+        (u'Seahawks', u'http://heraldnet.com/section/RSS22&mime=xml'),
+        (u'HeraldNet', u'http://heraldnet.com/section/RSS01&mime=xml'),
+        (u'Inside Everett', u'http://heraldnet.com/section/RSS26&mime=xml')
+        ]
+
+    def print_version(self, url):
+        '''Return url with the PrinterFriendly template parameter appended.'''
+        return url + "&template=PrinterFriendly"
+
--- a/resources/recipes/explosm.recipe
+++ b/resources/recipes/explosm.recipe
@ -0,0 +1,60 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+# Recipe for the Cyanide and Happiness comics listed in the Explosm feed.
+class Explosm(BasicNewsRecipe):
+    title              = u'Explosm Rotated'
+    __author__        = 'Andromeda Rabbit'
+    description      = 'Explosm'
+    language            = 'en'
+    use_embedded_content = False
+    no_stylesheets    = True
+    oldest_article    = 24
+    remove_javascript   = True
+    remove_empty_feeds  = True
+    max_articles_per_feed = 10
+
+    feeds = [
+             (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
+             ]
+
+    #match_regexps = [r'http://www.explosm.net/comics/.*']
+
+    # The comic image is selected by its alt text.
+    keep_only_tags   = [dict(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})]
+    remove_tags = [dict(name='div'), dict(name='span'), dict(name='table'), dict(name='br'), dict(name='nobr'), dict(name='a'), dict(name='b')]
+
+    extra_css = '''
+                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''
+
+    def get_cover_url(self):
+        '''Return the fixed cover image URL of this recipe.'''
+        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'
+
+    def parse_feeds(self):
+        '''Return the parsed feeds without articles that are not comics.
+
+        Articles whose URL does not contain the explosm.net comics address
+        are removed from each feed's article list in place.
+        '''
+        comic_url = re.compile(r'http://www\.explosm\.net/comics')
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        for curfeed in feeds:
+            # Slice assignment keeps the same list object on the feed.
+            curfeed.articles[:] = [article for article in curfeed.articles
+                                   if comic_url.search(article.url) is not None]
+
+        return feeds
+
+    def skip_ad_pages(self, soup):
+        '''Return soup when the comic image is missing, otherwise None.'''
+        # NOTE(review): how the framework treats a non-None result is not
+        # visible here - confirm against BasicNewsRecipe.skip_ad_pages.
+        skip_tag = soup.find(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})
+        if skip_tag is None:
+            return soup
+        return None
--- a/resources/recipes/freeway.recipe
+++ b/resources/recipes/freeway.recipe
@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
    title                 = 'freeway.com.uy'
    __author__            = 'Gustavo Azambuja'
    description           = 'Revista Freeway, Montevideo, Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 1
--- a/resources/recipes/granma.recipe
+++ b/resources/recipes/granma.recipe
@ -20,7 +20,7 @@ class Granma(BasicNewsRecipe):
    use_embedded_content  = False
    encoding              = 'cp1252'
    cover_url             = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
-    language = 'es'
+    language = 'es_CU'

    remove_javascript     = True

--- a/resources/recipes/gulfnews.recipe
+++ b/resources/recipes/gulfnews.recipe
@ -0,0 +1,67 @@
+__license__   = 'GPL v3'
+__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
+'''
+gulfnews.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+# Recipe for the gulfnews.com news feeds.
+class GulfNews(BasicNewsRecipe):
+    title                 = 'Gulf News'
+    __author__            = 'Darko Miletic'
+    description           = 'News from United Arab Emirates, Persian Gulf and rest of the world'
+    publisher             = 'Al Nisr Publishing LLC'
+    category              = 'news, politics, UAE, world'
+    oldest_article        = 2
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = False
+    language              = 'en'
+    remove_empty_feeds    = True
+    publication_type      = 'newsportal'
+    masthead_url          = 'http://gulfnews.com/media/img/gulf_news_logo.jpg'
+    extra_css             = """
+                               body{font-family: Arial,Helvetica,sans-serif }
+                               img{margin-bottom: 0.4em; display:block}
+                               h1{font-family: Georgia, 'Times New Roman', Times, serif}
+                               ol,ul{list-style: none}
+                               .synopsis{font-size: small}
+                               .details{font-size: x-small}
+                               .image{font-size: xx-small}
+                            """
+
+    # Reuses the metadata attributes defined above, so they must come first.
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    remove_tags = [
+                     dict(name=['meta','link','object','embed'])
+                     ,dict(attrs={'class':['quickLinks','ratings']})
+                     ,dict(attrs={'id':'imageSelector'})
+                  ]
+    remove_attributes=['lang']
+    keep_only_tags=[
+                     dict(name='h1')
+                     ,dict(attrs={'class':['synopsis','details','image','article']})
+                   ]
+
+
+    feeds = [
+              (u'UAE News'      , u'http://gulfnews.com/cmlink/1.446094')
+             ,(u'Business'      , u'http://gulfnews.com/cmlink/1.446098')
+             ,(u'Entertainment' , u'http://gulfnews.com/cmlink/1.446095')
+             ,(u'Sport'         , u'http://gulfnews.com/cmlink/1.446096')
+             ,(u'Life'          , u'http://gulfnews.com/cmlink/1.446097')
+            ]
+
+    def preprocess_html(self, soup):
+        '''Remove the style attribute from every element that has one and return soup.'''
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
--- a/resources/recipes/heise.recipe
+++ b/resources/recipes/heise.recipe
@ -52,6 +52,7 @@ class heiseDe(BasicNewsRecipe):
 		   dict(id='navi_login'),
 		   dict(id='navigation'),
 		   dict(id='breadcrumb'),
+		   dict(id='adblockerwarnung'),
 		   dict(id=''),
 		   dict(id='sitemap'),
 		   dict(id='bannerzone'),
@ -67,3 +68,4 @@ class heiseDe(BasicNewsRecipe):



+
--- a/resources/recipes/hna.recipe
+++ b/resources/recipes/hna.recipe
@ -21,7 +21,7 @@ class hnaDe(BasicNewsRecipe):
    max_articles_per_feed = 40
    no_stylesheets = True
    remove_javascript = True
-    encoding = 'iso-8859-1'
+    encoding = 'utf-8'

    remove_tags = [dict(id='topnav'),
 		   dict(id='nav_main'),
@ -60,3 +60,4 @@ class hnaDe(BasicNewsRecipe):
    feeds =  [ ('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'),
 	       ('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel') ]

+
--- a/resources/recipes/ibm_smarter_planet.recipe
+++ b/resources/recipes/ibm_smarter_planet.recipe
@ -1,16 +1,18 @@
+
 from calibre.web.feeds.news import BasicNewsRecipe

 class AdvancedUserRecipe1293122276(BasicNewsRecipe):
-    title          = u'Smarter Planet | Tumblr for eReaders'
+    title          = u'Smarter Planet | Tumblr'
    __author__ = 'Jack Mason'
    author  = 'IBM Global Business Services'
    publisher             = 'IBM'
+    language = 'en'
    category              = 'news, technology, IT, internet of things, analytics'
-    oldest_article = 7
+    oldest_article = 14
    max_articles_per_feed = 30
    no_stylesheets = True
    use_embedded_content = False
-    masthead_url          = 'http://30.media.tumblr.com/tumblr_l70dow9UmU1qzs4rbo1_r3_250.jpg'
+    masthead_url          = 'http://www.hellercd.com/wp-content/uploads/2010/09/hero.jpg'
    remove_tags_before = dict(id='item')
    remove_tags_after  = dict(id='item')
    remove_tags = [dict(attrs={'class':['sidebar', 'about', 'footer', 'description,' 'disqus', 'nav', 'notes', 'disqus_thread']}),
@ -20,4 +22,3 @@ class AdvancedUserRecipe1293122276(BasicNewsRecipe):


    feeds          = [(u'Smarter Planet Tumblr', u'http://smarterplanet.tumblr.com/mobile/rss')]
-
--- a/resources/recipes/idnes.recipe
+++ b/resources/recipes/idnes.recipe
@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+# Recipe for the iDNES.cz news, technology and comics feeds.
+class iHeuteRecipe(BasicNewsRecipe):
+    __author__  = 'FunThomas'
+    title = u'iDnes.cz'
+    publisher = u'MAFRA a.s.'
+    description = u'iDNES.cz Zprávy, Technet, Komiksy a další'
+    oldest_article = 3
+    max_articles_per_feed = 2
+
+    feeds = [
+            (u'Zprávy', u'http://servis.idnes.cz/rss.asp?c=zpravodaj'),
+            (u'Sport', u'http://servis.idnes.cz/rss.asp?c=sport'),
+            (u'Technet', u'http://servis.idnes.cz/rss.asp?c=technet'),
+            (u'Mobil', u'http://servis.idnes.cz/rss.asp?c=mobil'),
+            (u'Ekonomika', u'http://servis.idnes.cz/rss.asp?c=ekonomikah'),
+            #(u'Kultura', u'http://servis.idnes.cz/rss.asp?c=kultura'),
+            (u'Cestování', u'http://servis.idnes.cz/rss.asp?c=iglobe'),
+            #(u'Kavárna', u'http://servis.idnes.cz/rss.asp?r=kavarna'),
+            (u'Komixy', u'http://servis.idnes.cz/rss.asp?c=komiksy')
+            ]
+
+
+    encoding = 'cp1250'
+    language = 'cs'
+    cover_url = 'http://g.idnes.cz/u/loga-n4/idnes.gif'
+    remove_javascript = True
+    no_stylesheets = True
+
+    remove_attributes = ['width','height']
+    remove_tags = [dict(name='div',   attrs={'id':['zooming']}),
+                   dict(name='div',   attrs={'class':['related','mapa-wrapper']}),
+                   dict(name='table', attrs={'id':['opener-img','portal']}),
+                   dict(name='table', attrs={'class':['video-16ku9']})]
+    remove_tags_after  = [dict(name='div',attrs={'id':['related','related2']})]
+
+    keep_only_tags = [dict(name='div', attrs={'class':['art-full adwords-text','dil-day']})
+                      ,dict(name='table',attrs={'class':['kemel-box']})]
+
+    def print_version(self, url):
+        '''Return the URL of the printable form of an article.
+
+        Dilbert comic pages are rewritten to their .gif/.aspx print address,
+        Kemel comic pages are returned unchanged, and every other article is
+        mapped to tiskni.asp with the original query string. A URL without a
+        query string is returned unchanged instead of raising IndexError.
+        '''
+        split_url = url.split("?")
+        page = split_url[0]
+        if 'dilbert.asp' in page:       # Dilbert comic
+            return url.replace('.htm','.gif&tisk=1').replace('.asp','.aspx')
+        if 'kemel.asp' in page:         # Kemel comic: its print page doesn't work
+            return url
+        if len(split_url) < 2:          # no query string to hand to tiskni.asp
+            return url
+        return 'http://zpravy.idnes.cz/tiskni.asp?' +  split_url[1]
+
+    extra_css = '''
+                  h1 {font-size:125%; font-weight:bold}
+                  h3 {font-size:110%; font-weight:bold}
+                '''
--- a/resources/recipes/ieco.recipe
+++ b/resources/recipes/ieco.recipe
@ -18,7 +18,7 @@ class iEco(BasicNewsRecipe):
    encoding              = 'utf-8'
    publisher             = 'Grupo Clarin'
    category              = 'news, economia, mercados, bolsa de valores, finanzas, empresas, negocios, empleos, emprendedores, marketinguniversidades, tecnologia, agronegocios, noticias, informacion'
-    language              = 'es'
+    language              = 'es_AR'
    cover_url             = 'http://www.ieco.clarin.com/static2/images/Tapa-PDF.gif'
    extra_css             = ' #bd{font-family: sans-serif} '

--- a/resources/recipes/ihned.recipe
+++ b/resources/recipes/ihned.recipe
@ -5,7 +5,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
 class IHNed(BasicNewsRecipe):


-    stahnout_vsechny = False
+    stahnout_vsechny = True
        #True   = stahuje vsechny z homepage
        #False  = stahuje pouze dnesni clanky (ze dne, kdy je skript spusten)

--- a/resources/recipes/infobae.recipe
+++ b/resources/recipes/infobae.recipe
@ -16,7 +16,7 @@ class Infobae(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_AR'
    encoding              = 'cp1252'
    masthead_url          = 'http://www.infobae.com/imgs/header/header.gif'
    remove_javascript     = True
@ -25,7 +25,7 @@ class Infobae(BasicNewsRecipe):
                              body{font-family:Arial,Helvetica,sans-serif;}
                              .popUpTitulo{color:#0D4261; font-size: xx-large}
                            '''
-    
+
    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
@ -33,7 +33,7 @@ class Infobae(BasicNewsRecipe):
                        , 'language'         : language
                        , 'linearize_tables' : True
                        }
-    
+

    feeds = [
              (u'Noticias'  , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml'       )
--- a/resources/recipes/juventudrebelde.recipe
+++ b/resources/recipes/juventudrebelde.recipe
@ -20,7 +20,7 @@ class Juventudrebelde(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
-    language = 'es'
+    language = 'es_CU'

    cover_url             = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
    remove_javascript     = True
--- a/resources/recipes/karlsruhe.recipe
+++ b/resources/recipes/karlsruhe.recipe
@ -6,6 +6,7 @@ class KANewsRecipe(BasicNewsRecipe):
    description = u'Nachrichten aus Karlsruhe, Deutschland und der Welt.'
    __author__ = 'tfeld'
    lang='de'
+    language = 'de'
    no_stylesheets = True

    oldest_article = 7
--- a/resources/recipes/kath_net.recipe
+++ b/resources/recipes/kath_net.recipe
@ -4,6 +4,7 @@ class AdvancedUserRecipe1295262156(BasicNewsRecipe):
    title          = u'kath.net'
    __author__     = 'Bobus'
    oldest_article = 7
+    language = 'en'
    max_articles_per_feed = 100

    feeds          = [(u'kath.net', u'http://www.kath.net/2005/xml/index.xml')]
--- a/resources/recipes/la_cuarta.recipe
+++ b/resources/recipes/la_cuarta.recipe
@ -50,4 +50,4 @@ class LaCuarta(BasicNewsRecipe):
    feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]


-    language = 'es'
+    language = 'es_CL'
--- a/resources/recipes/la_diaria.recipe
+++ b/resources/recipes/la_diaria.recipe
@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
    title                 = 'La Diaria'
    __author__            = 'Gustavo Azambuja'
    description           = 'Noticias de Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/la_jornada.recipe
+++ b/resources/recipes/la_jornada.recipe
@ -19,7 +19,7 @@ class LaJornada_mx(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_MX'
    remove_empty_feeds    = True
    cover_url             = strftime("http://www.jornada.unam.mx/%Y/%m/%d/portada.pdf")
    masthead_url          = 'http://www.jornada.unam.mx/v7.0/imagenes/la-jornada-trans.png'
@ -34,8 +34,8 @@ class LaJornada_mx(BasicNewsRecipe):
                                .credito{font-weight: bold; margin-left: 1em}
                                .credito-autor{font-variant: small-caps; font-weight: bold }
                                .credito-titulo{text-align: right}
-                                .hemero{text-align: right; font-size: 0.9em; margin-bottom: 0.5em } 
-                                .loc{font-weight: bold} 
+                                .hemero{text-align: right; font-size: 0.9em; margin-bottom: 0.5em }
+                                .loc{font-weight: bold}
                                .carton{text-align: center}
                                .credit{font-weight: bold}
                                .sumario{font-weight: bold; text-align: center}
@ -56,7 +56,7 @@ class LaJornada_mx(BasicNewsRecipe):
                                       ,re.DOTALL|re.IGNORECASE)
                                       ,lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">')
                         ]
-                        
+
    keep_only_tags = [
                         dict(name='div', attrs={'class':['documentContent','cabeza','sumarios','credito-articulo','text','carton']})
                        ,dict(name='div', attrs={'id':'renderComments'})
@ -88,4 +88,4 @@ class LaJornada_mx(BasicNewsRecipe):
    def get_article_url(self, article):
        rurl = article.get('link',  None)
        return rurl.rpartition('&partner=')[0]
-        
+
--- a/resources/recipes/la_nueva.recipe
+++ b/resources/recipes/la_nueva.recipe
@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+__license__   = 'GPL v3'
+__author__    = 'Luis Hernandez'
+__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
+description   = 'Diario independiente de Asturias - v1.0 - 27 Jan 2011'
+
+'''
+www.lne.es
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+# Recipe for La Nueva España (www.lne.es), built from the lne.es RSS feeds.
+class AdvancedUserRecipe1294946868(BasicNewsRecipe):
+
+    # Publication metadata
+    title       = u'La Nueva España'
+    publisher   = u'Editorial Prensa Iberica'
+    __author__  = 'Luis Hernandez'
+    description = 'Diario independiente de Asturias'
+    cover_url   = 'http://estaticos00.lne.es//elementosWeb/mediaweb/images/iconos/logo2.jpg'
+    language    = 'es'
+    encoding    = 'ISO-8859-1'
+    timefmt     = '[%a, %d %b, %Y]'
+
+    # Article selection
+    oldest_article        = 3
+    max_articles_per_feed = 100
+
+    # Content handling
+    remove_javascript    = True
+    no_stylesheets       = True
+    use_embedded_content = False
+
+    remove_tags_before = dict(name='div', attrs={'class': ['contenedor']})
+    remove_tags_after  = dict(name='div', attrs={'class': ['fin_noticia']})
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class': ['noticia_titular', 'subtitulo', 'noticiadd2', 'noticia_texto']}),
+        dict(name='div', attrs={'id': ['noticia_texto']}),
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class': ['epigrafe', 'antetitulo', 'bloqueclear', 'bloqueclear_video', 'cuadro_multimedia', 'cintillo2', 'editor_documentos', 'noticiadd', 'noticiadd3', 'noticiainterior', 'fin_noticia']}),
+        dict(name='div', attrs={'id': ['evotos']}),
+    ]
+
+    extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:150%; font-weight: 600; text-align: justify; } h2{ font-family: sans-serif; font-size:120%; font-weight: 500; text-align: justify } '
+
+    # (section title, feed URL) pairs
+    feeds = [
+        (u'Al minuto',     u'http://www.lne.es/elementosInt/rss/AlMinuto'),
+        (u'General',       u'http://www.lne.es/elementosInt/rss/55'),
+        (u'Nacional',      u'http://www.lne.es/elementosInt/rss/43'),
+        (u'Internacional', u'http://www.lne.es/elementosInt/rss/44'),
+        (u'Economia',      u'http://www.lne.es/elementosInt/rss/45'),
+        (u'Deportes',      u'http://www.lne.es/elementosInt/rss/47'),
+        (u'Campeones',     u'http://www.lne.es/elementosInt/rss/65'),
+        (u'Sociedad',      u'http://www.lne.es/elementosInt/rss/46'),
+        (u'Sucesos',       u'http://www.lne.es/elementosInt/rss/48'),
+        (u'Galeria',       u'http://www.lne.es/elementosInt/rss/51'),
+        (u'Cultura',       u'http://www.lne.es/elementosInt/rss/66'),
+        (u'Motor',         u'http://www.lne.es/elementosInt/rss/62'),
+        (u'Opinion',       u'http://www.lne.es/elementosInt/rss/52'),
+        (u'Asturias',      u'http://www.lne.es/elementosInt/rss/42'),
+        (u'Oviedo',        u'http://www.lne.es/elementosInt/rss/31'),
+        (u'Gijon',         u'http://www.lne.es/elementosInt/rss/35'),
+        (u'Aviles',        u'http://www.lne.es/elementosInt/rss/36'),
+        (u'Nalon',         u'http://www.lne.es/elementosInt/rss/37'),
+        (u'Cuencas',       u'http://www.lne.es/elementosInt/rss/38'),
+        (u'Caudal',        u'http://www.lne.es/elementosInt/rss/39'),
+        (u'Oriente',       u'http://www.lne.es/elementosInt/rss/40'),
+        (u'Occidente',     u'http://www.lne.es/elementosInt/rss/41'),
+        (u'Mar y Campo',   u'http://www.lne.es/elementosInt/rss/63'),
+        (u'Ultima',        u'http://www.lne.es/elementosInt/rss/50'),
+    ]
--- a/resources/recipes/la_razon_bo.recipe
+++ b/resources/recipes/la_razon_bo.recipe
@ -18,7 +18,7 @@ class LaRazon_Bol(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_BO'
    publication_type      = 'newspaper'
    delay                 = 1
    remove_empty_feeds    = True
--- a/resources/recipes/la_segunda.recipe
+++ b/resources/recipes/la_segunda.recipe
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class LaSegunda(BasicNewsRecipe):
    title                 = 'La Segunda'
    __author__            = 'Darko Miletic'
-    description           = 'El sitio de noticias online de Chile' 
+    description           = 'El sitio de noticias online de Chile'
    publisher             = 'La Segunda'
    category              = 'news, politics, Chile'
    oldest_article        = 2
@ -19,9 +19,9 @@ class LaSegunda(BasicNewsRecipe):
    encoding              = 'cp1252'
    masthead_url          = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
    remove_empty_feeds    = True
-    language              = 'es'
-    extra_css             = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '            
-    
+    language              = 'es_CL'
+    extra_css             = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '
+
    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
@ -29,13 +29,13 @@ class LaSegunda(BasicNewsRecipe):
                        , 'language'         : language
 						, 'linearize_tables' : True
                        }
-                        
+
    remove_tags_before = dict(attrs={'class':'titulonegritastop'})
    remove_tags        = [dict(name='img')]
    remove_attributes  = ['width','height']
-	
-                        
-    feeds = [ 
+
+
+    feeds = [
               (u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
              ,(u'Politica'               , u'http://www.lasegunda.com/rss20/index.asp?canal=21')
              ,(u'Cronica'                , u'http://www.lasegunda.com/rss20/index.asp?canal=20')
@ -49,6 +49,6 @@ class LaSegunda(BasicNewsRecipe):
            ]

    def print_version(self, url):
-        rest, sep, article_id = url.partition('index.asp?idnoticia=')        
+        rest, sep, article_id = url.partition('index.asp?idnoticia=')
        return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
-    
+
--- a/resources/recipes/la_tribuna.recipe
+++ b/resources/recipes/la_tribuna.recipe
@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+__license__   = 'GPL v3'
+__author__    = 'Luis Hernandez'
+__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
+__version__     = 'v1.0'
+__date__        = '01 Feb 2011'
+
+'''
+http://www.promecal.es/
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+
+# Recipe for the local newspapers of Grupo PROMECAL, one RSS feed per title.
+class AdvancedUserRecipe1294946868(BasicNewsRecipe):
+
+    title             = u'La Tribuna de'
+    publisher      = u'Grupo PROMECAL'
+
+    __author__  = 'Luis Hernández'
+    description   = 'Varios diarios locales del grupo PROMECAL'
+
+    oldest_article = 3
+    max_articles_per_feed = 50
+
+    remove_javascript = True
+    no_stylesheets        = True
+    use_embedded_content  = False
+
+    encoding              = 'utf-8'
+    language              = 'es_ES'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    keep_only_tags     = [
+                                   dict(name='div', attrs={'id':['articulo']})
+                                  ,dict(name='div', attrs={'class':['foto']})
+                                  ,dict(name='p', attrs={'id':['texto']})
+                                ]
+
+    remove_tags_before = dict(name='div' , attrs={'class':['comparte']})
+    remove_tags_after  = dict(name='div' , attrs={'id':['relacionadas']})
+
+    remove_tags  = [
+                              dict(name='div', attrs={'id':['relacionadas']})
+                             ,dict(name='h3')
+                             ,dict(name='h5')
+                          ]
+
+    # Headings use the CSS generic family "sans-serif"; plain "sans" is not a
+    # generic family name and would only match a font literally called "sans".
+    extra_css             = """
+                               p{text-align: justify; font-size: 100%}
+                               body{text-align: left; font-family: serif; font-size: 100%}
+                               h1{font-family: sans-serif; font-size:150%; font-weight: bold; text-align: justify;}
+                               h2{font-family: sans-serif; font-size:85%; font-style: italic; text-align: justify;}
+                               h4{font-family: sans-serif; font-size:75%; font-weight: bold; text-align: center;}
+                               img{margin-bottom: 0.4em}
+                                """
+
+    def preprocess_html(self, soup):
+        '''Replace each <a> element whose .string is set with that string and return soup.'''
+        for alink in soup.findAll('a'):
+            tstr = alink.string
+            if tstr is not None:
+                alink.replaceWith(tstr)
+        return soup
+
+
+    feeds          = [
+           (u'Albacete', u'http://www.latribunadealbacete.es/rss.html')
+          ,(u'Avila', u'http://www.diariodeavila.es/rss.html')
+          ,(u'Burgos', u'http://www.diariodeburgos.es/rss.html')
+          ,(u'Ciudad Real', u'http://www.latribunadeciudadreal.es/rss.html')
+          ,(u'Palencia', u'http://www.diariopalentino.es/rss.html')
+          ,(u'Puertollano', u'http://www.latribunadepuertollano.es/rss.html')
+          ,(u'Talavera de la Reina', u'http://www.latribunadetalavera.es/rss.html')
+          ,(u'Toledo', u'http://www.latribunadetoledo.es/rss.html')
+          ,(u'Valladolid', u'http://www.eldiadevalladolid.com/rss.html')
+                       ]
--- a/resources/recipes/lamujerdemivida.recipe
+++ b/resources/recipes/lamujerdemivida.recipe
@ -11,15 +11,15 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class LaMujerDeMiVida(BasicNewsRecipe):
    title                 = 'La Mujer de mi Vida'
    __author__            = 'Darko Miletic'
-    description           = 'Cultura de otra manera'    
+    description           = 'Cultura de otra manera'
    oldest_article        = 90
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    publisher             = 'La Mujer de mi Vida'
-    category              = 'literatura, critica, arte, ensayos'    
-    language = 'es'
+    category              = 'literatura, critica, arte, ensayos'
+    language = 'es_AR'

    INDEX                 = 'http://www.lamujerdemivida.com.ar/'
    html2lrf_options = [
@ -28,8 +28,8 @@ class LaMujerDeMiVida(BasicNewsRecipe):
                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

    keep_only_tags = [dict(name='table', attrs={'width':'570'})]

@ -51,7 +51,7 @@ class LaMujerDeMiVida(BasicNewsRecipe):
        if cover_item:
           cover_url = self.INDEX + cover_item['src']
        return cover_url
-    
+
    def parse_index(self):
        totalfeeds = []
        lfeeds = self.get_feeds()
@ -74,4 +74,4 @@ class LaMujerDeMiVida(BasicNewsRecipe):
                                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
-                
+
--- a/resources/recipes/lanacion.recipe
+++ b/resources/recipes/lanacion.recipe
@ -16,17 +16,17 @@ class Lanacion(BasicNewsRecipe):
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
-    language              = 'es'
+    language              = 'es_AR'
    publication_type      = 'newspaper'
-    remove_empty_feeds    = True    
+    remove_empty_feeds    = True
    masthead_url          = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
    extra_css             = """ h1{font-family: Georgia,serif}
-                                h2{color: #626262}    
-                                body{font-family: Arial,sans-serif} 
+                                h2{color: #626262}
+                                body{font-family: Arial,sans-serif}
                                img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
-                                .notaFecha{color: #808080}                                
-                                .notaEpigrafe{font-size: x-small} 
-                                .topNota h1{font-family: Arial,sans-serif} 
+                                .notaFecha{color: #808080}
+                                .notaEpigrafe{font-size: x-small}
+                                .topNota h1{font-family: Arial,sans-serif}
                            """


@ -45,7 +45,7 @@ class Lanacion(BasicNewsRecipe):
                    ,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
                    ,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
                  ]
-    remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})                
+    remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
    remove_attributes = ['height','width','visible','onclick','data-count','name']

    feeds          = [
--- a/resources/recipes/lanacion_chile.recipe
+++ b/resources/recipes/lanacion_chile.recipe
@ -51,4 +51,4 @@ class LaNacionChile(BasicNewsRecipe):
            del item['style']
        return soup

-    language = 'es'
+    language = 'es_CL'
--- a/resources/recipes/laprensa.recipe
+++ b/resources/recipes/laprensa.recipe
@ -21,9 +21,9 @@ class LaPrensa(BasicNewsRecipe):
    encoding              = 'cp1252'
   # cover_url             = 'http://www.laprensa.com.ar/imgs/logo.gif'
    remove_javascript     = True
-    language = 'es'
+    language = 'es_AR'
    lang = 'es'
-    
+
    html2lrf_options = [
                          '--comment', description
                        , '--category', category
@ -32,7 +32,7 @@ class LaPrensa(BasicNewsRecipe):

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
    filter_regexps = [r'.*archive.aspx.*']
-   
+
    remove_tags  = [
                    dict(name='td', attrs={'class':["link-registro","link-buscador"]}),
                    dict(name='td', attrs={'id':["TDTabItem1","TDTabItem2","TDTabItem3","TDTabItem4"]}),
@ -58,9 +58,9 @@ class LaPrensa(BasicNewsRecipe):
                    dict(name='img', src = "/versions/1/imgs/separador-linea-azul.gif"),
                    dict(name='img', src = " /versions/1/imgs/separador-linea.gif"),
                    dict(name='a',text ="Powered by Civinext Groupware - V. 2.0.3567.23706"),
-                    dict(name='img', height ="0")                   
+                    dict(name='img', height ="0")
                    ]
-                            
+
    extra_css = '''
                    .seccion{font-size:xx-small;}
                    body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
@ -69,7 +69,7 @@ class LaPrensa(BasicNewsRecipe):
                    .fecha{font-size:xx-small;}
                    .volanta{font-size:xx-small;}
                '''
-    
+
    feeds = [
              (u'Politica'    , u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx&Rss=4' )
             ,(u'Economia'    , u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx&Rss=5' )
@ -80,14 +80,14 @@ class LaPrensa(BasicNewsRecipe):
             ,(u'Espectaculos', u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx?Rss=10')
            ]

-    
+
    def preprocess_html(self, soup):
-        
+
        for t in soup.findAll(['table','td','tr','span','tbody']):
            t.name = 'div'
        for t in soup.findAll(['hr']):
            t.extract()
-        
+
        mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
@ -95,8 +95,8 @@ class LaPrensa(BasicNewsRecipe):
        for item in soup.findAll(align = "center"):
                del item['align']
        for item in soup.findAll(bgcolor="ffffff"):
-            del item['bgcolor']               
+            del item['bgcolor']
        return soup
-    
-   
-    
+
+
+
--- a/resources/recipes/laprensa_hn.recipe
+++ b/resources/recipes/laprensa_hn.recipe
@ -21,7 +21,7 @@ class LaPrensaHn(BasicNewsRecipe):
    no_stylesheets        = True
    remove_javascript     = True
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_HN'

    lang                  = 'es-HN'
    direction             = 'ltr'
--- a/resources/recipes/laprensa_ni.recipe
+++ b/resources/recipes/laprensa_ni.recipe
@ -22,7 +22,7 @@ class LaPrensa_ni(BasicNewsRecipe):
    use_embedded_content  = False
    encoding              = 'cp1252'
    remove_javascript     = True
-    language = 'es'
+    language = 'es_NI'

    months_es             = ['enero','febrero','marzo','abril','mayo','junio','julio','agosto','septiembre','octubre','noviembre','diciembre']
    current_month         = months_es[datetime.date.today().month - 1]
--- a/resources/recipes/las_vegas_review.recipe
+++ b/resources/recipes/las_vegas_review.recipe
@ -3,12 +3,17 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1274742400(BasicNewsRecipe):

    title = u'Las Vegas Review Journal'
-    __author__ = 'Joel'
+    __author__ = 'Kovid Goyal'
    language = 'en'

    oldest_article = 7

    max_articles_per_feed = 100
+    keep_only_tags = [dict(id='content-main')]
+    remove_tags = [dict(id=['right-col-content', 'trending-topics']),
+            {'class':['ppy-outer']}
+            ]
+    no_stylesheets = True

    feeds = [
            (u'News', u'http://www.lvrj.com/news.rss'),
--- a/resources/recipes/latimes.recipe
+++ b/resources/recipes/latimes.recipe
@ -1,73 +1,92 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
-latimes.com
+www.latimes.com
 '''
+
 from calibre.web.feeds.news import BasicNewsRecipe

 class LATimes(BasicNewsRecipe):
-    title                 = u'The Los Angeles Times'
-    __author__            = u'Darko Miletic and Sujata Raman'
-    description           = u'News from Los Angeles'
-    oldest_article        = 7
-    max_articles_per_feed = 100
-    language              = 'en'
+    title                 = 'Los Angeles Times'
+    __author__            = 'Darko Miletic'
+    description           = 'The Los Angeles Times is a leading source of news on Southern California, entertainment, movies, television, music, politics, business, health, technology, travel, sports, environment, economics, autos, jobs, real estate and other topics affecting California'
+    publisher             = 'Tribune Company'
+    category              = 'news, politics, USA, Los Angeles, world'
+    oldest_article        = 2
+    max_articles_per_feed = 200
    no_stylesheets        = True
+    encoding              = 'utf8'
    use_embedded_content  = False
-    encoding              = 'utf-8'
-    lang                  = 'en-US'
+    language              = 'en'
+    remove_empty_feeds    = True
+    publication_type      = 'newspaper'
+    masthead_url          = 'http://www.latimes.com/images/logo.png'
+    cover_url             = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
+    extra_css             = """
+                               body{font-family: Georgia,"Times New Roman",Times,serif }
+                               img{margin-bottom: 0.4em; margin-top: 0.8em; display:block}
+                               h2{font-size: 1.1em}
+                               .deckhead{font-size: small; text-transform: uppercase}
+                               .small{color: gray; font-size: small}
+                               .date,.time,.copyright{font-size: x-small; color:gray; font-style:italic;}
+                            """

    conversion_options = {
-          'comment'          : description
-        , 'language'         : lang
-    }
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : language
+                        , 'linearize_tables' : 'Yes'
+                        }

-    extra_css = '''
-                h1{font-family :Georgia,"Times New Roman",Times,serif; font-size:large; }
-                h2{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
-                .story{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
-                .entry-body{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
-                .entry-more{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
-                .credit{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
-                .small{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
-                .byline{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
-                .date{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
-                .time{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
-                .copyright{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; }
-                .subhead{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
-                '''
-
-   # recursions = 1
-   # match_regexps = [r'http://www.latimes.com/.*page=[2-9]']
-
-    keep_only_tags    = [dict(name='div', attrs={'class':["story"  ,"entry"] })]
+    keep_only_tags = [
+                        dict(name='div', attrs={'class':'story'})
+                       ,dict(attrs={'class':['entry-header','time','entry-content']})
+                     ]
+    remove_tags_after=dict(name='p', attrs={'class':'copyright'})
+    remove_tags = [
+                     dict(name=['meta','link','iframe','object','embed'])
+                    ,dict(attrs={'class':['toolSet','articlerail','googleAd','entry-footer-left','entry-footer-right','entry-footer-social','google-ad-story-bottom','sphereTools']})
+                    ,dict(attrs={'id':['article-promo','googleads','moduleArticleToolsContainer','gallery-subcontent']})
+                  ]
+    remove_attributes=['lang','xmlns:fb','xmlns:og','border','xtags','i','article_body']


-    remove_tags      = [   dict(name='div', attrs={'class':['articlerail',"sphereTools","tools","toppaginate","entry-footer-left","entry-footer-right"]}),
-                            dict(name='div', attrs={'id':["moduleArticleToolsContainer",]}),
-                            dict(name='p', attrs={'class':["entry-footer",]}),
-                           dict(name='ul', attrs={'class':"article-nav clearfix"}),
-                            dict(name=['iframe'])
-                        ]
-
-
-    feeds          = [(u'News', u'http://feeds.latimes.com/latimes/news')
-                      ,(u'Local','http://feeds.latimes.com/latimes/news/local')
-                      ,(u'MostEmailed','http://feeds.latimes.com/MostEmailed')
-                      ,(u'Politics','http://feeds.latimes.com/latimes/news/local/politics/cal/')
-                      ,('OrangeCounty','http://feeds.latimes.com/latimes/news/local/orange/')
-                      ,('National','http://feeds.latimes.com/latimes/news/nationworld/nation')
-                      ,('Politics','http://feeds.latimes.com/latimes/news/politics/')
-                      ,('Business','http://feeds.latimes.com/latimes/business')
-                      ,('Sports','http://feeds.latimes.com/latimes/sports/')
-                      ,('Entertainment','http://feeds.latimes.com/latimes/entertainment/')
-                      ]
-
+    # (section title, feed URL) pairs.  The titles are the section names
+    # shown to the reader, so they are spelled out in plain English.
+    feeds = [
+              (u'Top News'             , u'http://feeds.latimes.com/latimes/news'                           )
+             ,(u'Local News'           , u'http://feeds.latimes.com/latimes/news/local'                     )
+             ,(u'National'             , u'http://feeds.latimes.com/latimes/news/nationworld/nation'        )
+             ,(u'National Politics'    , u'http://feeds.latimes.com/latimes/news/politics/'                 )
+             ,(u'Business'             , u'http://feeds.latimes.com/latimes/business'                       )
+             ,(u'Education'            , u'http://feeds.latimes.com/latimes/news/education'                 )
+             ,(u'Environment'          , u'http://feeds.latimes.com/latimes/news/science/environment'       )
+             ,(u'Religion'             , u'http://feeds.latimes.com/latimes/features/religion'              )
+             ,(u'Science'              , u'http://feeds.latimes.com/latimes/news/science'                   )
+             ,(u'Technology'           , u'http://feeds.latimes.com/latimes/technology'                     )
+             ,(u'Africa'               , u'http://feeds.latimes.com/latimes/africa'                         )
+             ,(u'Asia'                 , u'http://feeds.latimes.com/latimes/asia'                           )
+             ,(u'Europe'               , u'http://feeds.latimes.com/latimes/europe'                         )
+             ,(u'Latin America'        , u'http://feeds.latimes.com/latimes/latinamerica'                   )
+             ,(u'Middle East'          , u'http://feeds.latimes.com/latimes/middleeast'                     )
+             ,(u'Arts&Culture'         , u'http://feeds.feedburner.com/latimes/entertainment/news/arts'     )
+             ,(u'Entertainment News'   , u'http://feeds.feedburner.com/latimes/entertainment/news/'         )
+             ,(u'Movie News'           , u'http://feeds.feedburner.com/latimes/entertainment/news/movies/'  )
+             ,(u'Movie Reviews'        , u'http://feeds.feedburner.com/movies/reviews/'                     )
+             ,(u'Music News'           , u'http://feeds.feedburner.com/latimes/entertainment/news/music/'   )
+             ,(u'Pop Album Reviews'    , u'http://feeds.feedburner.com/latimes/pop-album-reviews'           )
+             ,(u'Restaurant Reviews'   , u'http://feeds.feedburner.com/latimes/restaurant/reviews'          )
+             ,(u'Theater and Dance'    , u'http://feeds.feedburner.com/latimes/theaterdance'                )
+             ,(u'Autos'                , u'http://feeds.latimes.com/latimes/classified/automotive/highway1/')
+             ,(u'Books'                , u'http://feeds.latimes.com/features/books'                         )
+             ,(u'Food'                 , u'http://feeds.latimes.com/latimes/features/food/'                 )
+             ,(u'Health'               , u'http://feeds.latimes.com/latimes/features/health/'               )
+             ,(u'Real Estate'          , u'http://feeds.latimes.com/latimes/classified/realestate/'         )
+             ,(u'Commentary'           , u'http://feeds2.feedburner.com/latimes/news/opinion/commentary/'   )
+             ,(u'Sports'               , u'http://feeds.latimes.com/latimes/sports/'                        )
+            ]

    def get_article_url(self, article):
-        ans = article.get('feedburner_origlink').rpartition('?')[0]
+        ans = BasicNewsRecipe.get_article_url(self, article).rpartition('?')[0]

        try:
            self.log('Looking for full story link in', ans)
@ -83,4 +102,22 @@ class LATimes(BasicNewsRecipe):
            pass
        return ans

-
+    def preprocess_html(self, soup):
+        """Clean up article markup and return the modified soup.
+
+        Three passes are made over the document:
+
+        * every inline ``style`` attribute is deleted;
+        * ``<img>`` tags without an ``alt`` attribute get ``alt="image"``;
+        * ``<a>`` tags are unwrapped -- a link holding a single string is
+          replaced by that string, a link containing an image is turned
+          into an attribute-less ``<div>``, and any other link is replaced
+          by its text as produced by ``self.tag_to_string``.
+        """
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        for item in soup.findAll('a'):
+            if item.string is not None:
+                # Plain text link: keep only the text.
+                item.replaceWith(item.string)
+            elif item.find('img'):
+                # Image link: keep the image, neutralise the anchor itself.
+                item.name = 'div'
+                item.attrs = []
+            else:
+                # Mixed content without an image: flatten to text.
+                item.replaceWith(self.tag_to_string(item))
+        return soup
--- a/resources/recipes/latribuna.recipe
+++ b/resources/recipes/latribuna.recipe
@ -21,7 +21,7 @@ class LaTribuna(BasicNewsRecipe):
    no_stylesheets        = True
    remove_javascript     = True
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_HN'

    lang                  = 'es-HN'
    direction             = 'ltr'
--- a/resources/recipes/lavanguardia.recipe
+++ b/resources/recipes/lavanguardia.recipe
@ -20,8 +20,8 @@ class LaVanguardia(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
-    delay                 = 1
-    encoding              = 'cp1252'
+    delay                 = 5
+ #   encoding              = 'cp1252'
    language = 'es'

    direction             = 'ltr'
@ -35,8 +35,8 @@ class LaVanguardia(BasicNewsRecipe):
    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    feeds              = [
-                            (u'Ciudadanos'           , u'http://feeds.feedburner.com/lavanguardia/ciudadanos'   )
-                           ,(u'Cultura'              , u'http://feeds.feedburner.com/lavanguardia/cultura'      )
+                            (u'Portada'           , u'http://feeds.feedburner.com/lavanguardia/home'   )
+                          ,(u'Cultura'              , u'http://feeds.feedburner.com/lavanguardia/cultura'      )
                           ,(u'Deportes'             , u'http://feeds.feedburner.com/lavanguardia/deportes'     )
                           ,(u'Economia'             , u'http://feeds.feedburner.com/lavanguardia/economia'     )
                           ,(u'El lector opina'      , u'http://feeds.feedburner.com/lavanguardia/lectoropina'  )
@ -45,17 +45,17 @@ class LaVanguardia(BasicNewsRecipe):
                           ,(u'Internet y tecnologia', u'http://feeds.feedburner.com/lavanguardia/internet'     )
                           ,(u'Motor'                , u'http://feeds.feedburner.com/lavanguardia/motor'        )
                           ,(u'Politica'             , u'http://feeds.feedburner.com/lavanguardia/politica'     )
-                           ,(u'Sucessos'             , u'http://feeds.feedburner.com/lavanguardia/sucesos'      )
+                           ,(u'Sucesos'             , u'http://feeds.feedburner.com/lavanguardia/sucesos'      )
                         ]


    keep_only_tags = [
-                       dict(name='div', attrs={'class':'element1_3'})
-                     ]
+                       dict(name='div', attrs={'class':'detalle  noticia'})
+                    ]

    remove_tags        = [
                             dict(name=['object','link','script'])
-                            ,dict(name='div', attrs={'class':['colC','peu']})
+                            ,dict(name='div', attrs={'class':['colC','peu','jstoolbar']})
                         ]

    remove_tags_after = [dict(name='div', attrs={'class':'text'})]
@ -67,4 +67,3 @@ class LaVanguardia(BasicNewsRecipe):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
-
--- a/resources/recipes/le_temps.recipe
+++ b/resources/recipes/le_temps.recipe
@ -15,12 +15,26 @@ class LeTemps(BasicNewsRecipe):
     oldest_article = 7
     max_articles_per_feed = 100
     __author__ = 'Sujata Raman'
+     description = 'French news. Needs a subscription from http://www.letemps.ch'
     no_stylesheets = True
     remove_javascript = True
     recursions = 1
     encoding = 'UTF-8'
     match_regexps = [r'http://www.letemps.ch/Page/Uuid/[-0-9a-f]+\|[1-9]']
     language = 'fr'
+     needs_subscription = True
+
+     def get_browser(self):
+         """Return a browser that has been logged in to letemps.ch.
+
+         Opens the login page, fills in ``self.username`` and
+         ``self.password`` and submits the form.
+
+         Raises ValueError when the response still contains the text
+         '>Login', which is taken to mean that the login was rejected.
+         """
+         br = BasicNewsRecipe.get_browser(self)
+         br.open('http://www.letemps.ch/login')
+
+         def is_login_form(form):
+             # The login form is the one carrying both credential fields.
+             names = [control.name for control in form.controls]
+             return 'username' in names and 'password' in names
+
+         # A form must be selected before its controls can be assigned.
+         # NOTE(review): assumes the login page exposes a form holding both
+         # 'username' and 'password' controls -- confirm against the live page.
+         br.select_form(predicate=is_login_form)
+         br['username'] = self.username
+         br['password'] = self.password
+         raw = br.submit().read()
+         if '>Login' in raw:
+             raise ValueError('Failed to login to letemps.ch. Check '
+                     'your username and password')
+         return br
+

     keep_only_tags = [dict(name='div', attrs={'id':'content'}),
                        dict(name='div', attrs={'class':'story'})
--- a/resources/recipes/ledevoir.recipe
+++ b/resources/recipes/ledevoir.recipe
@ -9,6 +9,8 @@ __description__   = 'Canadian Paper '
 http://www.ledevoir.com/
 '''

+import re
+
 from calibre.web.feeds.news import BasicNewsRecipe

 class ledevoir(BasicNewsRecipe):
@ -32,6 +34,8 @@ class ledevoir(BasicNewsRecipe):
    remove_javascript     = True
    no_stylesheets        = True

+    preprocess_regexps = [(re.compile(r'(title|alt)=".*?>.*?"', re.DOTALL), lambda m: '')]
+
    keep_only_tags  = [
                        dict(name='div', attrs={'id':'article'}),
                        dict(name='ul', attrs={'id':'ariane'})
--- a/resources/recipes/leduc.recipe
+++ b/resources/recipes/leduc.recipe
@ -0,0 +1,40 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1292550626(BasicNewsRecipe):
+    # Recipe for the Leduc - Wetaskiwin Pipestone Flyer (pipestoneflyer.ca).
+
+    # -- Publication metadata --------------------------------------------
+    title       = 'Leduc - Wetaskiwin Pipestone Flyer'
+    __author__  = 'Brian Hahn'
+    description = 'News from Alberta, Canada'
+
+    # -- Download settings -----------------------------------------------
+    oldest_article        = 56
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    #delay = 1
+    use_embedded_content  = False
+
+    publisher = 'Pipestone Publishing'
+    category  = 'News, Alberta, Canada'
+    language  = 'en_CA'
+    encoding  = 'iso-8859-1'
+    cover_url = 'http://www.pipestoneflyer.ca/images/calibre-cover.jpg'
+
+    # -- Article clean-up: everything is keyed on element ids --------------
+    remove_tags_before = {'id': 'ContentPanel'}
+    remove_tags_after  = {'id': 'ContentPanel'}
+    remove_tags = [
+        {'name': 'div', 'attrs': {'id': 'StoryNav'}},
+        {'name': 'div', 'attrs': {'id': 'BottomAds'}},
+        {'name': 'div', 'attrs': {'id': 'MoreStoryLinks'}},
+    ]
+    extra_css = 'img { margin:5px }'
+
+    # -- Sections as (title, RSS url) pairs --------------------------------
+    feeds = [
+        ('Feature', 'http://www.pipestoneflyer.ca/Feature.rss'),
+        ('Editors Desk', 'http://www.pipestoneflyer.ca/Editor%27s%20Desk.rss'),
+        ('Letters', 'http://www.pipestoneflyer.ca/Letters.rss'),
+        ('A Loco Viewpoint', 'http://www.pipestoneflyer.ca/A%20Loco%20Viewpoint.rss'),
+        ('Lifes Doorway', 'http://www.pipestoneflyer.ca/Life%27s%20Doorway.rss'),
+        ('From the Otherside', 'http://www.pipestoneflyer.ca/From%20the%20Otherside.rss'),
+        ('Opinion', 'http://www.pipestoneflyer.ca/Opinion.rss'),
+        ('Community', 'http://www.pipestoneflyer.ca/Community.rss'),
+        ('Sports', 'http://www.pipestoneflyer.ca/Sports.rss'),
+        ('Chambers', 'http://www.pipestoneflyer.ca/Chambers.rss'),
+        ('Government', 'http://www.pipestoneflyer.ca/Government.rss'),
+        ('Environment', 'http://www.pipestoneflyer.ca/Environment.rss'),
+        ('Health', 'http://www.pipestoneflyer.ca/Health.rss'),
+        ('Funnies', 'http://www.pipestoneflyer.ca/Funnies.rss'),
+        ('Faith', 'http://www.pipestoneflyer.ca/Faith.rss'),
+        ('News and Views', 'http://www.pipestoneflyer.ca/News%20and%20Views.rss'),
+        ('Obituaries', 'http://www.pipestoneflyer.ca/Obituaries.rss'),
+        ('Police Blotter', 'http://www.pipestoneflyer.ca/Police%20Blotter.rss'),
+    ]
--- a/resources/recipes/los_tiempos_bo.recipe
+++ b/resources/recipes/los_tiempos_bo.recipe
@ -18,7 +18,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_BO'
    publication_type      = 'newspaper'
    delay                 = 1
    remove_empty_feeds    = True
--- a/resources/recipes/milenio.recipe
+++ b/resources/recipes/milenio.recipe
@ -12,7 +12,7 @@ import datetime
 class Milenio(BasicNewsRecipe):
    title                 = u'Milenio-diario'
    __author__            = 'Bmsleight'
-    language              = 'es'
+    language              = 'es_MX'
    description           = 'Milenio-diario'
    oldest_article        = 10
    max_articles_per_feed = 100
--- a/resources/recipes/miradasalsur.recipe
+++ b/resources/recipes/miradasalsur.recipe
@ -20,7 +20,7 @@ class MiradasAlSur(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'

    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/resources/recipes/montevideo_com.recipe
+++ b/resources/recipes/montevideo_com.recipe
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
    title                 = 'Montevideo COMM'
    __author__            = 'Gustavo Azambuja'
    description           = 'Noticias de Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/msnsankei.recipe
+++ b/resources/recipes/msnsankei.recipe
@ -13,15 +13,12 @@ class MSNSankeiNewsProduct(BasicNewsRecipe):
    description     = 'Products release from Japan'
    oldest_article = 7
    max_articles_per_feed = 100
-    encoding       = 'Shift_JIS'
+    encoding       = 'utf-8'
    language       = 'ja'
    cover_url       = 'http://sankei.jp.msn.com/images/common/sankeShinbunLogo.jpg'
    masthead_url = 'http://sankei.jp.msn.com/images/common/sankeiNewsLogo.gif'

    feeds          = [(u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml')]

-    remove_tags_before = dict(id="__r_article_title__")
-    remove_tags_after  = dict(id="ajax_release_news")
-    remove_tags = [{'class':"parent chromeCustom6G"},
-                              dict(id="RelatedImg")
-                            ]
+    remove_tags_before = dict(id="NewsTitle")
+    remove_tags_after  = dict(id="RelatedTitle")
--- a/resources/recipes/nationalgeographic.recipe
+++ b/resources/recipes/nationalgeographic.recipe
@ -10,6 +10,7 @@ import re
 class NationalGeographicNews(BasicNewsRecipe):
    title          = u'National Geographic News'
    oldest_article = 7
+    language = 'en'
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
--- a/Show More
+++ b/Show More