From a300879f7055ed0c9ddc5cbe44484fdfc940f05c Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 19 Mar 2011 15:32:05 -0400
Subject: [PATCH 01/26] TXT Input: Textile: Simplify code for handling macros
 and glyphs.

---
 src/calibre/ebooks/textile/functions.py | 408 +++++++-----------------
 1 file changed, 114 insertions(+), 294 deletions(-)

diff --git a/src/calibre/ebooks/textile/functions.py b/src/calibre/ebooks/textile/functions.py
index 891211de30..b37cd4aab8 100755
--- a/src/calibre/ebooks/textile/functions.py
+++ b/src/calibre/ebooks/textile/functions.py
@@ -121,97 +121,113 @@ class Textile(object):
     btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p')
     btag_lite = ('bq', 'bc', 'p')
 
-    glyph_defaults = (
-        ('mac_cent',               '&#162;'),
-        ('mac_pound',              '&#163;'),
-        ('mac_yen',                '&#165;'),
-        ('mac_quarter',            '&#188;'),
-        ('mac_half',               '&#189;'),
-        ('mac_three-quarter',      '&#190;'),
-        ('mac_cA-grave',           '&#192;'),
-        ('mac_cA-acute',           '&#193;'),
-        ('mac_cA-circumflex',      '&#194;'),
-        ('mac_cA-tilde',           '&#195;'),
-        ('mac_cA-diaeresis',       '&#196;'),
-        ('mac_cA-ring',            '&#197;'),
-        ('mac_cAE',                '&#198;'),
-        ('mac_cC-cedilla',         '&#199;'),
-        ('mac_cE-grave',           '&#200;'),
-        ('mac_cE-acute',           '&#201;'),
-        ('mac_cE-circumflex',      '&#202;'),
-        ('mac_cE-diaeresis',       '&#203;'),
-        ('mac_cI-grave',           '&#204;'),
-        ('mac_cI-acute',           '&#205;'),
-        ('mac_cI-circumflex',      '&#206;'),
-        ('mac_cI-diaeresis',       '&#207;'),
-        ('mac_cEth',               '&#208;'),
-        ('mac_cN-tilde',           '&#209;'),
-        ('mac_cO-grave',           '&#210;'),
-        ('mac_cO-acute',           '&#211;'),
-        ('mac_cO-circumflex',      '&#212;'),
-        ('mac_cO-tilde',           '&#213;'),
-        ('mac_cO-diaeresis',       '&#214;'),
-        ('mac_cO-stroke',          '&#216;'),
-        ('mac_cU-grave',           '&#217;'),
-        ('mac_cU-acute',           '&#218;'),
-        ('mac_cU-circumflex',      '&#219;'),
-        ('mac_cU-diaeresis',       '&#220;'),
-        ('mac_cY-acute',           '&#221;'),
-        ('mac_sa-grave',           '&#224;'),
-        ('mac_sa-acute',           '&#225;'),
-        ('mac_sa-circumflex',      '&#226;'),
-        ('mac_sa-tilde',           '&#227;'),
-        ('mac_sa-diaeresis',       '&#228;'),
-        ('mac_sa-ring',            '&#229;'),
-        ('mac_sae',                '&#230;'),
-        ('mac_sc-cedilla',         '&#231;'),
-        ('mac_se-grave',           '&#232;'),
-        ('mac_se-acute',           '&#233;'),
-        ('mac_se-circumflex',      '&#234;'),
-        ('mac_se-diaeresis',       '&#235;'),
-        ('mac_si-grave',           '&#236;'),
-        ('mac_si-acute',           '&#237;'),
-        ('mac_si-circumflex',      '&#238;'),
-        ('mac_si-diaeresis',       '&#239;'),
-        ('mac_sn-tilde',           '&#241;'),
-        ('mac_so-grave',           '&#242;'),
-        ('mac_so-acute',           '&#243;'),
-        ('mac_so-circumflex',      '&#244;'),
-        ('mac_so-tilde',           '&#245;'),
-        ('mac_so-diaeresis',       '&#246;'),
-        ('mac_so-stroke',          '&#248;'),
-        ('mac_su-grave',           '&#249;'),
-        ('mac_su-acute',           '&#250;'),
-        ('mac_su-circumflex',      '&#251;'),
-        ('mac_su-diaeresis',       '&#252;'),
-        ('mac_sy-acute',           '&#253;'),
-        ('mac_sy-diaeresis',       '&#255;'),
-        ('mac_cOE',                '&#338;'),
-        ('mac_soe',                '&#339;'),
-        ('mac_bullet',             '&#8226;'),
-        ('mac_franc',              '&#8355;'),
-        ('mac_lira',               '&#8356;'),
-        ('mac_rupee',              '&#8360;'),
-        ('mac_euro',               '&#8364;'),
-        ('mac_spade',              '&#9824;'),
-        ('mac_club',               '&#9827;'),
-        ('mac_heart',              '&#9829;'),
-        ('mac_diamond',            '&#9830;'),
-        ('txt_dimension',          '&#215;'),
-        ('txt_quote_single_open',  '&#8216;'),
-        ('txt_quote_single_close', '&#8217;'),
-        ('txt_quote_double_open',  '&#8220;'),
-        ('txt_quote_double_close', '&#8221;'),
-        ('txt_apostrophe',         '&#8217;'),
-        ('txt_prime',              '&#8242;'),
-        ('txt_prime_double',       '&#8243;'),
-        ('txt_ellipsis',           '&#8230;'),
-        ('txt_emdash',             '&#8212;'),
-        ('txt_endash',             '&#8211;'),
-        ('txt_trademark',          '&#8482;'),
-        ('txt_registered',         '&#174;'),
-        ('txt_copyright',          '&#169;'),
-    )
+    macro_defaults = [  # (compiled {macro} pattern, replacement entity) pairs, applied in order
+        (re.compile(r'{(c\||\|c)}'),     r'&#162;'),   #  cent
+        (re.compile(r'{(L-|-L)}'),       r'&#163;'),   #  pound
+        (re.compile(r'{(Y=|=Y)}'),       r'&#165;'),   #  yen
+        (re.compile(r'{\(c\)}'),         r'&#169;'),   #  copyright
+        (re.compile(r'{\(r\)}'),         r'&#174;'),   #  registered
+        (re.compile(r'{(\+_|_\+)}'),     r'&#177;'),   #  plus-minus
+        (re.compile(r'{1/4}'),           r'&#188;'),   #  quarter
+        (re.compile(r'{1/2}'),           r'&#189;'),   #  half
+        (re.compile(r'{3/4}'),           r'&#190;'),   #  three-quarter
+        (re.compile(r'{(A`|`A)}'),       r'&#192;'),   #  A-grave
+        (re.compile(r'{(A\'|\'A)}'),     r'&#193;'),   #  A-acute
+        (re.compile(r'{(A\^|\^A)}'),     r'&#194;'),   #  A-circumflex
+        (re.compile(r'{(A~|~A)}'),       r'&#195;'),   #  A-tilde
+        (re.compile(r'{(A\"|\"A)}'),     r'&#196;'),   #  A-diaeresis
+        (re.compile(r'{(Ao|oA)}'),       r'&#197;'),   #  A-ring
+        (re.compile(r'{(AE)}'),          r'&#198;'),   #  AE
+        (re.compile(r'{(C,|,C)}'),       r'&#199;'),   #  C-cedilla
+        (re.compile(r'{(E`|`E)}'),       r'&#200;'),   #  E-grave
+        (re.compile(r'{(E\'|\'E)}'),     r'&#201;'),   #  E-acute
+        (re.compile(r'{(E\^|\^E)}'),     r'&#202;'),   #  E-circumflex
+        (re.compile(r'{(E\"|\"E)}'),     r'&#203;'),   #  E-diaeresis
+        (re.compile(r'{(I`|`I)}'),       r'&#204;'),   #  I-grave
+        (re.compile(r'{(I\'|\'I)}'),     r'&#205;'),   #  I-acute
+        (re.compile(r'{(I\^|\^I)}'),     r'&#206;'),   #  I-circumflex
+        (re.compile(r'{(I\"|\"I)}'),     r'&#207;'),   #  I-diaeresis
+        (re.compile(r'{(D-|-D)}'),       r'&#208;'),   #  ETH
+        (re.compile(r'{(N~|~N)}'),       r'&#209;'),   #  N-tilde
+        (re.compile(r'{(O`|`O)}'),       r'&#210;'),   #  O-grave
+        (re.compile(r'{(O\'|\'O)}'),     r'&#211;'),   #  O-acute
+        (re.compile(r'{(O\^|\^O)}'),     r'&#212;'),   #  O-circumflex
+        (re.compile(r'{(O~|~O)}'),       r'&#213;'),   #  O-tilde
+        (re.compile(r'{(O\"|\"O)}'),     r'&#214;'),   #  O-diaeresis
+        (re.compile(r'{x}'),             r'&#215;'),   #  dimension
+        (re.compile(r'{(O\/|\/O)}'),     r'&#216;'),   #  O-stroke
+        (re.compile(r'{(U`|`U)}'),       r'&#217;'),   #  U-grave
+        (re.compile(r'{(U\'|\'U)}'),     r'&#218;'),   #  U-acute
+        (re.compile(r'{(U\^|\^U)}'),     r'&#219;'),   #  U-circumflex
+        (re.compile(r'{(U\"|\"U)}'),     r'&#220;'),   #  U-diaeresis
+        (re.compile(r'{(Y\'|\'Y)}'),     r'&#221;'),   #  Y-acute
+        (re.compile(r'{sz}'),            r'&szlig;'),  #  sharp-s
+        (re.compile(r'{(a`|`a)}'),       r'&#224;'),   #  a-grave
+        (re.compile(r'{(a\'|\'a)}'),     r'&#225;'),   #  a-acute
+        (re.compile(r'{(a\^|\^a)}'),     r'&#226;'),   #  a-circumflex
+        (re.compile(r'{(a~|~a)}'),       r'&#227;'),   #  a-tilde
+        (re.compile(r'{(a\"|\"a)}'),     r'&#228;'),   #  a-diaeresis
+        (re.compile(r'{(ao|oa)}'),       r'&#229;'),   #  a-ring
+        (re.compile(r'{ae}'),            r'&#230;'),   #  ae
+        (re.compile(r'{(c,|,c)}'),       r'&#231;'),   #  c-cedilla
+        (re.compile(r'{(e`|`e)}'),       r'&#232;'),   #  e-grave
+        (re.compile(r'{(e\'|\'e)}'),     r'&#233;'),   #  e-acute
+        (re.compile(r'{(e\^|\^e)}'),     r'&#234;'),   #  e-circumflex
+        (re.compile(r'{(e\"|\"e)}'),     r'&#235;'),   #  e-diaeresis
+        (re.compile(r'{(i`|`i)}'),       r'&#236;'),   #  i-grave
+        (re.compile(r'{(i\'|\'i)}'),     r'&#237;'),   #  i-acute
+        (re.compile(r'{(i\^|\^i)}'),     r'&#238;'),   #  i-circumflex
+        (re.compile(r'{(i\"|\"i)}'),     r'&#239;'),   #  i-diaeresis
+        (re.compile(r'{(d-|-d)}'),       r'&#240;'),   #  eth
+        (re.compile(r'{(n~|~n)}'),       r'&#241;'),   #  n-tilde
+        (re.compile(r'{(o`|`o)}'),       r'&#242;'),   #  o-grave
+        (re.compile(r'{(o\'|\'o)}'),     r'&#243;'),   #  o-acute
+        (re.compile(r'{(o\^|\^o)}'),     r'&#244;'),   #  o-circumflex
+        (re.compile(r'{(o~|~o)}'),       r'&#245;'),   #  o-tilde
+        (re.compile(r'{(o\"|\"o)}'),     r'&#246;'),   #  o-diaeresis
+        (re.compile(r'{(o\/|\/o)}'),     r'&#248;'),   #  o-stroke
+        (re.compile(r'{(u`|`u)}'),       r'&#249;'),   #  u-grave
+        (re.compile(r'{(u\'|\'u)}'),     r'&#250;'),   #  u-acute
+        (re.compile(r'{(u\^|\^u)}'),     r'&#251;'),   #  u-circumflex
+        (re.compile(r'{(u\"|\"u)}'),     r'&#252;'),   #  u-diaeresis
+        (re.compile(r'{(y\'|\'y)}'),     r'&#253;'),   #  y-acute
+        (re.compile(r'{(y\"|\"y)}'),     r'&#255;'),   #  y-diaeresis
+        (re.compile(r'{OE}'),            r'&#338;'),   #  OE
+        (re.compile(r'{oe}'),            r'&#339;'),   #  oe
+        (re.compile(r'{(S\^|\^S)}'),     r'&Scaron;'), #  Scaron
+        (re.compile(r'{(s\^|\^s)}'),     r'&scaron;'), #  scaron
+        (re.compile(r'{\*}'),            r'&#8226;'),  #  bullet
+        (re.compile(r'{Fr}'),            r'&#8355;'),  #  Franc
+        (re.compile(r'{(L=|=L)}'),       r'&#8356;'),  #  Lira
+        (re.compile(r'{Rs}'),            r'&#8360;'),  #  Rupee
+        (re.compile(r'{(C=|=C)}'),       r'&#8364;'),  #  euro
+        (re.compile(r'{tm}'),            r'&#8482;'),  #  trademark
+        (re.compile(r'{spade}'),         r'&#9824;'),  #  spade
+        (re.compile(r'{club}'),          r'&#9827;'),  #  club
+        (re.compile(r'{heart}'),         r'&#9829;'),  #  heart
+        (re.compile(r'{diamond}'),       r'&#9830;'),  #  diamond
+    ]
+    glyph_defaults = [  # (compiled pattern, replacement) pairs; order matters, each rule sees the previous rule's output
+        (re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'),                   r'\1\2&#215;\3'),                       #  dimension sign
+        (re.compile(r'(\d+)\'', re.I),                                 r'\1&#8242;'),                          #  prime
+        (re.compile(r'(\d+)\"', re.I),                                 r'\1&#8243;'),                          #  prime-double
+        (re.compile(r"(\w)\'(\w)"),                                    r'\1&#8217;\2'),                        #  apostrophe's
+        (re.compile(r'(\s)\'(\d+\w?)\b(?!\')'),                        r'\1&#8217;\2'),                        #  back in '88
+        (re.compile(r'(\S)\'(?=\s|\'|<|$)'),                           r'\1&#8217;'),                          #  single closing
+        (re.compile(r'\'/'),                                           r'&#8216;'),                            #  single opening
+        (re.compile(r'(\")\"'),                                        r'\1&#8221;'),                          #  double closing - following another
+        (re.compile(r'(\S)\"(?=\s|\"|<|$)'),                           r'\1&#8221;'),                          #  double closing
+        (re.compile(r'"'),                                             r'&#8220;'),                            #  double opening
+        (re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'),      r'<acronym title="\2">\1</acronym>'),   #  3+ uppercase acronym
+        (re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'),         r'<span class="caps">\1</span>'),       #  3+ uppercase
+        (re.compile(r'\b(\s{0,1})?\.{3}'),                             r'\1&#8230;'),                          #  ellipsis (U+2026)
+        (re.compile(r'(\s?)--(\s?)'),                                  r'\1&#8212;\2'),                        #  em dash
+        (re.compile(r'\s-(?:\s|$)'),                                   r' &#8211; '),                          #  en dash
+        (re.compile(r'\b( ?)[([]TM[])]', re.I),                        r'\1&#8482;'),                          #  trademark
+        (re.compile(r'\b( ?)[([]R[])]', re.I),                         r'\1&#174;'),                           #  registered
+        (re.compile(r'\b( ?)[([]C[])]', re.I),                         r'\1&#169;'),                           #  copyright
+    ]
+
 
     def __init__(self, restricted=False, lite=False, noimage=False):
         """docstring for __init__"""
@@ -673,211 +689,15 @@ class Textile(object):
         # fix: hackish
         text = re.sub(r'"\Z', '\" ', text)
 
-        glyph_search = (
-            re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'),                     #  dimension sign
-            re.compile(r"(\w)\'(\w)"),                                      #  apostrophe's
-            re.compile(r'(\s)\'(\d+\w?)\b(?!\')'),                          #  back in '88
-            re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'),                  #  single closing
-            re.compile(r'\'/'),                                             #  single opening
-            re.compile(r'(\")\"'),                                          #  double closing - following another
-            re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'),                  #  double closing
-            re.compile(r'"'),                                               #  double opening
-            re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'),        #  3+ uppercase acronym
-            re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'),           #  3+ uppercase
-            re.compile(r'\b(\s{0,1})?\.{3}'),                               #  ellipsis
-            re.compile(r'(\s?)--(\s?)'),                                    #  em dash
-            re.compile(r'\s-(?:\s|$)'),                                     #  en dash
-            re.compile(r'\b( ?)[([]TM[])]', re.I),                            #  trademark
-            re.compile(r'\b( ?)[([]R[])]', re.I),                             #  registered
-            re.compile(r'\b( ?)[([]C[])]', re.I)                              #  copyright
-         )
-
-        glyph_replace = [x % dict(self.glyph_defaults) for x in (
-            r'\1\2%(txt_dimension)s\3',          #  dimension sign
-            r'\1%(txt_apostrophe)s\2',           #  apostrophe's
-            r'\1%(txt_apostrophe)s\2',           #  back in '88
-            r'\1%(txt_quote_single_close)s',     #  single closing
-            r'%(txt_quote_single_open)s',        #  single opening
-            r'\1%(txt_quote_double_close)s',     #  double closing - following another
-            r'\1%(txt_quote_double_close)s',     #  double closing
-            r'%(txt_quote_double_open)s',        #  double opening
-            r'<acronym title="\2">\1</acronym>', #  3+ uppercase acronym
-            r'<span class="caps">\1</span>',     #  3+ uppercase
-            r'\1%(txt_ellipsis)s',               #  ellipsis
-            r'\1%(txt_emdash)s\2',               #  em dash
-            r' %(txt_endash)s ',                 #  en dash
-            r'\1%(txt_trademark)s',              #  trademark
-            r'\1%(txt_registered)s',             #  registered
-            r'\1%(txt_copyright)s'               #  copyright
-        )]
-
-        if re.search(r'{.+?}', text):
-            glyph_search += (
-                re.compile(r'{(c\||\|c)}'),                               #  cent
-                re.compile(r'{(L-|-L)}'),                                 #  pound
-                re.compile(r'{(Y=|=Y)}'),                                 #  yen
-                re.compile(r'{\(c\)}'),                                   #  copyright
-                re.compile(r'{\(r\)}'),                                   #  registered
-                re.compile(r'{1/4}'),                                     #  quarter
-                re.compile(r'{1/2}'),                                     #  half
-                re.compile(r'{3/4}'),                                     #  three-quarter
-                re.compile(r'{(A`|`A)}'),                                 #  192;
-                re.compile(r'{(A\'|\'A)}'),                               #  193;
-                re.compile(r'{(A\^|\^A)}'),                               #  194;
-                re.compile(r'{(A~|~A)}'),                                 #  195;
-                re.compile(r'{(A\"|\"A)}'),                               #  196;
-                re.compile(r'{(Ao|oA)}'),                                 #  197;
-                re.compile(r'{(AE)}'),                                    #  198;
-                re.compile(r'{(C,|,C)}'),                                 #  199;
-                re.compile(r'{(E`|`E)}'),                                 #  200;
-                re.compile(r'{(E\'|\'E)}'),                               #  201;
-                re.compile(r'{(E\^|\^E)}'),                               #  202;
-                re.compile(r'{(E\"|\"E)}'),                               #  203;
-                re.compile(r'{(I`|`I)}'),                                 #  204;
-                re.compile(r'{(I\'|\'I)}'),                               #  205;
-                re.compile(r'{(I\^|\^I)}'),                               #  206;
-                re.compile(r'{(I\"|\"I)}'),                               #  207;
-                re.compile(r'{(D-|-D)}'),                                 #  208;
-                re.compile(r'{(N~|~N)}'),                                 #  209;
-                re.compile(r'{(O`|`O)}'),                                 #  210;
-                re.compile(r'{(O\'|\'O)}'),                               #  211;
-                re.compile(r'{(O\^|\^O)}'),                               #  212;
-                re.compile(r'{(O~|~O)}'),                                 #  213;
-                re.compile(r'{(O\"|\"O)}'),                               #  214;
-                re.compile(r'{(O\/|\/O)}'),                               #  215;
-                re.compile(r'{(U`|`U)}'),                                 #  216;
-                re.compile(r'{(U\'|\'U)}'),                               #  217;
-                re.compile(r'{(U\^|\^U)}'),                               #  218;
-                re.compile(r'{(U\"|\"U)}'),                               #  219;
-                re.compile(r'{(Y\'|\'Y)}'),                               #  220;
-                re.compile(r'{(a`|`a)}'),                                 #  a-grace
-                re.compile(r'{(a\'|\'a)}'),                               #  a-acute
-                re.compile(r'{(a\^|\^a)}'),                               #  a-circumflex
-                re.compile(r'{(a~|~a)}'),                                 #  a-tilde
-                re.compile(r'{(a\"|\"a)}'),                               #  a-diaeresis
-                re.compile(r'{(ao|oa)}'),                                 #  a-ring
-                re.compile(r'{ae}'),                                      #  ae
-                re.compile(r'{(c,|,c)}'),                                 #  c-cedilla
-                re.compile(r'{(e`|`e)}'),                                 #  e-grace
-                re.compile(r'{(e\'|\'e)}'),                               #  e-acute
-                re.compile(r'{(e\^|\^e)}'),                               #  e-circumflex
-                re.compile(r'{(e\"|\"e)}'),                               #  e-diaeresis
-                re.compile(r'{(i`|`i)}'),                                 #  i-grace
-                re.compile(r'{(i\'|\'i)}'),                               #  i-acute
-                re.compile(r'{(i\^|\^i)}'),                               #  i-circumflex
-                re.compile(r'{(i\"|\"i)}'),                               #  i-diaeresis
-                re.compile(r'{(n~|~n)}'),                                 #  n-tilde
-                re.compile(r'{(o`|`o)}'),                                 #  o-grace
-                re.compile(r'{(o\'|\'o)}'),                               #  o-acute
-                re.compile(r'{(o\^|\^o)}'),                               #  o-circumflex
-                re.compile(r'{(o~|~o)}'),                                 #  o-tilde
-                re.compile(r'{(o\"|\"o)}'),                               #  o-diaeresis
-                re.compile(r'{(o\/|\/o)}'),                               #  o-stroke
-                re.compile(r'{(u`|`u)}'),                                 #  u-grace
-                re.compile(r'{(u\'|\'u)}'),                               #  u-acute
-                re.compile(r'{(u\^|\^u)}'),                               #  u-circumflex
-                re.compile(r'{(u\"|\"u)}'),                               #  u-diaeresis
-                re.compile(r'{(y\'|\'y)}'),                               #  y-acute
-                re.compile(r'{(y\"|\"y)}'),                               #  y-diaeresis
-                re.compile(r'{OE}'),                                      #  y-diaeresis
-                re.compile(r'{oe}'),                                      #  y-diaeresis
-                re.compile(r'{\*}'),                                      #  bullet
-                re.compile(r'{Fr}'),                                      #  Franc
-                re.compile(r'{(L=|=L)}'),                                 #  Lira
-                re.compile(r'{Rs}'),                                      #  Rupee
-                re.compile(r'{(C=|=C)}'),                                 #  euro
-                re.compile(r'{tm}'),                                      #  euro
-                re.compile(r'{spade}'),                                   #  spade
-                re.compile(r'{club}'),                                    #  club
-                re.compile(r'{heart}'),                                   #  heart
-                re.compile(r'{diamond}')                                  #  diamond
-             )
-    
-            glyph_replace += [x % dict(self.glyph_defaults) for x in (
-                r'%(mac_cent)s',                     #  cent
-                r'%(mac_pound)s',                    #  pound
-                r'%(mac_yen)s',                      #  yen
-                r'%(txt_copyright)s',                #  copyright
-                r'%(txt_registered)s',               #  registered
-                r'%(mac_quarter)s',                  #  quarter
-                r'%(mac_half)s',                     #  half
-                r'%(mac_three-quarter)s',            #  three-quarter
-                r'%(mac_cA-grave)s',                 #  192;
-                r'%(mac_cA-acute)s',                 #  193;
-                r'%(mac_cA-circumflex)s',            #  194;
-                r'%(mac_cA-tilde)s',                 #  195;
-                r'%(mac_cA-diaeresis)s',             #  196;
-                r'%(mac_cA-ring)s',                  #  197;
-                r'%(mac_cAE)s',                      #  198;
-                r'%(mac_cC-cedilla)s',               #  199;
-                r'%(mac_cE-grave)s',                 #  200;
-                r'%(mac_cE-acute)s',                 #  201;
-                r'%(mac_cE-circumflex)s',            #  202;
-                r'%(mac_cE-diaeresis)s',             #  203;
-                r'%(mac_cI-grave)s',                 #  204;
-                r'%(mac_cI-acute)s',                 #  205;
-                r'%(mac_cI-circumflex)s',            #  206;
-                r'%(mac_cI-diaeresis)s',             #  207;
-                r'%(mac_cEth)s',                     #  208;
-                r'%(mac_cN-tilde)s',                 #  209;
-                r'%(mac_cO-grave)s',                 #  210;
-                r'%(mac_cO-acute)s',                 #  211;
-                r'%(mac_cO-circumflex)s',            #  212;
-                r'%(mac_cO-tilde)s',                 #  213;
-                r'%(mac_cO-diaeresis)s',             #  214;
-                r'%(mac_cO-stroke)s',                #  216;
-                r'%(mac_cU-grave)s',                 #  217;
-                r'%(mac_cU-acute)s',                 #  218;
-                r'%(mac_cU-circumflex)s',            #  219;
-                r'%(mac_cU-diaeresis)s',             #  220;
-                r'%(mac_cY-acute)s',                 #  221;
-                r'%(mac_sa-grave)s',                 #  224;
-                r'%(mac_sa-acute)s',                 #  225;
-                r'%(mac_sa-circumflex)s',            #  226;
-                r'%(mac_sa-tilde)s',                 #  227;
-                r'%(mac_sa-diaeresis)s',             #  228;
-                r'%(mac_sa-ring)s',                  #  229;
-                r'%(mac_sae)s',                      #  230;
-                r'%(mac_sc-cedilla)s',               #  231;
-                r'%(mac_se-grave)s',                 #  232;
-                r'%(mac_se-acute)s',                 #  233;
-                r'%(mac_se-circumflex)s',            #  234;
-                r'%(mac_se-diaeresis)s',             #  235;
-                r'%(mac_si-grave)s',                 #  236;
-                r'%(mac_si-acute)s',                 #  237;
-                r'%(mac_si-circumflex)s',            #  238;
-                r'%(mac_si-diaeresis)s',             #  239;
-                r'%(mac_sn-tilde)s',                 #  241;
-                r'%(mac_so-grave)s',                 #  242;
-                r'%(mac_so-acute)s',                 #  243;
-                r'%(mac_so-circumflex)s',            #  244;
-                r'%(mac_so-tilde)s',                 #  245;
-                r'%(mac_so-diaeresis)s',             #  246;
-                r'%(mac_so-stroke)s',                #  248;
-                r'%(mac_su-grave)s',                 #  249;
-                r'%(mac_su-acute)s',                 #  250;
-                r'%(mac_su-circumflex)s',            #  251;
-                r'%(mac_su-diaeresis)s',             #  252;
-                r'%(mac_sy-acute)s',                 #  253;
-                r'%(mac_sy-diaeresis)s',             #  255;
-                r'%(mac_cOE)s',                      #  338;
-                r'%(mac_soe)s',                      #  339;
-                r'%(mac_bullet)s',                   #  bullet
-                r'%(mac_franc)s',                    #  franc
-                r'%(mac_lira)s',                     #  lira
-                r'%(mac_rupee)s',                    #  rupee
-                r'%(mac_euro)s',                     #  euro
-                r'%(txt_trademark)s',                #  trademark
-                r'%(mac_spade)s',                    #  spade
-                r'%(mac_club)s',                     #  club
-                r'%(mac_heart)s',                    #  heart
-                r'%(mac_diamond)s'                   #  diamond
-            )]
-
         result = []
         for line in re.compile(r'(<.*?>)', re.U).split(text):
             if not re.search(r'<.*>', line):
-                for s, r in zip(glyph_search, glyph_replace):
+                rules = []
+                if re.search(r'{.+?}', line):
+                    rules = self.macro_defaults + self.glyph_defaults
+                else:
+                    rules = self.glyph_defaults
+                for s, r in rules:
                     line = s.sub(r, line)
             result.append(line)
         return ''.join(result)
@@ -1045,7 +865,7 @@ class Textile(object):
         'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
         """
         qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^')
-        pnct = ".,\"'?!;:"
+        pnct = ".,\"'?!;:()"
 
         for qtag in qtags:
             pattern = re.compile(r"""

From 9ad00b98d4f441001f6c43221289a5acb036c477 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 19 Mar 2011 15:46:51 -0400
Subject: [PATCH 02/26] TXT Input: Textile: Fix issue with double closings.

---
 src/calibre/ebooks/textile/functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/textile/functions.py b/src/calibre/ebooks/textile/functions.py
index b37cd4aab8..5e07cdaec2 100755
--- a/src/calibre/ebooks/textile/functions.py
+++ b/src/calibre/ebooks/textile/functions.py
@@ -216,7 +216,7 @@ class Textile(object):
         (re.compile(r'(\S)\'(?=\s|\'|<|$)'),                           r'\1&#8217;'),                          #  single closing
         (re.compile(r'\'/'),                                           r'&#8216;'),                            #  single opening
         (re.compile(r'(\")\"'),                                        r'\1&#8221;'),                          #  double closing - following another
-        (re.compile(r'(\S)\"(?=\s|\"|<|$)'),                           r'\1&#8221;'),                          #  double closing
+        (re.compile(r'(\S)\"(?=\s|&#8221;|<|$)'),                      r'\1&#8221;'),                          #  double closing
         (re.compile(r'"'),                                             r'&#8220;'),                            #  double opening
         (re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'),      r'<acronym title="\2">\1</acronym>'),   #  3+ uppercase acronym
         (re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'),         r'<span class="caps">\1</span>'),       #  3+ uppercase

From 86255b1b107aa510137bd802c9e39f06796b7fa1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 19 Mar 2011 14:47:41 -0600
Subject: [PATCH 03/26] Fix #9448 (Support for Archos 43 tablet)

---
 src/calibre/devices/android/driver.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py
index 1ddc14bd1f..a527e8a29b 100644
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@@ -64,6 +64,7 @@ class ANDROID(USBMS):
             0x0e79 : {
                 0x1400 : [0x0222, 0x0216],
                 0x1408 : [0x0222, 0x0216],
+                0x1417 : [0x0216],
                 0x1419 : [0x0216],
                 0x1420 : [0x0216],
                 0x1422 : [0x0216]
@@ -98,7 +99,7 @@ class ANDROID(USBMS):
             'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
             'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
             'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
-            '7', 'A956', 'A955']
+            '7', 'A956', 'A955', 'A43']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
             'A70S', 'A101IT', '7']

From 306a2e206c06ababc9d9577e7b20ec0f4f4fcff0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 19 Mar 2011 15:25:18 -0600
Subject: [PATCH 04/26] ...

---
 src/calibre/gui2/widgets.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/calibre/gui2/widgets.py b/src/calibre/gui2/widgets.py
index 8ebf9c2c21..4ff2562bea 100644
--- a/src/calibre/gui2/widgets.py
+++ b/src/calibre/gui2/widgets.py
@@ -342,6 +342,7 @@ class FontFamilyModel(QAbstractListModel):
         self.families = list(qt_families.intersection(set(self.families)))
         self.families.sort()
         self.families[:0] = [_('None')]
+        self.font = QFont('sansserif')
 
     def rowCount(self, *args):
         return len(self.families)
@@ -354,10 +355,11 @@ class FontFamilyModel(QAbstractListModel):
             return NONE
         if role == Qt.DisplayRole:
             return QVariant(family)
-        if False and role == Qt.FontRole:
-            # Causes a Qt crash with some fonts
-            # so disabled.
-            return QVariant(QFont(family))
+        if role == Qt.FontRole:
+            # If a user chooses some non standard font as the interface font,
+            # rendering some font names causes Qt to crash, so return what is
+            # hopefully a "safe" font
+            return QVariant(self.font)
         return NONE
 
     def index_of(self, family):

From ec622f426d2ae00f7fd01dbb591576b1ef213f99 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 19 Mar 2011 15:36:50 -0600
Subject: [PATCH 05/26] ...

---
 setup/installer/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup/installer/__init__.py b/setup/installer/__init__.py
index c25334dbe4..e07586eefd 100644
--- a/setup/installer/__init__.py
+++ b/setup/installer/__init__.py
@@ -14,9 +14,9 @@ from setup.build_environment import HOST, PROJECT
 BASE_RSYNC = ['rsync', '-avz', '--delete']
 EXCLUDES = []
 for x in [
-    'src/calibre/plugins', 'src/calibre/manual', 'src/calibre/trac',
+    'src/calibre/plugins', 'src/calibre/manual', 'src/calibre/trac', 'recipes',
     '.bzr', '.build', '.svn', 'build', 'dist', 'imgsrc', '*.pyc', '*.pyo', '*.swp',
-    '*.swo']:
+    '*.swo', 'format_docs']:
     EXCLUDES.extend(['--exclude', x])
 SAFE_EXCLUDES = ['"%s"'%x if '*' in x else x for x in EXCLUDES]
 

From 0fbc8d717b186abbe87d47e09a3bf699ef7c6bc8 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 19 Mar 2011 17:52:35 -0600
Subject: [PATCH 06/26] ...

---
 setup/installer/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup/installer/__init__.py b/setup/installer/__init__.py
index e07586eefd..f2d598e33a 100644
--- a/setup/installer/__init__.py
+++ b/setup/installer/__init__.py
@@ -138,7 +138,7 @@ class VMInstaller(Command):
             self.vm = self.VM
         if not self.vmware_started():
             self.start_vmware()
-        subprocess.call(['chmod', '-R', '+r', 'resources/recipes'])
+        subprocess.call(['chmod', '-R', '+r', 'recipes'])
         self.start_vm()
         self.download_installer()
         if not self.dont_shutdown:

From c979cb10b8a91da5d061c1bdec099510d84c0cff Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 19 Mar 2011 18:02:29 -0600
Subject: [PATCH 07/26] Windows build: Put all python files into a zip file to
 reduce upgrade time

---
 setup/installer/windows/freeze.py | 116 +++++++++++++++++++++++++++++-
 setup/installer/windows/site.py   |   2 +-
 setup/installer/windows/util.c    |   2 +-
 3 files changed, 117 insertions(+), 3 deletions(-)

diff --git a/setup/installer/windows/freeze.py b/setup/installer/windows/freeze.py
index e9e47816fd..cf4dcd5f9d 100644
--- a/setup/installer/windows/freeze.py
+++ b/setup/installer/windows/freeze.py
@@ -6,7 +6,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import sys, os, shutil, glob, py_compile, subprocess, re
+import sys, os, shutil, glob, py_compile, subprocess, re, zipfile, time
 
 from setup import Command, modules, functions, basenames, __version__, \
     __appname__
@@ -40,6 +40,13 @@ DESCRIPTIONS = {
         'calibre-smtp' : 'Command line interface for sending books via email',
 }
 
+def walk(dir):
+    ''' A nice interface to os.walk '''
+    for record in os.walk(dir):
+        for f in record[-1]:
+            yield os.path.join(record[0], f)
+
+
 class Win32Freeze(Command, WixMixIn):
 
     description = 'Free windows calibre installation'
@@ -63,12 +70,15 @@ class Win32Freeze(Command, WixMixIn):
         self.rc_template = self.j(self.d(self.a(__file__)), 'template.rc')
         self.py_ver = ''.join(map(str, sys.version_info[:2]))
         self.lib_dir = self.j(self.base, 'Lib')
+        self.pydlib = self.j(self.base, 'pydlib')
+        self.pylib = self.j(self.base, 'pylib.zip')
 
         self.initbase()
         self.build_launchers()
         self.freeze()
         self.embed_manifests()
         self.install_site_py()
+        self.archive_lib_dir()
         self.create_installer()
 
     def initbase(self):
@@ -356,4 +366,108 @@ class Win32Freeze(Command, WixMixIn):
                             dest, lib]
                     self.run_builder(cmd)
 
+    def archive_lib_dir(self):
+        self.info('Putting all python code into a zip file for performance')
+        if os.path.exists(self.pydlib):
+            shutil.rmtree(self.pydlib)
+        os.makedirs(self.pydlib)
+        self.zf_timestamp = time.localtime(time.time())[:6]
+        self.zf_names = set()
+        with zipfile.ZipFile(self.pylib, 'w', zipfile.ZIP_STORED) as zf:
+            for x in os.listdir(self.lib_dir):
+                if x == 'site-packages':
+                    continue
+                self.add_to_zipfile(zf, x, self.lib_dir)
+
+            sp = self.j(self.lib_dir, 'site-packages')
+            handled = set(['site.pyo'])
+            for pth in ('PIL.pth', 'pywin32.pth'):
+                handled.add(pth)
+                shutil.copyfile(self.j(sp, pth), self.j(self.pydlib, pth))
+                for d in self.get_pth_dirs(self.j(sp, pth)):
+                    shutil.copytree(d, self.j(self.pydlib, self.b(d)), True)
+                    handled.add(self.b(d))
+
+            handled.add('easy-install.pth')
+            for d in self.get_pth_dirs(self.j(sp, 'easy-install.pth')):
+                handled.add(self.b(d))
+                zip_safe = self.is_zip_safe(d)
+                for x in os.listdir(d):
+                    if x == 'EGG-INFO':
+                        continue
+                    if zip_safe:
+                        self.add_to_zipfile(zf, x, d)
+                    else:
+                        absp = self.j(d, x)
+                        dest = self.j(self.pydlib, x)
+                        if os.path.isdir(absp):
+                            shutil.copytree(absp, dest, True)
+                        else:
+                            shutil.copy2(absp, dest)
+
+            for x in os.listdir(sp):
+                if x in handled or x.endswith('.egg-info'):
+                    continue
+                absp = self.j(sp, x)
+                if os.path.isdir(absp):
+                    if not os.listdir(absp):
+                        continue
+                    if self.is_zip_safe(absp):
+                        self.add_to_zipfile(zf, x, sp)
+                    else:
+                        shutil.copytree(absp, self.j(self.pydlib, x), True)
+                else:
+                    if x.endswith('.pyd'):
+                        shutil.copy2(absp, self.j(self.pydlib, x))
+                    else:
+                        self.add_to_zipfile(zf, x, sp)
+
+        shutil.rmtree(self.lib_dir)
+
+    def is_zip_safe(self, path):
+        for f in walk(path):
+            ext = os.path.splitext(f)[1].lower()
+            if ext in ('.pyd', '.dll', '.exe'):
+                return False
+        return True
+
+    def get_pth_dirs(self, pth):
+        base = os.path.dirname(pth)
+        for line in open(pth).readlines():
+            line = line.strip()
+            if not line or line.startswith('#') or line.startswith('import'):
+                continue
+            if line == 'win32\\lib':
+                continue
+            candidate = self.j(base, line)
+            if os.path.exists(candidate):
+                yield candidate
+
+    def add_to_zipfile(self, zf, name, base, exclude=frozenset()):
+        abspath = self.j(base, name)
+        name = name.replace(os.sep, '/')
+        if name in self.zf_names:
+            raise ValueError('Already added %r to zipfile [%r]'%(name, abspath))
+        zinfo = zipfile.ZipInfo(filename=name, date_time=self.zf_timestamp)
+
+        if os.path.isdir(abspath):
+            if not os.listdir(abspath):
+                return
+            zinfo.external_attr = 0700 << 16
+            zf.writestr(zinfo, '')
+            for x in os.listdir(abspath):
+                if x not in exclude:
+                    self.add_to_zipfile(zf, name + os.sep + x, base)
+        else:
+            ext = os.path.splitext(name)[1].lower()
+            if ext in ('.pyd', '.dll', '.exe'):
+                raise ValueError('Cannot add %r to zipfile'%abspath)
+            zinfo.external_attr = 0600 << 16
+            if ext in ('.py', '.pyc', '.pyo'):
+                with open(abspath, 'rb') as f:
+                    zf.writestr(zinfo, f.read())
+
+        self.zf_names.add(name)
+
+
 
diff --git a/setup/installer/windows/site.py b/setup/installer/windows/site.py
index 0e770f3253..5610ff197e 100644
--- a/setup/installer/windows/site.py
+++ b/setup/installer/windows/site.py
@@ -96,7 +96,7 @@ def main():
 
     abs__file__()
 
-    addsitedir(os.path.join(sys.app_dir, 'Lib', 'site-packages'))
+    addsitedir(os.path.join(sys.app_dir, 'pydlib'))
 
     add_calibre_vars()
 
diff --git a/setup/installer/windows/util.c b/setup/installer/windows/util.c
index fdec6d786f..329e3bf8c3 100644
--- a/setup/installer/windows/util.c
+++ b/setup/installer/windows/util.c
@@ -198,7 +198,7 @@ void initialize_interpreter(wchar_t *outr, wchar_t *errr,
     buf[strlen(buf)-1] = '\0';
 
     _snprintf_s(python_home, MAX_PATH, _TRUNCATE, "%s", buf);
-    _snprintf_s(path, 3*MAX_PATH, _TRUNCATE, "%s\\DLLs;%s\\Lib;%s\\Lib\\site-packages",
+    _snprintf_s(path, 3*MAX_PATH, _TRUNCATE, "%s\\pylib.zip;%s\\pydlib;%s\\DLLs",
             buf, buf, buf);
     free(buf);
 

From 9e575a11b42de4bb2af8ac607999af0d891dd791 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 19 Mar 2011 18:07:13 -0600
Subject: [PATCH 08/26] ...

---
 setup/installer/windows/wix-template.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup/installer/windows/wix-template.xml b/setup/installer/windows/wix-template.xml
index 37dd8b25a8..9892041fee 100644
--- a/setup/installer/windows/wix-template.xml
+++ b/setup/installer/windows/wix-template.xml
@@ -154,9 +154,9 @@
                 <CustomAction Id="LaunchApplication" BinaryKey="WixCA"
                     DllEntry="WixShellExec" Impersonate="yes"/>
 
-                <InstallUISequence>
+                <!--<InstallUISequence>
                     <FileCost Suppress="yes" />
-                </InstallUISequence>
+                </InstallUISequence>-->
 
 		</Product>
 </Wix>

From 833eb4f28572e451b5cb612d88bd1674c2e95236 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 20 Mar 2011 10:20:39 -0600
Subject: [PATCH 09/26] Fix #9459 (T-Mobile Optimus T (LG P509) not detected)

---
 src/calibre/devices/android/driver.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py
index a527e8a29b..e2ed159008 100644
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@@ -58,7 +58,7 @@ class ANDROID(USBMS):
             0x413c : { 0xb007 : [0x0100, 0x0224]},
 
             # LG
-            0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100] },
+            0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100], 0x618e : [0x226] },
 
             # Archos
             0x0e79 : {
@@ -92,14 +92,14 @@ class ANDROID(USBMS):
 
     VENDOR_NAME      = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
             'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
-            'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC']
+            'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE']
     WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
             '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
             'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
             'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
             'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
             'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
-            '7', 'A956', 'A955', 'A43']
+            '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
             'A70S', 'A101IT', '7']

From 73d13c3f6a8c405da0412f194976be7591641c56 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 20 Mar 2011 10:23:41 -0600
Subject: [PATCH 10/26] ...

---
 src/calibre/gui2/widgets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/widgets.py b/src/calibre/gui2/widgets.py
index 4ff2562bea..c570a6e159 100644
--- a/src/calibre/gui2/widgets.py
+++ b/src/calibre/gui2/widgets.py
@@ -317,7 +317,7 @@ class CoverView(QGraphicsView, ImageDropMixin):
         ImageDropMixin.__init__(self)
 
     def get_pixmap(self):
-        for item in self.scene().items():
+        for item in self.scene.items():
             if hasattr(item, 'pixmap'):
                 return item.pixmap()
 

From 3de3f3d4fbda16be5da6d45f597bcab00c61ed04 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 20 Mar 2011 10:53:11 -0600
Subject: [PATCH 11/26] ...

---
 session.vim                     | 2 +-
 src/calibre/ebooks/rtf/input.py | 1 +
 src/calibre/utils/wmf/parse.py  | 5 +++--
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/session.vim b/session.vim
index f2adf71de9..fa14a92fba 100644
--- a/session.vim
+++ b/session.vim
@@ -18,6 +18,6 @@ def recipe_title_callback(raw):
     return eval(raw.decode('utf-8'))
 
 vipy.session.add_content_browser('.r', ',r', 'Recipe',
-    vipy.session.glob_based_iterator(os.path.join(project_dir, 'resources', 'recipes', '*.recipe')),
+    vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
     vipy.session.regexp_based_matcher(r'title\s*=\s*(?P<title>.+)', 'title', recipe_title_callback))
 EOFPY
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 52f6feb071..1594b2fbce 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -22,6 +22,7 @@ border_style_map = {
         'dot-dot-dash': 'dotted',
         'outset': 'outset',
         'tripple': 'double',
+        'triple': 'double',
         'thick-thin-small': 'solid',
         'thin-thick-small': 'solid',
         'thin-thick-thin-small': 'solid',
diff --git a/src/calibre/utils/wmf/parse.py b/src/calibre/utils/wmf/parse.py
index c618884e33..9dc035d3e1 100644
--- a/src/calibre/utils/wmf/parse.py
+++ b/src/calibre/utils/wmf/parse.py
@@ -251,12 +251,12 @@ class WMF(object):
         img.load(bmp)
         return img.export('png')
 
-def wmf_unwrap(wmf_data):
+def wmf_unwrap(wmf_data, verbose=0):
     '''
     Return the largest embedded raster image in the WMF.
     The returned data is in PNG format.
     '''
-    w = WMF()
+    w = WMF(verbose=verbose)
     w(wmf_data)
     if not w.has_raster_image:
         raise ValueError('No raster image found in the WMF')
@@ -266,4 +266,5 @@ if __name__ == '__main__':
     wmf = WMF(verbose=4)
     wmf(open(sys.argv[-1], 'rb'))
     open('/t/test.bmp', 'wb').write(wmf.bitmaps[0])
+    open('/t/test.png', 'wb').write(wmf.to_png())
 

From 517a3e397575cd34fc77046a79e8d3f29d025ee5 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 20 Mar 2011 11:18:18 -0600
Subject: [PATCH 12/26] Fix Washington Post

---
 recipes/wash_post.recipe | 68 ++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 38 deletions(-)

diff --git a/recipes/wash_post.recipe b/recipes/wash_post.recipe
index fb6d5bc598..3af89d502e 100644
--- a/recipes/wash_post.recipe
+++ b/recipes/wash_post.recipe
@@ -1,4 +1,3 @@
-import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 
@@ -6,55 +5,48 @@ class WashingtonPost(BasicNewsRecipe):
 
     title = 'Washington Post'
     description = 'US political news'
-    __author__ = 'Kovid Goyal and Sujata Raman'
+    __author__ = 'Kovid Goyal'
     use_embedded_content   = False
     max_articles_per_feed = 20
     language = 'en'
+    encoding = 'utf-8'
 
 
     remove_javascript = True
     no_stylesheets = True
 
-    extra_css       = '''
-                        #articleCopyright { font-family:Arial,helvetica,sans-serif ; font-weight:bold ; font-size:x-small ;}
-                        p { font-family:"Times New Roman",times,serif ; font-weight:normal ; font-size:small ;}
-                        body{font-family:arial,helvetica,sans-serif}
-                            '''
-
-    feeds = [   ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'),
-                ('Politics', 'http://www.washingtonpost.com/wp-dyn/rss/politics/index.xml'),
-                ('Nation', 'http://www.washingtonpost.com/wp-dyn/rss/nation/index.xml'),
-                ('World', 'http://www.washingtonpost.com/wp-dyn/rss/world/index.xml'),
-                ('Business', 'http://www.washingtonpost.com/wp-dyn/rss/business/index.xml'),
-                ('Technology', 'http://www.washingtonpost.com/wp-dyn/rss/technology/index.xml'),
-                ('Health', 'http://www.washingtonpost.com/wp-dyn/rss/health/index.xml'),
-                ('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
-                ('Style',
-                     'http://www.washingtonpost.com/wp-dyn/rss/print/style/index.xml'),
-                ('NFL Sports',
-                     'http://www.washingtonpost.com/wp-dyn/rss/sports/index/nfl/index.xml'),
-                ('Redskins', 'http://www.washingtonpost.com/wp-dyn/rss/sports/redskins/index.xml'),
-                ('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
+    feeds = [
+            ('Politics', 'http://www.washingtonpost.com/rss/politics'),
+            ('Nation', 'http://www.washingtonpost.com/rss/national'),
+            ('World', 'http://www.washingtonpost.com/rss/world'),
+            ('Business', 'http://www.washingtonpost.com/rss/business'),
+            ('Lifestyle', 'http://www.washingtonpost.com/rss/lifestyle'),
+            ('Sports', 'http://www.washingtonpost.com/rss/sports'),
+            ('Redskins', 'http://www.washingtonpost.com/rss/sports/redskins'),
+            ('Opinions', 'http://www.washingtonpost.com/rss/opinions'),
+            ('Entertainment', 'http://www.washingtonpost.com/rss/entertainment'),
+            ('Local', 'http://www.washingtonpost.com/rss/local'),
+            ('Investigations',
+                'http://www.washingtonpost.com/rss/investigations'),
     ]
 
-    remove_tags = [{'id':['pfmnav', 'ArticleCommentsWrapper']}]
+    remove_tags = [
+            {'class':lambda x: x and 'article-toolbar' in x},
+            {'class':lambda x: x and 'quick-comments' in x},
+            {'class':lambda x: x and 'tweet' in x},
+            {'class':lambda x: x and 'article-related' in x},
+            {'class':lambda x: x and 'hidden' in x.split()},
+            {'class':lambda x: x and 'also-read' in x.split()},
+            {'class':lambda x: x and 'partners-content' in x.split()},
+            {'class':['module share', 'module ads', 'comment-vars', 'hidden',
+                'share-icons-wrap', 'comments']},
+            {'id':['right-rail']},
 
+            ]
+    keep_only_tags = dict(id=['content', 'article'])
 
-    def get_article_url(self, article):
-        return article.get('guid', article.get('link', None))
 
     def print_version(self, url):
-        return url.rpartition('.')[0] + '_pf.html'
+        url = url.rpartition('?')[0]
+        return url.replace('_story.html', '_singlePage.html')
 
-    def postprocess_html(self, soup, first):
-        for div in soup.findAll(name='div', style=re.compile('margin')):
-            div['style'] = ''
-        return soup
-
-    def preprocess_html(self, soup):
-        for tag in soup.findAll('font'):
-            if tag.has_key('size'):
-                if tag['size'] == '+2':
-                    if tag.b:
-                        return soup
-        return None

From 893035b874434a512f8fc54eeb4596c46dacae38 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 20 Mar 2011 11:21:01 -0600
Subject: [PATCH 13/26] Fix Christian Science Monitor

---
 recipes/chr_mon.recipe | 66 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 54 insertions(+), 12 deletions(-)

diff --git a/recipes/chr_mon.recipe b/recipes/chr_mon.recipe
index 2b431ebd0b..6f41b95763 100644
--- a/recipes/chr_mon.recipe
+++ b/recipes/chr_mon.recipe
@@ -8,13 +8,13 @@ __description__ = 'Providing context and clarity on national and international n
 
 '''csmonitor.com'''
 
-
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
+
 class ChristianScienceMonitor(BasicNewsRecipe):
 
-    author        = 'Kovid Goyal, Sujata Raman and Lorenzo Vigentini'
+    __author__    = 'Kovid Goyal'
     description   = 'Providing context and clarity on national and international news, peoples and cultures'
 
     cover_url      = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif'
@@ -34,6 +34,49 @@ class ChristianScienceMonitor(BasicNewsRecipe):
     remove_javascript     = True
     no_stylesheets = True
 
+    def append_page(self, soup, appendtag, position):
+        nav = soup.find('div',attrs={'class':'navigation'})
+        if nav:
+            pager = nav.findAll('a')
+            for part in pager:
+                if 'Next' in part:
+                    nexturl = ('http://www.csmonitor.com' +
+                           re.findall(r'href="(.*?)"', str(part))[0])
+                    soup2 = self.index_to_soup(nexturl)
+                    texttag = soup2.find('div',
+                                 attrs={'class': re.compile('list-article-.*')})
+                    trash_c = soup2.findAll(attrs={'class': 'list-description'})
+                    trash_h = soup2.h1
+                    for tc in trash_c: tc.extract()
+                    trash_h.extract()
+
+                    newpos = len(texttag.contents)
+                    self.append_page(soup2, texttag, newpos)
+                    texttag.extract()
+                    appendtag.insert(position, texttag)
+
+    def preprocess_html(self, soup):
+        PRINT_RE = re.compile(r'/layout/set/print/content/view/print/[0-9]*')
+        html = str(soup)
+        try:
+            print_found = PRINT_RE.findall(html)
+        except Exception:
+            pass
+        if print_found:
+            print_url = 'http://www.csmonitor.com' + print_found[0]
+            print_soup = self.index_to_soup(print_url)
+        else:
+            self.append_page(soup, soup.body, 3)
+
+            trash_a = soup.findAll(attrs={'class': re.compile('navigation.*')})
+            trash_b = soup.findAll(attrs={'style': re.compile('.*')})
+            trash_d = soup.findAll(attrs={'class': 'sByline'})
+            for ta in trash_a: ta.extract()
+            for tb in trash_b: tb.extract()
+            for td in trash_d: td.extract()
+
+            print_soup = soup
+        return print_soup
 
     preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
         [
@@ -43,7 +86,6 @@ class ChristianScienceMonitor(BasicNewsRecipe):
         (r'Full HTML version of this story which may include photos, graphics, and related links.*</body>',
               lambda match : '</body>'),
         ]]
-
     extra_css      = '''
                         h1{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: large}
                         .sub{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: small;}
@@ -56,10 +98,9 @@ class ChristianScienceMonitor(BasicNewsRecipe):
                         #main{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: small; }
                         #photo-details{ font-family:Arial,Helvetica,sans-serif ; color:#999999; font-size: x-small;}
                         span.name{color:#205B87;font-family: Georgia,Times,"Times New Roman",serif; font-size: x-small}
-                        p#dateline{color:#444444 ;  font-family:Arial,Helvetica,sans-serif ; font-style:italic;}
-                        '''
-    feeds          = [
-                        (u'Top Stories' , u'http://rss.csmonitor.com/feeds/top'),
+                        p#dateline{color:#444444 ;  font-family:Arial,Helvetica,sans-serif ; font-style:italic;} '''
+
+    feeds          = [(u'Top Stories', u'http://rss.csmonitor.com/feeds/top'),
                         (u'World' , u'http://rss.csmonitor.com/feeds/world'),
                         (u'USA' , u'http://rss.csmonitor.com/feeds/usa'),
                         (u'Commentary' , u'http://rss.csmonitor.com/feeds/commentary'),
@@ -74,9 +115,7 @@ class ChristianScienceMonitor(BasicNewsRecipe):
                         (u'Home Forum' , u'http://rss.csmonitor.com/feeds/homeforum')
                      ]
 
-    keep_only_tags = [
-                        dict(name='div', attrs={'id':'mainColumn'}),
-                        ]
+    keep_only_tags = [dict(name='div', attrs={'id':'mainColumn'}), ]
 
     remove_tags    = [
                         dict(name='div', attrs={'id':['story-tools','videoPlayer','storyRelatedBottom','enlarge-photo','photo-paginate']}),
@@ -86,7 +125,10 @@ class ChristianScienceMonitor(BasicNewsRecipe):
                                 'hide', 'podBrdr']}),
                         dict(name='ul', attrs={'class':[ 'centerliststories']}) ,
                         dict(name='form', attrs={'id':[ 'commentform']}) ,
+          dict(name='div', attrs={'class': ['ui-comments']})
                     ]
 
-    remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']})]
-
+    remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']}),
+              dict(name='div', attrs={'class': [re.compile('navigation.*')]}),
+              dict(name='div', attrs={'style': [re.compile('.*')]})
+                        ]

From d5166ed7bc9e764cb7dca315588100b2b3fbdd82 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 20 Mar 2011 19:10:15 -0400
Subject: [PATCH 14/26] TXT Input: Textile: More tweaks.

---
 src/calibre/ebooks/textile/functions.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/calibre/ebooks/textile/functions.py b/src/calibre/ebooks/textile/functions.py
index 5e07cdaec2..88d27b036d 100755
--- a/src/calibre/ebooks/textile/functions.py
+++ b/src/calibre/ebooks/textile/functions.py
@@ -202,26 +202,31 @@ class Textile(object):
         (re.compile(r'{Rs}'),            r'&#8360;'),  #  Rupee
         (re.compile(r'{(C=|=C)}'),       r'&#8364;'),  #  euro
         (re.compile(r'{tm}'),            r'&#8482;'),  #  trademark
-        (re.compile(r'{spade}'),         r'&#9824;'),  #  spade
-        (re.compile(r'{club}'),          r'&#9827;'),  #  club
-        (re.compile(r'{heart}'),         r'&#9829;'),  #  heart
-        (re.compile(r'{diamond}'),       r'&#9830;'),  #  diamond
+        (re.compile(r'{spades?}'),       r'&#9824;'),  #  spade
+        (re.compile(r'{clubs?}'),        r'&#9827;'),  #  club
+        (re.compile(r'{hearts?}'),       r'&#9829;'),  #  heart
+        (re.compile(r'{diam(onds?|s)}'), r'&#9830;'),  #  diamond
     ]
     glyph_defaults = [
         (re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'),                   r'\1\2&#215;\3'),                       #  dimension sign
         (re.compile(r'(\d+)\'', re.I),                                 r'\1&#8242;'),                          #  prime
         (re.compile(r'(\d+)\"', re.I),                                 r'\1&#8243;'),                          #  prime-double
+        (re.compile(r'(\')\''),                                        r'\1&#8217;'),                          #  single closing - following another
         (re.compile(r"(\w)\'(\w)"),                                    r'\1&#8217;\2'),                        #  apostrophe's
         (re.compile(r'(\s)\'(\d+\w?)\b(?!\')'),                        r'\1&#8217;\2'),                        #  back in '88
-        (re.compile(r'(\S)\'(?=\s|\'|<|$)'),                           r'\1&#8217;'),                          #  single closing
-        (re.compile(r'\'/'),                                           r'&#8216;'),                            #  single opening
+        (re.compile(r'(\s\[)\''),                                      r'\1&#8216;'),                          #  single opening - following ws+[
+        (re.compile(r'(\S)\'(?=\s|'+pnct+'|<|$)', re.M),               r'\1&#8217;'),                          #  single closing
+        (re.compile(r'\''),                                            r'&#8216;'),                            #  single opening
         (re.compile(r'(\")\"'),                                        r'\1&#8221;'),                          #  double closing - following another
-        (re.compile(r'(\S)\"(?=\s|&#8221;|<|$)'),                      r'\1&#8221;'),                          #  double closing
+        (re.compile(r'(\s\[)\"'),                                      r'\1&#8220;'),                          #  double opening - following whitespace+[
+        (re.compile(r'(\S)\"(?=\s|'+pnct+'|<|$)', re.M),               r'\1&#8221;'),                          #  double closing
         (re.compile(r'"'),                                             r'&#8220;'),                            #  double opening
         (re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'),      r'<acronym title="\2">\1</acronym>'),   #  3+ uppercase acronym
         (re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'),         r'<span class="caps">\1</span>'),       #  3+ uppercase
-        (re.compile(r'\b(\s{0,1})?\.{3}'),                             r'\1&#8260;'),                          #  ellipsis
-        (re.compile(r'(\s?)--(\s?)'),                                  r'\1&#8212;\2'),                        #  em dash
+        (re.compile(r'\b(\s{0,1})?\.{3}'),                             r'\1&#8230;'),                          #  ellipsis
+        (re.compile(r'^[\*_-]{3,}$', re.M),                            r'<hr />'),                             #  <hr> scene-break
+        (re.compile(r'\b--\b'),                                        r'&#8212;'),                            #  em dash
+        (re.compile(r'(\s)--(\s)'),                                    r'\1&#8212;\2'),                        #  em dash
         (re.compile(r'\s-(?:\s|$)'),                                   r' &#8211; '),                          #  en dash
         (re.compile(r'\b( ?)[([]TM[])]', re.I),                        r'\1&#8482;'),                          #  trademark
         (re.compile(r'\b( ?)[([]R[])]', re.I),                         r'\1&#174;'),                           #  registered
@@ -747,7 +752,7 @@ class Textile(object):
         return url
 
     def shelve(self, text):
-        id = str(uuid.uuid4())
+        id = str(uuid.uuid4()) + 'c'
         self.shelf[id] = text
         return id
 

From 32703cb261e843709f27150d575b59fccfa5c780 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 20 Mar 2011 21:09:18 -0600
Subject: [PATCH 15/26] Fix #739120 (Migrate calibre bzr commit plugin to
 launchpad)

---
 src/calibre/trac/bzr_commit_plugin.py | 107 ++++++++------------------
 1 file changed, 32 insertions(+), 75 deletions(-)

diff --git a/src/calibre/trac/bzr_commit_plugin.py b/src/calibre/trac/bzr_commit_plugin.py
index 325bac7a79..c70e6fbf13 100644
--- a/src/calibre/trac/bzr_commit_plugin.py
+++ b/src/calibre/trac/bzr_commit_plugin.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env  python
+#!/usr/bin/env  python2
 
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
@@ -8,114 +8,71 @@ __docformat__ = 'restructuredtext en'
 Plugin to make the commit command automatically close bugs when the commit
 message contains `Fix #number` or `Implement #number`. Also updates the commit
 message with the summary of the closed bug. It also set the `--fixes` metadata
-appropriately. Currently only works with a Trac bug repository with the XMLRPC
-plugin enabled.
-
-To use copy this file into `~/.bazaar/plugins` and add the following to branch.conf
-in the working tree you want to use it with::
-
-    trac_reponame_url = <url>
-    trac_reponame_username = <username>
-    trac_reponame_password = <password>
+appropriately.
 
 '''
-import os, re, xmlrpclib, subprocess
-from bzrlib.builtins import cmd_commit as _cmd_commit, tree_files
-from bzrlib import branch
+import re, urllib, importlib, sys
+from bzrlib.builtins import cmd_commit as _cmd_commit
 import bzrlib
 
+from lxml import html
+
+SENDMAIL = ('/home/kovid/work/kde', 'pgp_mail')
 
 class cmd_commit(_cmd_commit):
 
-    @classmethod
-    def trac_url(self, username, password, url):
-        return url.replace('//', '//%s:%s@'%(username, password))+'/login/xmlrpc'
-
-    def get_trac_summary(self, bug, url):
-        print 'Getting bug summary for bug #%s'%bug,
-        server = xmlrpclib.ServerProxy(url)
-        attributes = server.ticket.get(int(bug))[-1]
-        print attributes['summary']
-        return attributes['summary']
-
-    def expand_bug(self, msg, nick, config, bug_tracker, type='trac'):
-        prefix = '%s_%s_'%(type, nick)
-        username = config.get_user_option(prefix+'username')
-        password = config.get_user_option(prefix+'password')
-        close_bug = config.get_user_option(prefix+'pattern')
-        if close_bug is None:
-            close_bug = r'(Fix|Implement|Fixes|Fixed|Implemented)\s+#(\d+)'
+    def expand_bug(self, msg):
+        close_bug = r'(Fix|Implement|Fixes|Fixed|Implemented)\s+#(\d+)'
         close_bug_pat = re.compile(close_bug, re.IGNORECASE)
         match = close_bug_pat.search(msg)
         if not match:
             return msg, None, None, None
         action, bug = match.group(1), match.group(2)
         summary = ''
-        if type == 'trac':
-            url = self.trac_url(username, password, bug_tracker)
-            summary = self.get_trac_summary(bug, url)
+        raw = urllib.urlopen('https://bugs.launchpad.net/calibre/+bug/' +
+                bug).read()
+        h1 = html.fromstring(raw).xpath('//h1[@id="edit-title"]')[0]
+        summary = html.tostring(h1, method='text', encoding=unicode).strip()
+        print 'Working on bug:', summary
         if summary:
             msg = msg.replace('#%s'%bug, '#%s (%s)'%(bug, summary))
             msg = msg.replace('Fixesed', 'Fixed')
-        return msg, bug, url, action
-
-
-    def get_bugtracker(self, basedir, type='trac'):
-        config = os.path.join(basedir, '.bzr', 'branch', 'branch.conf')
-        bugtracker, nick = None, None
-        if os.access(config, os.R_OK):
-            for line in open(config).readlines():
-                match = re.search(r'%s_(\S+)_url\s*=\s*(\S+)'%type, line)
-                if match:
-                    nick, bugtracker = match.group(1), match.group(2)
-                    break
-        return nick, bugtracker
-
-    def expand_message(self, msg, tree):
-        nick, bugtracker = self.get_bugtracker(tree.basedir, type='trac')
-        if not bugtracker:
-            return msg
-        config =  branch.Branch.open(tree.basedir).get_config()
-        msg, bug, url, action = self.expand_bug(msg, nick, config, bugtracker)
-
-        return msg, bug, url, action, nick, config
+        return msg, bug, action
 
     def run(self, message=None, file=None, verbose=False, selected_list=None,
             unchanged=False, strict=False, local=False, fixes=None,
             author=None, show_diff=False, exclude=None):
-        nick = config = bug = action = None
+        bug = action = None
         if message:
-            try:
-                message, bug, url, action, nick, config = \
-                    self.expand_message(message, tree_files(selected_list)[0])
-            except ValueError:
-                pass
+            message, bug, action = self.expand_bug(message)
 
-            if nick and bug and not fixes:
-                fixes = [nick+':'+bug]
+            if bug and not fixes:
+                fixes = ['lp:'+bug]
 
         ret = _cmd_commit.run(self, message=message, file=file, verbose=verbose,
                               selected_list=selected_list, unchanged=unchanged,
                               strict=strict, local=local, fixes=fixes,
                               author=author, show_diff=show_diff, exclude=exclude)
-        if message and bug and action and nick and config:
-            self.close_bug(bug, action, url, config)
+        if message and bug and action:
+            self.close_bug(bug, action)
         return ret
 
-    def close_bug(self, bug, action, url, config):
+    def close_bug(self, bug, action):
         print 'Closing bug #%s'% bug
         #nick = config.get_nickname()
-        suffix = config.get_user_option('bug_close_comment')
-        if suffix is None:
-            suffix = 'The fix will be in the next release.'
+        suffix = ('The fix will be in the next release. '
+                'calibre is usually released every Friday.')
         action = action+'ed'
         msg = '%s in branch %s. %s'%(action, 'lp:calibre', suffix)
         msg = msg.replace('Fixesed', 'Fixed')
-        server = xmlrpclib.ServerProxy(url)
-        server.ticket.update(int(bug), msg,
-                             {'status':'closed', 'resolution':'fixed'},
-                             True)
-        subprocess.Popen('/home/kovid/work/kde/mail.py -f --delay 10'.split())
+        msg += '\n\n status fixreleased'
+        # The mail helper is loaded from the absolute path in SENDMAIL[0]
+        sys.path.insert(0, SENDMAIL[0])
+
+        sendmail = importlib.import_module(SENDMAIL[1])
+        # Post the comment by mailing it to the bug's Launchpad address
+        to = bug+'@bugs.launchpad.net'
+        sendmail.sendmail(msg, to, 'Re: calibre bug '+bug)
 
 
 bzrlib.commands.register_command(cmd_commit)

From 4c5a6213d95264ab8b8412f6a1826249e754c479 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 20 Mar 2011 22:25:08 -0600
Subject: [PATCH 16/26] Have the donate button go to calibre-ebook.com instead
 of a custom HTML page

---
 src/calibre/gui2/ui.py                | 41 ++++-----------------------
 src/calibre/trac/bzr_commit_plugin.py |  2 +-
 2 files changed, 6 insertions(+), 37 deletions(-)

diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py
index 7b94c1e821..54f0bd3517 100644
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@@ -12,18 +12,17 @@ __docformat__ = 'restructuredtext en'
 import collections, os, sys, textwrap, time, gc
 from Queue import Queue, Empty
 from threading import Thread
-from PyQt4.Qt import Qt, SIGNAL, QTimer, QHelpEvent, QAction, \
-                     QMenu, QIcon, pyqtSignal, \
-                     QDialog, QSystemTrayIcon, QApplication, QKeySequence
+from PyQt4.Qt import (Qt, SIGNAL, QTimer, QHelpEvent, QAction,
+                     QMenu, QIcon, pyqtSignal, QUrl,
+                     QDialog, QSystemTrayIcon, QApplication, QKeySequence)
 
 from calibre import  prints
 from calibre.constants import __appname__, isosx
-from calibre.ptempfile import PersistentTemporaryFile
 from calibre.utils.config import prefs, dynamic
 from calibre.utils.ipc.server import Server
 from calibre.library.database2 import LibraryDatabase2
 from calibre.customize.ui import interface_actions
-from calibre.gui2 import error_dialog, GetMetadata, open_local_file, \
+from calibre.gui2 import error_dialog, GetMetadata, open_url, \
         gprefs, max_available_height, config, info_dialog, Dispatcher, \
         question_dialog
 from calibre.gui2.cover_flow import CoverFlowMixin
@@ -567,37 +566,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
         QApplication.instance().quit()
 
     def donate(self, *args):
-        BUTTON = '''
-        <form action="https://www.paypal.com/cgi-bin/webscr" method="post">
-            <input type="hidden" name="cmd" value="_s-xclick" />
-            <input type="hidden" name="hosted_button_id" value="3029467" />
-            <input type="image" src="https://www.paypal.com/en_US/i/btn/btn_donateCC_LG.gif" border="0" name="submit" alt="Donate to support calibre development" />
-            <img alt="" border="0" src="https://www.paypal.com/en_US/i/scr/pixel.gif" width="1" height="1" />
-        </form>
-        '''
-        MSG = _('is the result of the efforts of many volunteers from all '
-                'over the world. If you find it useful, please consider '
-                'donating to support its development. Your donation helps '
-                'keep calibre development going.')
-        HTML = u'''
-        <html>
-            <head>
-                <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
-                <title>Donate to support calibre</title>
-            </head>
-            <body style="background:white">
-                <div><a href="http://calibre-ebook.com"><img style="border:0px"
-                src="file://%s" alt="calibre" /></a></div>
-                <p>Calibre %s</p>
-                %s
-            </body>
-        </html>
-        '''%(P('content_server/calibre_banner.png').replace(os.sep, '/'), MSG, BUTTON)
-        pt = PersistentTemporaryFile('_donate.htm')
-        pt.write(HTML.encode('utf-8'))
-        pt.close()
-        open_local_file(pt.name)
-
+        open_url(QUrl('http://calibre-ebook.com/donate'))
 
     def confirm_quit(self):
         if self.job_manager.has_jobs():
diff --git a/src/calibre/trac/bzr_commit_plugin.py b/src/calibre/trac/bzr_commit_plugin.py
index c70e6fbf13..c70e8db703 100644
--- a/src/calibre/trac/bzr_commit_plugin.py
+++ b/src/calibre/trac/bzr_commit_plugin.py
@@ -26,7 +26,7 @@ class cmd_commit(_cmd_commit):
         close_bug_pat = re.compile(close_bug, re.IGNORECASE)
         match = close_bug_pat.search(msg)
         if not match:
-            return msg, None, None, None
+            return msg, None, None
         action, bug = match.group(1), match.group(2)
         summary = ''
         raw = urllib.urlopen('https://bugs.launchpad.net/calibre/+bug/' +

From bba6e03a118d1ba3ffa241179029e740efaa46a5 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 20 Mar 2011 22:29:03 -0600
Subject: [PATCH 17/26] Add the keyboard shortcut: Ctrl+Shift+R to restart
 calibre in debug mode

---
 src/calibre/gui2/actions/preferences.py | 5 ++++-
 src/calibre/manual/gui.rst              | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/calibre/gui2/actions/preferences.py b/src/calibre/gui2/actions/preferences.py
index ee52f06aac..6615f5c017 100644
--- a/src/calibre/gui2/actions/preferences.py
+++ b/src/calibre/gui2/actions/preferences.py
@@ -25,8 +25,11 @@ class PreferencesAction(InterfaceAction):
                 self.gui.run_wizard)
         if not DEBUG:
             pm.addSeparator()
-            pm.addAction(QIcon(I('debug.png')), _('Restart in debug mode'),
+            ac = pm.addAction(QIcon(I('debug.png')), _('Restart in debug mode'),
                 self.debug_restart)
+            ac.setShortcut('Ctrl+Shift+R')
+            self.gui.addAction(ac)
+
         self.qaction.setMenu(pm)
         self.preferences_menu = pm
         for x in (self.gui.preferences_action, self.qaction):
diff --git a/src/calibre/manual/gui.rst b/src/calibre/manual/gui.rst
index 158bd81e50..3ef1518209 100644
--- a/src/calibre/manual/gui.rst
+++ b/src/calibre/manual/gui.rst
@@ -549,6 +549,8 @@ Calibre has several keyboard shortcuts to save you time and mouse movement. Thes
       - Download metadata and shortcuts
     * - :kbd:`Ctrl+R`
       - Restart calibre
+    * - :kbd:`Ctrl+Shift+R`
+      - Restart calibre in debug mode
     * - :kbd:`Shift+Ctrl+E`
       - Add empty books to calibre
     * - :kbd:`Ctrl+Q`

From de1e2369b3f7ce128764fb2e2cee6d7ba356084a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 21 Mar 2011 09:33:46 -0600
Subject: [PATCH 18/26] Fix #739212 (new Android device ids)

---
 src/calibre/devices/android/driver.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py
index e2ed159008..26039f16ef 100644
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@@ -64,6 +64,7 @@ class ANDROID(USBMS):
             0x0e79 : {
                 0x1400 : [0x0222, 0x0216],
                 0x1408 : [0x0222, 0x0216],
+                0x1411 : [0x216],
                 0x1417 : [0x0216],
                 0x1419 : [0x0216],
                 0x1420 : [0x0216],

From 9385758a28f6257fe7a4adb263fddfc7ed924888 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 21 Mar 2011 10:09:09 -0600
Subject: [PATCH 19/26] News download: Handle titles with ASCII control codes
 in them. Fixes #739322 (News fetching - NULL bytes problem)

---
 src/calibre/utils/cleantext.py    | 11 +++++++----
 src/calibre/web/feeds/__init__.py |  3 ++-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/calibre/utils/cleantext.py b/src/calibre/utils/cleantext.py
index 89101a6219..27e667612e 100644
--- a/src/calibre/utils/cleantext.py
+++ b/src/calibre/utils/cleantext.py
@@ -8,15 +8,18 @@ import re, htmlentitydefs
 _ascii_pat = None
 
 def clean_ascii_chars(txt, charlist=None):
-    '''
-    Remove ASCII control chars: 0 to 8 and 11, 12, 14-31 by default
-    This is all control chars except \\t,\\n and \\r
+    r'''
+    Remove ASCII control chars.
+    This is all control chars except \t, \n and \r
     '''
     if not txt:
         return ''
     global _ascii_pat
     if _ascii_pat is None:
-        chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F))
+        chars = set(xrange(32))
+        chars.add(127)
+        for x in (9, 10, 13):
+            chars.remove(x)
         _ascii_pat = re.compile(u'|'.join(map(unichr, chars)))
 
     if charlist is None:
diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py
index cddb776b4c..a10fb03f91 100644
--- a/src/calibre/web/feeds/__init__.py
+++ b/src/calibre/web/feeds/__init__.py
@@ -28,6 +28,7 @@ class Article(object):
             pass
         if not isinstance(self._title, unicode):
             self._title = self._title.decode('utf-8', 'replace')
+        self._title = clean_ascii_chars(self._title)
         self.url = url
         self.author = author
         if author and not isinstance(author, unicode):
@@ -75,7 +76,7 @@ class Article(object):
                 t = t.decode('utf-8', 'replace')
             return t
         def fset(self, val):
-            self._title = val
+            self._title = clean_ascii_chars(val)
         return property(fget=fget, fset=fset)
 
 

From 738f5b66e65a23f4a5cc8716fb90d8c69e1c2e7a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 21 Mar 2011 10:38:56 -0600
Subject: [PATCH 20/26] Fix #739484 (Blackberry OS6)

---
 src/calibre/devices/blackberry/driver.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/devices/blackberry/driver.py b/src/calibre/devices/blackberry/driver.py
index e816883957..1ae6a6c49f 100644
--- a/src/calibre/devices/blackberry/driver.py
+++ b/src/calibre/devices/blackberry/driver.py
@@ -19,7 +19,7 @@ class BLACKBERRY(USBMS):
 
     VENDOR_ID   = [0x0fca]
     PRODUCT_ID  = [0x8004, 0x0004]
-    BCD         = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211]
+    BCD         = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211, 0x0220]
 
     VENDOR_NAME = 'RIM'
     WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'

From d12b40a18e52d6cd87582d6d216402c82caa381a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 21 Mar 2011 12:14:15 -0600
Subject: [PATCH 21/26] Fix regression that broke dropping lots of books onto
 items in the Tag Browser

---
 src/calibre/gui2/tag_view.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py
index 5423e546ea..34fa3a8b10 100644
--- a/src/calibre/gui2/tag_view.py
+++ b/src/calibre/gui2/tag_view.py
@@ -16,8 +16,7 @@ from PyQt4.Qt import Qt, QTreeView, QApplication, pyqtSignal, QFont, QSize, \
                      QIcon, QPoint, QVBoxLayout, QHBoxLayout, QComboBox, QTimer,\
                      QAbstractItemModel, QVariant, QModelIndex, QMenu, QFrame,\
                      QPushButton, QWidget, QItemDelegate, QString, QLabel, \
-                     QShortcut, QKeySequence, SIGNAL, QMimeData, QSizePolicy,\
-                     QToolButton
+                     QShortcut, QKeySequence, SIGNAL, QMimeData, QToolButton
 
 from calibre.ebooks.metadata import title_sort
 from calibre.gui2 import config, NONE, gprefs
@@ -1052,12 +1051,12 @@ class TagsModel(QAbstractItemModel): # {{{
         if (key == 'authors' and len(ids) >= 5):
             if not confirm('<p>'+_('Changing the authors for several books can '
                            'take a while. Are you sure?')
-                        +'</p>', 'tag_browser_drop_authors', self.parent()):
+                        +'</p>', 'tag_browser_drop_authors', self.tags_view):
                 return
         elif len(ids) > 15:
             if not confirm('<p>'+_('Changing the metadata for that many books '
                            'can take a while. Are you sure?')
-                        +'</p>', 'tag_browser_many_changes', self.parent()):
+                        +'</p>', 'tag_browser_many_changes', self.tags_view):
                 return
 
         fm = self.db.metadata_for_field(key)

From 83c8257a146ab75f6453a826ebdc0b30ea3f788b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 21 Mar 2011 15:50:21 -0600
Subject: [PATCH 22/26] Conversion: Detect and remove fake page margins that
 are specified as a margin on every paragraph. This can be turned off via an
 option under Structure Detection

---
 src/calibre/ebooks/conversion/cli.py          |   5 +-
 src/calibre/ebooks/conversion/plumber.py      |  15 ++
 .../ebooks/oeb/transforms/page_margin.py      | 153 ++++++++++++++++++
 .../gui2/convert/structure_detection.py       |   2 +-
 .../gui2/convert/structure_detection.ui       |  13 +-
 5 files changed, 183 insertions(+), 5 deletions(-)
 create mode 100644 src/calibre/ebooks/oeb/transforms/page_margin.py

diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py
index 975507e2a7..f1d5d5fe1b 100644
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@@ -49,6 +49,8 @@ HEURISTIC_OPTIONS = ['markup_chapter_headings',
                       'dehyphenate', 'renumber_headings',
                       'replace_scene_breaks']
 
+DEFAULT_TRUE_OPTIONS = HEURISTIC_OPTIONS + ['remove_fake_margins']
+
 def print_help(parser, log):
     help = parser.format_help().encode(preferred_encoding, 'replace')
     log(help)
@@ -90,7 +92,7 @@ def option_recommendation_to_cli_option(add_option, rec):
     if opt.long_switch == 'verbose':
         attrs['action'] = 'count'
         attrs.pop('type', '')
-    if opt.name in HEURISTIC_OPTIONS and rec.recommended_value is True:
+    if opt.name in DEFAULT_TRUE_OPTIONS and rec.recommended_value is True:
         switches = ['--disable-'+opt.long_switch]
     add_option(Option(*switches, **attrs))
 
@@ -162,6 +164,7 @@ def add_pipeline_options(parser, plumber):
                       'chapter', 'chapter_mark',
                       'prefer_metadata_cover', 'remove_first_image',
                       'insert_metadata', 'page_breaks_before',
+                      'remove_fake_margins',
                   ]
                   ),
 
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 9a0c3f3c7f..6272e7b10b 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -304,6 +304,17 @@ OptionRecommendation(name='page_breaks_before',
             'before the specified elements.')
         ),
 
+OptionRecommendation(name='remove_fake_margins',
+            recommended_value=True, level=OptionRecommendation.LOW,
+            help=_('Some documents specify page margins by '
+                'specifying a left and right margin on each individual '
+                'paragraph. calibre will try to detect and remove these '
+                'margins. Sometimes, this can cause the removal of '
+                'margins that should not have been removed. In this '
+                'case you can disable the removal.')
+        ),
+
+
 OptionRecommendation(name='margin_top',
         recommended_value=5.0, level=OptionRecommendation.LOW,
         help=_('Set the top margin in pts. Default is %default. '
@@ -988,9 +999,13 @@ OptionRecommendation(name='sr3_replace',
                 page_break_on_body=self.output_plugin.file_type in ('mobi',
                     'lit'))
         flattener(self.oeb, self.opts)
+
         self.opts.insert_blank_line = oibl
         self.opts.remove_paragraph_spacing = orps
 
+        from calibre.ebooks.oeb.transforms.page_margin import RemoveFakeMargins
+        RemoveFakeMargins()(self.oeb, self.log, self.opts)
+
         pr(0.9)
         self.flush()
 
diff --git a/src/calibre/ebooks/oeb/transforms/page_margin.py b/src/calibre/ebooks/oeb/transforms/page_margin.py
new file mode 100644
index 0000000000..589f004dd1
--- /dev/null
+++ b/src/calibre/ebooks/oeb/transforms/page_margin.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from collections import Counter
+
+from calibre.ebooks.oeb.base import OEB_STYLES, barename, XPath
+
+class RemoveFakeMargins(object):
+
+    '''
+    Remove left and right margins from paragraph/divs if the same margin is specified
+    on almost all the elements at that level.
+
+    Must be called only after CSS flattening
+    '''
+
+    def __call__(self, oeb, log, opts):
+        '''Strip fake page margins from oeb unless the option is disabled.'''
+        if not opts.remove_fake_margins:
+            return
+        self.oeb, self.log, self.opts = oeb, log, opts
+        stylesheet = None
+        self.levels = {}
+        self.stats = {}
+        self.selector_map = {}
+
+        for item in self.oeb.manifest:
+            if item.media_type.lower() in OEB_STYLES:
+                stylesheet = item
+                break
+        if stylesheet is None:
+            return
+
+        self.log('Removing fake margins...')
+        stylesheet = stylesheet.data
+        # Index each rule's style by selector text, for lookup in get_margins
+        from cssutils.css import CSSRule
+        for rule in stylesheet.cssRules.rulesOfType(CSSRule.STYLE_RULE):
+            self.selector_map[rule.selectorList.selectorText] = rule.style
+
+        self.find_levels()
+
+        for level in self.levels:
+            self.process_level(level)
+
+    def get_margins(self, elem):
+        '''Return (left, right, style) from elem's class rule, else ('', '', None).'''
+        cls = elem.get('class', None)
+        if cls:
+            style = self.selector_map.get('.'+cls, None)
+            if style:
+                return style.marginLeft, style.marginRight, style
+        return '', '', None
+
+    def process_level(self, level):
+        '''Tally the margins used at this level and remove those judged fake.'''
+        elems = self.levels[level]
+        self.stats[level+'_left'] = Counter()
+        self.stats[level+'_right'] = Counter()
+
+        for elem in elems:
+            lm, rm = self.get_margins(elem)[:2]
+            self.stats[level+'_left'][lm] += 1
+            self.stats[level+'_right'][rm] += 1
+
+        self.log.debug(level, ' left margin stats:', self.stats[level+'_left'])
+        self.log.debug(level, ' right margin stats:', self.stats[level+'_right'])
+
+        remove_left = self.analyze_stats(self.stats[level+'_left'])
+        remove_right = self.analyze_stats(self.stats[level+'_right'])
+
+        if remove_left:
+            mcl = self.stats[level+'_left'].most_common(1)[0][0]
+            self.log('Removing level %s left margin of:'%level, mcl)
+
+        if remove_right:
+            mcr = self.stats[level+'_right'].most_common(1)[0][0]
+            self.log('Removing level %s right margin of:'%level, mcr)
+
+        if remove_left or remove_right:
+            for elem in elems:
+                lm, rm, style = self.get_margins(elem)
+                if remove_left and lm == mcl:
+                    style.removeProperty('margin-left')
+                if remove_right and rm == mcr:
+                    style.removeProperty('margin-right')
+
+    def find_levels(self):
+        '''Populate self.levels with the p/div groups worth analysing.'''
+        def level_of(elem, body):
+            ans = 1
+            while elem.getparent() is not body:
+                ans += 1
+                elem = elem.getparent()
+            return ans
+
+        paras = XPath('descendant::h:p|descendant::h:div')
+
+        for item in self.oeb.spine:
+            body = XPath('//h:body')(item.data)
+            if not body:
+                continue
+            body = body[0]
+
+            for p in paras(body):
+                # Levels are keyed as '<tag>_<depth>', where a direct child
+                # of <body> has depth 1, e.g. 'p_1' or 'div_2'
+                depth = level_of(p, body)
+                key = '%s_%d'%(barename(p.tag), depth)
+                self.levels.setdefault(key, []).append(p)
+
+        remove = set()
+        for k, v in self.levels.iteritems():
+            num = len(v)
+            self.log.debug('Found %d items of level:'%num, k)
+            level = int(k.split('_')[-1])
+            tag = k.split('_')[0]
+            if tag == 'p' and num < 25:
+                remove.add(k)
+            if tag == 'div':
+                if level > 2 and num < 25:
+                    remove.add(k)
+                elif level < 3:
+                    # Check each level < 3 element and only keep those
+                    # that have many child paras
+                    for elem in list(v):
+                        children = len(paras(elem))
+                        if children < 5:
+                            v.remove(elem)
+
+        for k in remove:
+            self.levels.pop(k)
+            self.log.debug('Ignoring level', k)
+
+    def analyze_stats(self, stats):
+        '''Return True if a single margin value (other than '' or '0') is used
+        by more than 95% of the counted elements.'''
+        if not stats:
+            return False
+        # most_common(1) on a non-empty Counter yields exactly one
+        # (value, count) pair, so no length check is needed
+        most_common, most_common_count = stats.most_common(1)[0]
+        if not most_common or most_common == '0':
+            return False
+        total = sum(stats.values())
+        # True if greater than 95% of elements have the same margin
+        return most_common_count/total > 0.95
diff --git a/src/calibre/gui2/convert/structure_detection.py b/src/calibre/gui2/convert/structure_detection.py
index d8e2f4f122..b58c473bd4 100644
--- a/src/calibre/gui2/convert/structure_detection.py
+++ b/src/calibre/gui2/convert/structure_detection.py
@@ -21,7 +21,7 @@ class StructureDetectionWidget(Widget, Ui_Form):
     def __init__(self, parent, get_option, get_help, db=None, book_id=None):
         Widget.__init__(self, parent,
                 ['chapter', 'chapter_mark',
-                'remove_first_image',
+                'remove_first_image', 'remove_fake_margins',
                 'insert_metadata', 'page_breaks_before']
                 )
         self.db, self.book_id = db, book_id
diff --git a/src/calibre/gui2/convert/structure_detection.ui b/src/calibre/gui2/convert/structure_detection.ui
index f80e6f8182..4ba90c1c2c 100644
--- a/src/calibre/gui2/convert/structure_detection.ui
+++ b/src/calibre/gui2/convert/structure_detection.ui
@@ -48,10 +48,10 @@
      </property>
     </widget>
    </item>
-   <item row="6" column="0" colspan="3">
+   <item row="7" column="0" colspan="3">
     <widget class="XPathEdit" name="opt_page_breaks_before" native="true"/>
    </item>
-   <item row="7" column="0" colspan="3">
+   <item row="8" column="0" colspan="3">
     <spacer name="verticalSpacer">
      <property name="orientation">
       <enum>Qt::Vertical</enum>
@@ -77,7 +77,7 @@
      </property>
     </spacer>
    </item>
-   <item row="4" column="0" colspan="3">
+   <item row="5" column="0" colspan="3">
     <widget class="QLabel" name="label_2">
      <property name="text">
       <string>The header and footer removal options have been replaced by the Search &amp; Replace options. Click the Search &amp; Replace category in the bar to the left to use these options. Leave the replace field blank and enter your header/footer removal regexps into the search field.</string>
@@ -87,6 +87,13 @@
      </property>
     </widget>
    </item>
+   <item row="2" column="2">
+    <widget class="QCheckBox" name="opt_remove_fake_margins">
+     <property name="text">
+      <string>Remove &amp;fake margins</string>
+     </property>
+    </widget>
+   </item>
   </layout>
  </widget>
  <customwidgets>

From 6f3baa43575d0e5e26f9a1ca4f8bbfed06c22cb4 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 21 Mar 2011 16:49:49 -0600
Subject: [PATCH 23/26] Caijing Magazine by Eric Chen

---
 recipes/caijing.recipe | 79 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 recipes/caijing.recipe

diff --git a/recipes/caijing.recipe b/recipes/caijing.recipe
new file mode 100644
index 0000000000..34e6c1e8a9
--- /dev/null
+++ b/recipes/caijing.recipe
@@ -0,0 +1,79 @@
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class Caijing(BasicNewsRecipe):
+
+    title       = 'Caijing Magazine'
+    __author__  = 'Eric Chen'
+
+    description = '''Bi-weekly Finance and Economics Review. Founded in 1998, the fortnightly CAIJING
+                 Magazine has firmly established itself as a news authority and leading voice for
+                 business and financial issues in China.
+                 CAIJING Magazine closely tracks the most important aspects of China's economic reforms,
+                 developments and policy changes, as well as major events in the capital markets. It also
+                 offers a broad international perspective through first-hand reporting on international
+                 political and economic issues.
+                 CAIJING Magazine is China's most widely read business and finance magazine, with a
+                 circulation of 225,000 per issue. It boasts top-level readers from government, business
+                 and academic circles. '''
+    language = 'zh'
+    category = 'news, China'
+    encoding = 'UTF-8'
+    timefmt = ' [%a, %d %b, %Y]'
+    needs_subscription = True
+
+    remove_tags = [dict(attrs={'class':['topad', 'nav', 'searchbox', 'connav',
+        'mbx', 'bianji', 'bianji bj', 'lnewlist', 'rdtj', 'loadComment',
+        'conr', 'bottom', 'bottomcopyr', 'emaildy', 'rcom', 'allcontent']}),
+                dict(name=['script', 'noscript', 'style'])]
+    no_stylesheets = True
+    remove_javascript = True
+    current_issue_url = ""
+    current_issue_cover = ""
+
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None and self.password is not None:
+            br.open('http://service.caijing.com.cn/usermanage/login')
+            br.select_form(name='mainLoginForm')
+            br['username'] = self.username
+            br['password'] = self.password
+            br.submit()
+        return br
+
+    def parse_index(self):
+        '''Return [(section title, [article dict, ...]), ...] for the issue linked from the index page; also records its cover URL.'''
+        soup0 = self.index_to_soup('http://magazine.caijing.com.cn/2011/cjindex2011/')
+        div = soup0.find('div', attrs={'class':'fmcon'})
+        link = div.find('a', href=True)
+        current_issue_url = link['href']
+        soup = self.index_to_soup(current_issue_url)
+
+        for div_cover in soup.findAll('img', {'src' : re.compile('.')}):
+            if re.search(r'\d{4}-\d{2}-\d{2}', div_cover['src']):
+                self.current_issue_cover = div_cover['src']
+
+        feeds = []
+        for section in soup.findAll('div', attrs={'class':'cebd'}):
+            section_title = self.tag_to_string(section.find('div', attrs={'class':'ceti'}))
+            articles = []
+            for post in section.findAll('a', href=True):
+                date_match = re.search(r'\d{4}-\d{2}-\d{2}', post['href'])
+                id_match = re.search(r'\d{9}', post['href'])
+                if date_match is None or id_match is None:
+                    continue  # not an article link: avoids an unbound/stale date and None.group()
+                date = date_match.group(0)
+                url = re.sub(r'\d.*', 'templates/inc/chargecontent2.jsp?id=', post['href'])
+                url = url + id_match.group(0) + '&time=' + date + '&cl=106&page=all'
+                title = self.tag_to_string(post)
+                articles.append({'title':title, 'url':url, 'date':date})
+
+            if articles:
+                feeds.append((section_title, articles))
+        return feeds
+
+    def get_cover_url(self):
+        return self.current_issue_cover
+

From 15fa4f71c4881773f1762aeb3fc9bc4c5ea5200c Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 21 Mar 2011 19:12:54 -0400
Subject: [PATCH 24/26] TXT Input: Textile: Rely on smarty pants to handle
 quotes.

---
 src/calibre/ebooks/textile/functions.py | 14 ++------------
 src/calibre/ebooks/txt/input.py         |  1 +
 src/calibre/utils/smartypants.py        |  6 ++++++
 3 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/src/calibre/ebooks/textile/functions.py b/src/calibre/ebooks/textile/functions.py
index 88d27b036d..c3c82ef893 100755
--- a/src/calibre/ebooks/textile/functions.py
+++ b/src/calibre/ebooks/textile/functions.py
@@ -211,16 +211,6 @@ class Textile(object):
         (re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'),                   r'\1\2&#215;\3'),                       #  dimension sign
         (re.compile(r'(\d+)\'', re.I),                                 r'\1&#8242;'),                          #  prime
         (re.compile(r'(\d+)\"', re.I),                                 r'\1&#8243;'),                          #  prime-double
-        (re.compile(r'(\')\''),                                        r'\1&#8217;'),                          #  single closing - following another
-        (re.compile(r"(\w)\'(\w)"),                                    r'\1&#8217;\2'),                        #  apostrophe's
-        (re.compile(r'(\s)\'(\d+\w?)\b(?!\')'),                        r'\1&#8217;\2'),                        #  back in '88
-        (re.compile(r'(\s\[)\''),                                      r'\1&#8216;'),                          #  single opening - following ws+[
-        (re.compile(r'(\S)\'(?=\s|'+pnct+'|<|$)', re.M),               r'\1&#8217;'),                          #  single closing
-        (re.compile(r'\''),                                            r'&#8216;'),                            #  single opening
-        (re.compile(r'(\")\"'),                                        r'\1&#8221;'),                          #  double closing - following another
-        (re.compile(r'(\s\[)\"'),                                      r'\1&#8220;'),                          #  double opening - following whitespace+[
-        (re.compile(r'(\S)\"(?=\s|'+pnct+'|<|$)', re.M),               r'\1&#8221;'),                          #  double closing
-        (re.compile(r'"'),                                             r'&#8220;'),                            #  double opening
         (re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'),      r'<acronym title="\2">\1</acronym>'),   #  3+ uppercase acronym
         (re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'),         r'<span class="caps">\1</span>'),       #  3+ uppercase
         (re.compile(r'\b(\s{0,1})?\.{3}'),                             r'\1&#8230;'),                          #  ellipsis
@@ -870,11 +860,11 @@ class Textile(object):
         'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
         """
         qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^')
-        pnct = ".,\"'?!;:()"
+        pnct = ".,\"'?!;:"
 
         for qtag in qtags:
             pattern = re.compile(r"""
-                (?:^|(?<=[\s>%(pnct)s])|\[|([\]}]))
+                (?:^|(?<=[\s>%(pnct)s\(])|\[|([\]}]))
                 (%(qtag)s)(?!%(qtag)s)
                 (%(c)s)
                 (?::(\S+))?
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index 99f7035800..7face4c24f 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -165,6 +165,7 @@ class TXTInput(InputFormatPlugin):
         elif options.formatting_type == 'textile':
             log.debug('Running text through textile conversion...')
             html = convert_textile(txt)
+            setattr(options, 'smarten_punctuation', True)
         else:
             log.debug('Running text through basic conversion...')
             flow_size = getattr(options, 'flow_size', 0)
diff --git a/src/calibre/utils/smartypants.py b/src/calibre/utils/smartypants.py
index 62845b8d7a..8763a313fc 100644
--- a/src/calibre/utils/smartypants.py
+++ b/src/calibre/utils/smartypants.py
@@ -584,6 +584,12 @@ def educateQuotes(str):
 	#   <p>He said, "'Quoted' words in a larger quote."</p>
 	str = re.sub(r""""'(?=\w)""", """&#8220;&#8216;""", str)
 	str = re.sub(r"""'"(?=\w)""", """&#8216;&#8220;""", str)
+	str = re.sub(r'''""(?=\w)''', """&#8220;&#8220;""", str)
+	str = re.sub(r"""''(?=\w)""", """&#8216;&#8216;""", str)
+	str = re.sub(r'''\"\'''',     """&#8221;&#8217;""", str)
+	str = re.sub(r'''\'\"''',     """&#8217;&#8221;""", str)
+	str = re.sub(r'''""''',       """&#8221;&#8221;""", str)
+	str = re.sub(r"""''""",       """&#8217;&#8217;""", str)
 
 	# Special case for decade abbreviations (the '80s):
 	str = re.sub(r"""\b'(?=\d{2}s)""", r"""&#8217;""", str)

From d37f302a0e96ec946ed8b78d34732d53dfa1a69a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 21 Mar 2011 22:24:36 -0600
Subject: [PATCH 25/26] ...

---
 src/calibre/trac/bzr_commit_plugin.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/trac/bzr_commit_plugin.py b/src/calibre/trac/bzr_commit_plugin.py
index c70e8db703..7e5a1367cb 100644
--- a/src/calibre/trac/bzr_commit_plugin.py
+++ b/src/calibre/trac/bzr_commit_plugin.py
@@ -60,7 +60,7 @@ class cmd_commit(_cmd_commit):
     def close_bug(self, bug, action):
         print 'Closing bug #%s'% bug
         #nick = config.get_nickname()
-        suffix = ('The fix will be in the next release.'
+        suffix = ('The fix will be in the next release. '
                 'calibre is usually released every Friday.')
         action = action+'ed'
         msg = '%s in branch %s. %s'%(action, 'lp:calibre', suffix)
@@ -72,7 +72,7 @@ class cmd_commit(_cmd_commit):
         sendmail = importlib.import_module(SENDMAIL[1])
 
         to = bug+'@bugs.launchpad.net'
-        sendmail.sendmail(msg, to, 'Re: calibre bug '+bug)
+        sendmail.sendmail(msg, to, 'Fixed in lp:calibre')
 
 
 bzrlib.commands.register_command(cmd_commit)

From 74d1fb4c4912ef93a90ed1622188d76c0b58d56d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 21 Mar 2011 22:26:21 -0600
Subject: [PATCH 26/26] Initial implementation of relevance sorting of metadata
 identify results. Needs testing

---
 src/calibre/ebooks/metadata/sources/amazon.py |  37 ++++--
 src/calibre/ebooks/metadata/sources/base.py   | 105 ++++++++++++++++++
 src/calibre/ebooks/metadata/sources/google.py |   5 +-
 src/calibre/ebooks/metadata/sources/test.py   |  14 +--
 4 files changed, 137 insertions(+), 24 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index b99893ccba..9460ed7ace 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -28,11 +28,12 @@ class Worker(Thread): # {{{
     Get book details from amazons book page in a separate thread
     '''
 
-    def __init__(self, url, result_queue, browser, log, timeout=20):
+    def __init__(self, url, result_queue, browser, log, relevance, plugin, timeout=20):
         Thread.__init__(self)
         self.daemon = True
         self.url, self.result_queue = url, result_queue
         self.log, self.timeout = log, timeout
+        self.relevance, self.plugin = relevance, plugin
         self.browser = browser.clone_browser()
         self.cover_url = self.amazon_id = self.isbn = None
 
@@ -161,6 +162,15 @@ class Worker(Thread): # {{{
         else:
             self.log.warning('Failed to find product description for url: %r'%self.url)
 
+        mi.source_relevance = self.relevance  # position of this url in the search results
+        # The caches belong to the plugin, not to this worker thread; fill them before mi is queued
+        if self.amazon_id:
+            if self.isbn:
+                self.plugin.cache_isbn_to_identifier(self.isbn, self.amazon_id)
+            if self.cover_url:
+                self.plugin.cache_identifier_to_cover_url(self.amazon_id,
+                        self.cover_url)
+
         self.result_queue.put(mi)
 
     def parse_asin(self, root):
@@ -321,6 +331,20 @@ class Amazon(Source):
 
     # }}}
 
+    def get_cached_cover_url(self, identifiers):
+        '''Return the cached cover URL looked up via the amazon, asin or isbn identifier, or None.'''
+        asin = identifiers.get('amazon', None)
+        if asin is None:
+            asin = identifiers.get('asin', None)
+        if asin is None:
+            isbn = identifiers.get('isbn', None)
+            if isbn is not None:
+                asin = self.cached_isbn_to_identifier(isbn)
+
+        if asin is None:
+            return None
+        return self.cached_identifier_to_cover_url(asin)
+
     def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
             identifiers={}, timeout=30):
         '''
@@ -396,7 +420,8 @@ class Amazon(Source):
             log.error('No matches found with query: %r'%query)
             return
 
-        workers = [Worker(url, result_queue, br, log) for url in matches]
+        workers = [Worker(url, result_queue, br, log, i, self) for i, url in
+                enumerate(matches)]
 
         for w in workers:
             w.start()
@@ -414,14 +439,6 @@ class Amazon(Source):
             if not a_worker_is_alive:
                 break
 
-        for w in workers:
-            if w.amazon_id:
-                if w.isbn:
-                    self.cache_isbn_to_identifier(w.isbn, w.amazon_id)
-                if w.cover_url:
-                    self.cache_identifier_to_cover_url(w.amazon_id,
-                            w.cover_url)
-
         return None
     # }}}
 
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 55cc996cf7..90d7f82d65 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -21,6 +21,21 @@ def create_log(ostream=None):
     log.outputs = [FileStream(ostream)]
     return log
 
+words = ("the", "a", "an", "of", "and")
+prefix_pat = re.compile(r'^(%s)\s+'%("|".join(words)))
+trailing_paren_pat = re.compile(r'\(.*\)$')
+whitespace_pat = re.compile(r'\s+')
+
+def cleanup_title(s):
+    if not s:
+        s = _('Unknown')
+    s = s.strip().lower()
+    s = prefix_pat.sub(' ', s)
+    s = trailing_paren_pat.sub('', s)
+    s = whitespace_pat.sub(' ', s)
+    return s.strip()
+
+
 class Source(Plugin):
 
     type = _('Metadata source')
@@ -128,10 +143,91 @@ class Source(Plugin):
                 gr.append(job)
         return [g for g in groups if g]
 
+    def test_fields(self, mi):
+        '''
+        Return the first field from self.touched_fields that is null on the
+        mi object
+        '''
+        for key in self.touched_fields:
+            if key.startswith('identifier:'):
+                key = key.partition(':')[-1]
+                if not mi.has_identifier(key):
+                    return 'identifier: ' + key
+            elif mi.is_null(key):
+                return key
+
+
     # }}}
 
     # Metadata API {{{
 
+    def get_cached_cover_url(self, identifiers):
+        '''
+        Return cached cover URL for the book identified by
+        the identifiers dict or None if no such URL exists
+        '''
+        return None
+
+    def compare_identify_results(self, x, y, title=None, authors=None,
+            identifiers={}):
+        '''
+        Method used to sort the results from a call to identify by relevance.
+        Uses the actual query and various heuristics to rank results.
+        Re-implement in your plugin if this generic algorithm is not suitable.
+        Note that this method assumes x and y have a source_relevance
+        attribute.
+
+        x < y (negative return value) iff x is more relevant than y
+        '''
+        # First, guarantee that if the query specifies an ISBN, the result with
+        # the same isbn is the most relevant
+        def isbn_test(mi):
+            return mi.isbn and mi.isbn == identifiers.get('isbn', None)
+
+        def boolcmp(a, b):
+            return -1 if a and not b else 1 if not a and b else 0
+
+        x_has_isbn, y_has_isbn = isbn_test(x), isbn_test(y)
+        result = boolcmp(x_has_isbn, y_has_isbn)
+        if result != 0:
+            return result
+
+        # Now prefer results that have complete metadata over those that don't
+        x_has_all_fields = self.test_fields(x) is None
+        y_has_all_fields = self.test_fields(y) is None
+
+        result = boolcmp(x_has_all_fields, y_has_all_fields)
+        if result != 0:
+            return result
+
+        # Now prefer results whose title matches the search query
+        if title:
+            x_title = cleanup_title(x.title)
+            y_title = cleanup_title(y.title)
+            t = cleanup_title(title)
+            x_has_title, y_has_title = x_title == t, y_title == t
+            result = boolcmp(x_has_title, y_has_title)
+            if result != 0:
+                return result
+
+        # Now prefer results with the longer comments, within 10%
+        cx = len(x.comments.strip() if x.comments else '')
+        cy = len(y.comments.strip() if y.comments else '')
+        t = (cx + cy) / 20
+        result = cy - cx
+        if result != 0 and abs(cx - cy) > t:
+            return result
+
+        # Now prefer results with cached cover URLs
+        x_has_cover = self.get_cached_cover_url(x.identifiers) is not None
+        y_has_cover = self.get_cached_cover_url(y.identifiers) is not None
+        result = boolcmp(x_has_cover, y_has_cover)
+        if result != 0:
+            return result
+
+        # Now use the relevance reported by the remote search engine
+        return x.source_relevance - y.source_relevance
+
     def identify(self, log, result_queue, abort, title=None, authors=None,
             identifiers={}, timeout=5):
         '''
@@ -147,6 +243,15 @@ class Source(Plugin):
         the same ISBN/special identifier does not need to get the cover URL
         again. Use the caching API for this.
 
+        Every Metadata object put into result_queue by this method must have a
+        `source_relevance` attribute that is an integer indicating the order in
+        which the results were returned by the metadata source for this query.
+        This integer will be used by :meth:`compare_identify_results`. If the
+        order is unimportant, set it to zero for every result.
+
+        Make sure that any cover/isbn mapping information is cached before the
+        Metadata object is put into result_queue.
+
         :param log: A log object, use it to output debugging information/errors
         :param result_queue: A result Queue, results should be put into it.
                             Each result is a Metadata object
diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
index c44ad81b6c..b7298c0099 100644
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -190,14 +190,15 @@ class GoogleBooks(Source):
         return raw and len(raw) > 17000 and raw[1:4] != 'PNG'
 
     def get_all_details(self, br, log, entries, abort, result_queue, timeout):
-        for i in entries:
+        for relevance, i in enumerate(entries):
             try:
                 ans = to_metadata(br, log, i, timeout)
                 if isinstance(ans, Metadata):
-                    result_queue.put(ans)
+                    ans.source_relevance = relevance
                     for isbn in getattr(ans, 'all_isbns', []):
                         self.cache_isbn_to_identifier(isbn,
                                 ans.identifiers['google'])
+                    result_queue.put(ans)
             except:
                 log.exception(
                     'Failed to get metadata for identify entry:',
diff --git a/src/calibre/ebooks/metadata/sources/test.py b/src/calibre/ebooks/metadata/sources/test.py
index 2af9a47078..032041ef29 100644
--- a/src/calibre/ebooks/metadata/sources/test.py
+++ b/src/calibre/ebooks/metadata/sources/test.py
@@ -46,15 +46,6 @@ def authors_test(authors):
 
     return test
 
-def _test_fields(touched_fields, mi):
-    for key in touched_fields:
-        if key.startswith('identifier:'):
-            key = key.partition(':')[-1]
-            if not mi.has_identifier(key):
-                return 'identifier: ' + key
-        elif mi.is_null(key):
-            return key
-
 
 def test_identify_plugin(name, tests):
     '''
@@ -120,11 +111,10 @@ def test_identify_plugin(name, tests):
             prints('Log saved to', lf)
             raise SystemExit(1)
 
-        good = [x for x in possibles if _test_fields(plugin.touched_fields, x) is
+        good = [x for x in possibles if plugin.test_fields(x) is
                 None]
         if not good:
-            prints('Failed to find', _test_fields(plugin.touched_fields,
-                possibles[0]))
+            prints('Failed to find', plugin.test_fields(possibles[0]))
             raise SystemExit(1)