sync with Kovid's branch

2025-07-09 03:04:10 -04:00 · 2013-05-16 19:32:12 +02:00 · 2013-05-16 19:32:12 +02:00 · 7b96fc5530
commit 7b96fc5530
parent ce03634581 6ad8c353ee
186 changed files with 65974 additions and 52383 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -20,6 +20,60 @@
 #   new recipes:
 #     - title: 
 - version: 0.9.30
  date: 2013-05-10
  new features:
    - title: "Kobo driver: Add support for showing 'Archived' books on the device. Also up the supported firmware version to 2.5.3."
      tickets: [1177677]
    - title: "Driver for Blackberry 9790"
      tickets: [1176607]
    - title: "Add a tweak to turn off the highlighting of the book count when using a virtual library (Preferences->Tweaks)"
    - title: "Add a button to clear the viewer search history in the viewer Preferences, under Miscellaneous"
    - title: "Add keyboard shortcuts to clear the virtual Library and the additional restriction (Ctrl+Esc and Alt+Esc). Also use Shift+Esc to bring keyboard focus back to the book list. Can be changed under Preferences->Keyboard"
    - title: "Docx metadata: Read the language of the file, if present"
  bug fixes:
    - title: "Kobo driver: Fix unable to read SD card on OS X/Linux"
      tickets: [1174815]
    - title: "Content server: Fix unable to download ORIGINAL_* formats"
      tickets: [1177158]
    - title: "Fix regression that broke searching for terms containing a quote mark"
      tickets: [1177114]
    - title: "Fix regression that broke conversion of txt files when no input encoding is specified"
      tickets: [1176622]
    - title: "When changing to a virtual library, refresh the Book Details panel."
      tickets: [1176296]
    - title: "Fix regression that caused searching for user categories to break."
      tickets: [1176187]
    - title: "Fix error when downloading only covers and reviewing downloaded metadata."
      tickets: [1176253]
    - title: "MOBI metadata: Strip XML unsafe unicode codepoints when reading metadata from MOBI files."
      tickets: [1175965]
    - title: "Txt Input: Use the gbk encoding for txt files with detected encoding of gb2312."
      tickets: [1175974]
    - title: "When pressing Ctrl+Home/End preserve the horizontal scroll position in the book list"
  improved recipes:
    - NSFW
    - Go Comics
    - Various Polish news sources
    - The Sun
 - version: 0.9.29
  date: 2013-05-03
--- a/recipes/benchmark_pl.recipe
+++ b/recipes/benchmark_pl.recipe
@ -12,12 +12,15 @@ class BenchmarkPl(BasicNewsRecipe):
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    extra_css = 'ul {list-style-type: none;}'
    no_stylesheets = True
-    remove_attributes = ['style']
+    #remove_attributes = ['style']
    preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
-    keep_only_tags = [dict(name='div', attrs={'class':['m_zwykly', 'gallery']}), dict(id='article')]
+
    keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict(name='div', attrs={'class':['m_zwykly', 'gallery']}), dict(id='article')]
    remove_tags_after = dict(id='article')
    remove_tags = [dict(name='div', attrs={'class':['comments', 'body', 'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs = {'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})]
    INDEX = 'http://www.benchmark.pl'
    feeds          = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
                          (u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')]
@ -42,46 +45,16 @@ class BenchmarkPl(BasicNewsRecipe):
        for r in appendtag.findAll(attrs={'class':'changePage'}):
            r.extract()
    def image_article(self, soup, appendtag):
        """Merge every page of an image gallery into ``appendtag``.

        The first page's ``preview`` element is turned into an ``<img>`` whose
        ``src`` is built from the inline style of its inner ``<div>``.  Each
        following page (reached through the ``move_next`` link) is downloaded,
        converted the same way, stripped of navigation and rating widgets and
        appended to ``appendtag``.

        :param soup: parsed first page of the gallery.
        :param appendtag: tag that receives the follow-up pages (the caller
            passes ``soup.body``).
        """
        nexturl = soup.find('div', attrs={'class':'preview'})
        if nexturl:
            nexturl = nexturl.find('a', attrs={'class':'move_next'})
            # The slice drops a fixed 16-character prefix of the style value
            # (presumably "background:url('" -- confirm against the site).
            image = appendtag.find('div', attrs={'class':'preview'}).div['style'][16:]
            image = self.INDEX + image[:image.find("')")]
            preview = appendtag.find(attrs={'class':'preview'})
            preview.name = 'img'
            preview['src'] = image
            # A single-image gallery has no "next" link; only remove it when
            # present instead of calling extract() on None.
            forward_link = appendtag.find('a', attrs={'class':'move_next'})
            if forward_link is not None:
                forward_link.extract()
        while nexturl:
            nexturl = self.INDEX + nexturl['href']
            soup2 = self.index_to_soup(nexturl)
            # None on the last page, which ends the loop.
            nexturl = soup2.find('a', attrs={'class':'move_next'})
            image = soup2.find('div', attrs={'class':'preview'}).div['style'][16:]
            image = self.INDEX + image[:image.find("')")]
            preview = soup2.find(attrs={'class':'preview'})
            preview.name = 'img'
            preview['src'] = image
            pagetext = soup2.find('div', attrs={'class':'gallery'})
            pagetext.find('div', attrs={'class':'title'}).extract()
            pagetext.find('div', attrs={'class':'thumb'}).extract()
            pagetext.find('div', attrs={'class':'panelOcenaObserwowane'}).extract()
            if nexturl:
                pagetext.find('a', attrs={'class':'move_next'}).extract()
            pagetext.find('a', attrs={'class':'move_back'}).extract()
            # Drop HTML comments so they do not end up in the merged article.
            comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
            for comment in comments:
                comment.extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
    def preprocess_html(self, soup):
        if soup.find('div', attrs={'class':'preview'}):
            self.image_article(soup, soup.body)
        else:
        self.append_page(soup, soup.body)
        for a in soup('a'):
            if a.has_key('href') and not a['href'].startswith('http'):
                a['href'] = self.INDEX + a['href']
        for r in soup.findAll(attrs={'class':['comments', 'body']}):
            r.extract()
        tag1 = soup.find(attrs={'class':'inlineGallery'})
        if tag1:
            for tag in tag1.findAll('li'):
                tag['style'] = 'float: left; margin-right: 10px;'
            tag1.findNext('p')['style'] = 'clear: both;'
        return soup
--- a/recipes/comics_com.recipe
+++ b/recipes/comics_com.recipe
@ -1,224 +0,0 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class Comics(BasicNewsRecipe):
    title               = 'Comics.com'
    __author__          = 'Starson17'
    description         = 'Comics from comics.com. You should customize this recipe to fetch only the comics you are interested in'
    language            = 'en'
    use_embedded_content= False
    no_stylesheets      = True
    oldest_article      = 24
    remove_javascript   = True
    cover_url           = 'http://www.bsb.lib.tx.us/images/comics.com.gif'
    recursions          = 0
    max_articles_per_feed = 10
    num_comics_to_get = 7
    simultaneous_downloads = 1
    # delay = 3
    keep_only_tags     = [dict(name='a', attrs={'class':'STR_StripImage'}),
                          dict(name='div', attrs={'class':'STR_Date'})
                          ]
    def parse_index(self):
        """Return the feed index as a list of ``(title, articles)`` pairs.

        Every enabled comic below is passed to ``make_links``; comics for
        which it returns nothing are left out of the result.
        """
        # Remove the leading "# " from an entry to enable that comic.
        comic_pages = (
            ("9 Chickweed Lane", "http://comics.com/9_chickweed_lane"),
            ("Agnes", "http://comics.com/agnes"),
            ("Alley Oop", "http://comics.com/alley_oop"),
            ("Andy Capp", "http://comics.com/andy_capp"),
            ("Arlo & Janis", "http://comics.com/arlo&janis"),
            ("B.C.", "http://comics.com/bc"),
            ("Ballard Street", "http://comics.com/ballard_street"),
            # ("Ben", "http://comics.com/ben"),
            # ("Betty", "http://comics.com/betty"),
            # ("Big Nate", "http://comics.com/big_nate"),
            # ("Brevity", "http://comics.com/brevity"),
            # ("Candorville", "http://comics.com/candorville"),
            # ("Cheap Thrills", "http://comics.com/cheap_thrills"),
            # ("Committed", "http://comics.com/committed"),
            # ("Cow & Boy", "http://comics.com/cow&boy"),
            # ("Daddy's Home", "http://comics.com/daddys_home"),
            # ("Dog eat Doug", "http://comics.com/dog_eat_doug"),
            # ("Drabble", "http://comics.com/drabble"),
            # ("F Minus", "http://comics.com/f_minus"),
            # ("Family Tree", "http://comics.com/family_tree"),
            # ("Farcus", "http://comics.com/farcus"),
            # ("Fat Cats Classics", "http://comics.com/fat_cats_classics"),
            # ("Ferd'nand", "http://comics.com/ferdnand"),
            # ("Flight Deck", "http://comics.com/flight_deck"),
            # ("Flo & Friends", "http://comics.com/flo&friends"),
            # ("Fort Knox", "http://comics.com/fort_knox"),
            # ("Frank & Ernest", "http://comics.com/frank&ernest"),
            # ("Frazz", "http://comics.com/frazz"),
            # ("Free Range", "http://comics.com/free_range"),
            # ("Geech Classics", "http://comics.com/geech_classics"),
            # ("Get Fuzzy", "http://comics.com/get_fuzzy"),
            # ("Girls & Sports", "http://comics.com/girls&sports"),
            # ("Graffiti", "http://comics.com/graffiti"),
            # ("Grand Avenue", "http://comics.com/grand_avenue"),
            # ("Heathcliff", "http://comics.com/heathcliff"),
            # "Heathcliff, a street-smart and mischievous cat with many adventures."
            # ("Herb and Jamaal", "http://comics.com/herb_and_jamaal"),
            # ("Herman", "http://comics.com/herman"),
            # ("Home and Away", "http://comics.com/home_and_away"),
            # ("It's All About You", "http://comics.com/its_all_about_you"),
            # ("Jane's World", "http://comics.com/janes_world"),
            # ("Jump Start", "http://comics.com/jump_start"),
            # ("Kit 'N' Carlyle", "http://comics.com/kit_n_carlyle"),
            # ("Li'l Abner Classics", "http://comics.com/lil_abner_classics"),
            # ("Liberty Meadows", "http://comics.com/liberty_meadows"),
            # ("Little Dog Lost", "http://comics.com/little_dog_lost"),
            # ("Lola", "http://comics.com/lola"),
            # ("Luann", "http://comics.com/luann"),
            # ("Marmaduke", "http://comics.com/marmaduke"),
            # ("Meg! Classics", "http://comics.com/meg_classics"),
            # ("Minimum Security", "http://comics.com/minimum_security"),
            # ("Moderately Confused", "http://comics.com/moderately_confused"),
            # ("Momma", "http://comics.com/momma"),
            # ("Monty", "http://comics.com/monty"),
            # ("Motley Classics", "http://comics.com/motley_classics"),
            # ("Nancy", "http://comics.com/nancy"),
            # ("Natural Selection", "http://comics.com/natural_selection"),
            # ("Nest Heads", "http://comics.com/nest_heads"),
            # ("Off The Mark", "http://comics.com/off_the_mark"),
            # ("On a Claire Day", "http://comics.com/on_a_claire_day"),
            # ("One Big Happy Classics", "http://comics.com/one_big_happy_classics"),
            # ("Over the Hedge", "http://comics.com/over_the_hedge"),
            # ("PC and Pixel", "http://comics.com/pc_and_pixel"),
            # ("Peanuts", "http://comics.com/peanuts"),
            # ("Pearls Before Swine", "http://comics.com/pearls_before_swine"),
            # ("Pickles", "http://comics.com/pickles"),
            # ("Prickly City", "http://comics.com/prickly_city"),
            # ("Raising Duncan Classics", "http://comics.com/raising_duncan_classics"),
            # ("Reality Check", "http://comics.com/reality_check"),
            # ("Red & Rover", "http://comics.com/red&rover"),
            # ("Rip Haywire", "http://comics.com/rip_haywire"),
            # ("Ripley's Believe It or Not!", "http://comics.com/ripleys_believe_it_or_not"),
            # ("Rose Is Rose", "http://comics.com/rose_is_rose"),
            # ("Rubes", "http://comics.com/rubes"),
            # ("Rudy Park", "http://comics.com/rudy_park"),
            # ("Scary Gary", "http://comics.com/scary_gary"),
            # ("Shirley and Son Classics", "http://comics.com/shirley_and_son_classics"),
            # ("Soup To Nutz", "http://comics.com/soup_to_nutz"),
            # ("Speed Bump", "http://comics.com/speed_bump"),
            # ("Spot The Frog", "http://comics.com/spot_the_frog"),
            # ("State of the Union", "http://comics.com/state_of_the_union"),
            # ("Strange Brew", "http://comics.com/strange_brew"),
            # ("Tarzan Classics", "http://comics.com/tarzan_classics"),
            # ("That's Life", "http://comics.com/thats_life"),
            # ("The Barn", "http://comics.com/the_barn"),
            # ("The Born Loser", "http://comics.com/the_born_loser"),
            # ("The Buckets", "http://comics.com/the_buckets"),
            # ("The Dinette Set", "http://comics.com/the_dinette_set"),
            # ("The Grizzwells", "http://comics.com/the_grizzwells"),
            # ("The Humble Stumble", "http://comics.com/the_humble_stumble"),
            # ("The Knight Life", "http://comics.com/the_knight_life"),
            # ("The Meaning of Lila", "http://comics.com/the_meaning_of_lila"),
            # ("The Other Coast", "http://comics.com/the_other_coast"),
            # ("The Sunshine Club", "http://comics.com/the_sunshine_club"),
            # ("Unstrange Phenomena", "http://comics.com/unstrange_phenomena"),
            # ("Watch Your Head", "http://comics.com/watch_your_head"),
            # ("Wizard of Id", "http://comics.com/wizard_of_id"),
            # ("Working Daze", "http://comics.com/working_daze"),
            # ("Working It Out", "http://comics.com/working_it_out"),
            # ("Zack Hill", "http://comics.com/zack_hill"),
            # ("(Th)ink", "http://comics.com/think"),
            # "Tackling the political and social issues impacting communities of color."
            # ("Adam Zyglis", "http://comics.com/adam_zyglis"),
            # "Known for his excellent caricatures, as well as independent and incisive imagery. "
            # ("Andy Singer", "http://comics.com/andy_singer"),
            # ("Bill Day", "http://comics.com/bill_day"),
            # "Powerful images on sensitive issues."
            # ("Bill Schorr", "http://comics.com/bill_schorr"),
            # ("Bob Englehart", "http://comics.com/bob_englehart"),
            # ("Brian Fairrington", "http://comics.com/brian_fairrington"),
            # ("Bruce Beattie", "http://comics.com/bruce_beattie"),
            # ("Cam Cardow", "http://comics.com/cam_cardow"),
            # ("Chip Bok", "http://comics.com/chip_bok"),
            # ("Chris Britt", "http://comics.com/chris_britt"),
            # ("Chuck Asay", "http://comics.com/chuck_asay"),
            # ("Clay Bennett", "http://comics.com/clay_bennett"),
            # ("Daryl Cagle", "http://comics.com/daryl_cagle"),
            # ("David Fitzsimmons", "http://comics.com/david_fitzsimmons"),
            # "David Fitzsimmons is a new editorial cartoons on comics.com.  He is also a staff writer and editorial cartoonist for the Arizona Daily Star. "
            # ("Drew Litton", "http://comics.com/drew_litton"),
            # "Drew Litton is an artist who is probably best known for his sports cartoons. He received the National Cartoonist Society Sports Cartoon Award for 1993. "
            # ("Ed Stein", "http://comics.com/ed_stein"),
            # "Winner of the Fischetti Award in 2006 and the Scripps Howard National Journalism Award, 1999, Ed Stein has been the editorial cartoonist for the Rocky Mountain News since 1978. "
            # ("Eric Allie", "http://comics.com/eric_allie"),
            # "Eric Allie is an editorial cartoonist with the Pioneer Press and CNS News. "
            # ("Gary Markstein", "http://comics.com/gary_markstein"),
            # ("Gary McCoy", "http://comics.com/gary_mccoy"),
            # "Gary McCoy is known for his editorial cartoons, humor and inane ramblings. He is a 2 time nominee for  Best  Magazine Cartoonist of the Year by the National Cartoonists Society. He resides in Belleville, IL. "
            # ("Gary Varvel", "http://comics.com/gary_varvel"),
            # ("Henry Payne", "http://comics.com/henry_payne"),
            # ("JD Crowe", "http://comics.com/jd_crowe"),
            # ("Jeff Parker", "http://comics.com/jeff_parker"),
            # ("Jeff Stahler", "http://comics.com/jeff_stahler"),
            # ("Jerry Holbert", "http://comics.com/jerry_holbert"),
            # ("John Cole", "http://comics.com/john_cole"),
            # ("John Darkow", "http://comics.com/john_darkow"),
            # "John Darkow is a contributing editorial cartoonist for the Humor Times as well as editorial cartoonist for  the Columbia Daily Tribune, Missouri"
            # ("John Sherffius", "http://comics.com/john_sherffius"),
            # ("Larry Wright", "http://comics.com/larry_wright"),
            # ("Lisa Benson", "http://comics.com/lisa_benson"),
            # ("Marshall Ramsey", "http://comics.com/marshall_ramsey"),
            # ("Matt Bors", "http://comics.com/matt_bors"),
            # ("Michael Ramirez", "http://comics.com/michael_ramirez"),
            # ("Mike Keefe", "http://comics.com/mike_keefe"),
            # ("Mike Luckovich", "http://comics.com/mike_luckovich"),
            # ("MIke Thompson", "http://comics.com/mike_thompson"),
            # ("Monte Wolverton", "http://comics.com/monte_wolverton"),
            # "Unique mix of perspectives"
            # ("Mr. Fish", "http://comics.com/mr_fish"),
            # "Side effects may include swelling"
            # ("Nate Beeler", "http://comics.com/nate_beeler"),
            # "Middle America meets the Beltway."
            # ("Nick Anderson", "http://comics.com/nick_anderson"),
            # ("Pat Bagley", "http://comics.com/pat_bagley"),
            # "Unfair and Totally Unbalanced."
            # ("Paul Szep", "http://comics.com/paul_szep"),
            # ("RJ Matson", "http://comics.com/rj_matson"),
            # "Power cartoons from NYC and Capitol Hill"
            # ("Rob Rogers", "http://comics.com/rob_rogers"),
            # "Humorous slant on current events"
            # ("Robert Ariail", "http://comics.com/robert_ariail"),
            # "Clever and unpredictable"
            # ("Scott Stantis", "http://comics.com/scott_stantis"),
            # ("Signe Wilkinson", "http://comics.com/signe_wilkinson"),
            # ("Steve Benson", "http://comics.com/steve_benson"),
            # ("Steve Breen", "http://comics.com/steve_breen"),
            # ("Steve Kelley", "http://comics.com/steve_kelley"),
            # ("Steve Sack", "http://comics.com/steve_sack"),
        )
        # Lazily pair each comic with its article list, in table order.
        fetched = ((name, self.make_links(link)) for name, link in comic_pages)
        return [(name, entries) for name, entries in fetched if entries]
    def make_links(self, url):
        """Build the article list for one comic, one entry per strip page.

        Pages ``url/?Page=1`` .. ``url/?Page=<num_comics_to_get>`` are fetched
        in turn.  Each page yields one article dict whose title is taken from
        the ``title`` attribute of the ``STR_StripImage`` link; when a page has
        no such link the most recently seen title is reused (empty string if
        none was seen yet).

        :param url: base URL of the comic.
        :return: list of article dicts, in reverse page order (highest page
            number first).
        """
        # The bare ``url`` is deliberately not fetched: only the numbered
        # pages are used, so requesting it would be a wasted download.
        title = ''
        current_articles = []
        for page in range(1, self.num_comics_to_get + 1):
            page_url = url + '/?Page=' + str(page)
            soup = self.index_to_soup(page_url)
            if soup:
                strip_tag = soup.find('a', attrs={'class': 'STR_StripImage'})
                if strip_tag:
                    title = strip_tag['title']
            current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
        current_articles.reverse()
        return current_articles
    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
 		'''
--- a/recipes/computerworld_pl.recipe
+++ b/recipes/computerworld_pl.recipe
@ -16,7 +16,7 @@ class Computerworld_pl(BasicNewsRecipe):
    preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''), (re.compile(ur'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''),]
    keep_only_tags = [dict(id=['szpaltaL', 's2011'])]
    remove_tags_after = dict(name='div', attrs={'class':'tresc'})
-    remove_tags = [dict(attrs={'class':['nnav', 'rMobi', 'tagi', 'rec']}),]
+    remove_tags = [dict(attrs={'class':['nnav', 'rMobi', 'tagi', 'rec']}), dict(name='a', attrs={'target':'_blank'})]
    feeds          = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
    def skip_ad_pages(self, soup):
--- a/recipes/conowego_pl.recipe
+++ b/recipes/conowego_pl.recipe
@ -15,6 +15,7 @@ class CoNowegoPl(BasicNewsRecipe):
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    keep_only_tags = [dict(name='div', attrs={'class':'news_list single_view'})]
    remove_tags = [dict(name='div', attrs={'class':['ni_bottom', 'ni_rank', 'ni_date']})]
    feeds          = [(u'Aktualno\u015bci', u'http://www.conowego.pl/rss/aktualnosci-5/?type=100'), (u'Gaming', u'http://www.conowego.pl/rss/gaming-6/?type=100'), (u'Porady', u'http://www.conowego.pl/rss/porady-3/?type=100'), (u'Testy', u'http://www.conowego.pl/rss/testy-2/?type=100')]
--- a/recipes/di.recipe
+++ b/recipes/di.recipe
@ -1,4 +1,5 @@
 #!/usr/bin/env  python
 # vim:fileencoding=UTF-8
 __license__     = 'GPL v3'
 __author__ = 'Mori'
@ -14,7 +15,7 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
    __author__ = 'Mori'
    language = 'pl'
-    title = u'Dziennik Internautow'
+    title = u'Dziennik Internautów'
    publisher = u'Dziennik Internaut\u00f3w Sp. z o.o.'
    description = u'Internet w \u017cyciu i biznesie. Porady, wywiady, interwencje, bezpiecze\u0144stwo w Sieci, technologia.'
--- a/recipes/dot_net.recipe
+++ b/recipes/dot_net.recipe
@ -1,32 +1,37 @@
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
-class NetMagazineRecipe (BasicNewsRecipe):
+class dotnetMagazine (BasicNewsRecipe):
-   __author__ = u'Marc Busqué <marc@lamarciana.com>'
+    __author__ = u'Bonni Salles'
   __url__ = 'http://www.lamarciana.com'
    __version__ = '1.0'
    __license__   = 'GPL v3'
-   __copyright__ = u'2012, Marc Busqué <marc@lamarciana.com>'
+    __copyright__ = u'2013, Bonni Salles'
-   title = u'.net magazine'
+    title                 = '.net magazine'
   description = u'net is the world’s best-selling magazine for web designers and developers, featuring tutorials from leading agencies, interviews with the web’s biggest names, and agenda-setting features on the hottest issues affecting the internet today.'
   language = 'en'
   tags = 'web development, software'
    oldest_article        = 7
   remove_empty_feeds = True
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
    language              = 'en'
    remove_empty_feeds    = True
    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
    cover_url = u'http://media.netmagazine.futurecdn.net/sites/all/themes/netmag/logo.png'
-   keep_only_tags = [
+
-         dict(name='article', attrs={'class': re.compile('^node.*$', re.IGNORECASE)})
+    remove_tags_after = dict(name='footer', id=lambda x:not x)
-         ]
+    remove_tags_before = dict(name='header', id=lambda x:not x)
    remove_tags = [
-         dict(name='span', attrs={'class': 'comment-count'}),
+         dict(name='div', attrs={'class': 'item-list'}),
-         dict(name='div', attrs={'class': 'item-list share-links'}),
+         dict(name='h4', attrs={'class': 'std-hdr'}),
-         dict(name='footer'),
+         dict(name='div', attrs={'class': 'item-list share-links'}), #removes share links
         dict(name=['script', 'noscript']),
         dict(name='div', attrs={'id': 'comments-form'}), #comment these out if you want the comments to show
         dict(name='div', attrs={'id': re.compile('advertorial_block_($|| )')}),
         dict(name='div', attrs={'id': 'right-col'}),
         dict(name='div', attrs={'id': 'comments'}), #comment these out if you want the comments to show
         dict(name='div', attrs={'class': 'item-list related-content'}),
         ]
   remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height', 'style']
   extra_css = 'img {max-width: 100%; display: block; margin: auto;} .captioned-image div {text-align: center; font-style: italic;}'
    feeds = [
-         (u'.net', u'http://feeds.feedburner.com/net/topstories'),
+               (u'net', u'http://feeds.feedburner.com/net/topstories')
            ]
--- a/recipes/dziennik_lodzki.recipe
+++ b/recipes/dziennik_lodzki.recipe
@ -16,7 +16,7 @@ class DziennikLodzki(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
    remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
-    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
+    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(name='a', attrs={'href':'http://www.dzienniklodzki.pl/piano'})]
    feeds          = [(u'Na sygnale', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_nasygnale.xml?201302'), (u'\u0141\xf3d\u017a', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_lodz.xml?201302'), (u'Opinie', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_opinie.xml?201302'), (u'Pieni\u0105dze', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533763/index.rss?201302'), (u'Kultura', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533762/index.rss?201302'), (u'Sport', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533761/index.rss?201302'), (u'Akcje', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_akcje.xml?201302'), (u'M\xf3j Reporter', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_mojreporter.xml?201302'), (u'Studni\xf3wki', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_studniowki.xml?201302'), (u'Kraj', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_kraj.xml?201302'), (u'Zdrowie', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_zdrowie.xml?201302')]
--- a/recipes/dziennik_zachodni.recipe
+++ b/recipes/dziennik_zachodni.recipe
@ -16,7 +16,7 @@ class DziennikZachodni(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
    remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
-    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(attrs={'href':'http://www.dziennikzachodni.pl/piano'})]
+    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(attrs={'href':'http://www.dziennikzachodni.pl/piano'}), dict(name='aside')]
    feeds          = [(u'Wszystkie', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533764/index.rss?201302'), (u'Wiadomo\u015bci', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533765/index.rss?201302'), (u'Regiony', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_regiony.xml?201302'), (u'Opinie', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_regiony.xml?201302'), (u'Blogi', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_blogi.xml?201302'), (u'Serwisy', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_serwisy.xml?201302'), (u'Sport', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533766/index.rss?201302'), (u'M\xf3j Reporter', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_mojreporter.xml?201302'), (u'Na narty', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_nanarty.xml?201302'), (u'Drogi', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_drogi.xml?201302'), (u'Pieni\u0105dze', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533768/index.rss?201302')]
--- a/recipes/echo_dnia.recipe
+++ b/recipes/echo_dnia.recipe
@ -16,6 +16,7 @@ class EchoDnia(BasicNewsRecipe):
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''), 
--- a/recipes/ekundelek_pl.recipe
+++ b/recipes/ekundelek_pl.recipe
@ -12,7 +12,7 @@ class swiatczytnikow(BasicNewsRecipe):
    __author__ = u'Artur Stachecki'
    oldest_article = 7
    max_articles_per_feed = 100
-
+    remove_empty_feeds = True
    remove_tags = [dict(name = 'div', attrs = {'class' : 'feedflare'})]
    feeds = [(u'Wpisy', u'http://feeds.feedburner.com/Ekundelekpl?format=xml')]
--- a/recipes/emuzica_pl.recipe
+++ b/recipes/emuzica_pl.recipe
@ -11,6 +11,7 @@ class eMuzyka(BasicNewsRecipe):
    cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
    no_stylesheets = True
    oldest_article = 7
    remove_empty_feeds = True
    max_articles_per_feed = 100
    remove_attributes = ['style']
    keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
--- a/recipes/gazeta_wyborcza.recipe
+++ b/recipes/gazeta_wyborcza.recipe
@ -9,6 +9,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
    description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.'
    category = 'newspaper'
    publication_type = 'newspaper'
    #encoding = 'iso-8859-2'
    masthead_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
    INDEX = 'http://wyborcza.pl'
    remove_empty_feeds = True
@ -16,6 +17,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags_before = dict(id='k0')
    remove_tags_after = dict(id='banP4')
@ -24,7 +26,19 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
             (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
             (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
             (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
-             (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'), (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'), (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'), (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'), (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'), (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'), (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'), (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'), (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'), (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'), (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'), (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'), (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
+             (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
             (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
             (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
             (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
             (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
             (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
             (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
             (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
             (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
             (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
             (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
             (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'),
             (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
             ]
    def skip_ad_pages(self, soup):
--- a/recipes/gildia_pl.recipe
+++ b/recipes/gildia_pl.recipe
@ -31,6 +31,14 @@ class Gildia(BasicNewsRecipe):
            for link in content.findAll(name='a'):
                if 'fragment' in link['href']:
                    return self.index_to_soup(link['href'], raw=True)
        if 'relacj' in soup.title.string.lower():
            for link in content.findAll(name='a'):
                if 'relacj' in link['href']:
                    return self.index_to_soup(link['href'], raw=True)
        if 'wywiad' in soup.title.string.lower():
            for link in content.findAll(name='a'):
                if 'wywiad' in link['href']:
                    return self.index_to_soup(link['href'], raw=True)
    def preprocess_html(self, soup):
--- a/recipes/glos_wielkopolski.recipe
+++ b/recipes/glos_wielkopolski.recipe
@ -16,7 +16,7 @@ class GlosWielkopolski(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
    remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
-    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
+    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(name='a', attrs={'href':'http://www.gloswielkopolski.pl/newsletter/'})]
    feeds          = [(u'Wszystkie', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533779/index.rss?201302'), (u'Wiadomo\u015bci', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533780/index.rss?201302'), (u'Sport', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533781/index.rss?201302'), (u'Kultura', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533782/index.rss?201302'), (u'Porady', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_porady.xml?201302'), (u'Blogi', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_blogi.xml?201302'), (u'Nasze akcje', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_naszeakcje.xml?201302'), (u'Opinie', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_opinie.xml?201302'), (u'Magazyn', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_magazyn.xml?201302')]
--- a/recipes/go_comics.recipe
+++ b/recipes/go_comics.recipe
@ -1,229 +1,443 @@
 __license__   = 'GPL v3'
 __copyright__ = 'Copyright 2010 Starson17'
 '''
 www.gocomics.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
-
+class GoComics(BasicNewsRecipe):
-class Comics(BasicNewsRecipe):
+    title               = 'Go Comics'
    title               = 'Comics.com'
    __author__          = 'Starson17'
-    description         = 'Comics from comics.com. You should customize this recipe to fetch only the comics you are interested in'
+    __version__         = '1.06'
    __date__            = '07 June 2011'
    description         = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
    category            = 'news, comics'
    language            = 'en'
    use_embedded_content= False
    no_stylesheets      = True
    oldest_article      = 24
    remove_javascript   = True
-    cover_url           = 'http://www.bsb.lib.tx.us/images/comics.com.gif'
+    remove_attributes = ['style']
    recursions          = 0
    max_articles_per_feed = 10
    num_comics_to_get = 7
    simultaneous_downloads = 1
    # delay = 3
-    keep_only_tags     = [dict(name='h1'),
+    ####### USER PREFERENCES - COMICS, IMAGE SIZE AND NUMBER OF COMICS TO RETRIEVE ########
-                          dict(name='p', attrs={'class':'feature_item'})
+    # num_comics_to_get - I've tried up to 99 on Calvin&Hobbes
    num_comics_to_get = 1
    # comic_size 300 is small, 600 is medium, 900 is large, 1500 is extra-large
    comic_size = 900
    # CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS
    # Please do not overload their servers by selecting all comics and 1000 strips from each!
    conversion_options = {'linearize_tables'  : True
                        , 'comment'           : description
                        , 'tags'              : category
                        , 'language'          : language
                        }
    keep_only_tags     = [dict(name='div', attrs={'class':['feature','banner']}),
                          ]
    remove_tags = [dict(name='a', attrs={'class':['beginning','prev','cal','next','newest']}),
                   dict(name='div', attrs={'class':['tag-wrapper']}),
                   dict(name='a', attrs={'href':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
                   dict(name='img', attrs={'src':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
                   dict(name='ul', attrs={'class':['share-nav','feature-nav']}),
                   ]
    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.addheaders = [('Referer','http://www.gocomics.com/')]
        return br
    def parse_index(self):
        feeds = []
        for title, url in [
-                            ("9 Chickweed Lane", "http://gocomics.com/9_chickweed_lane"),
+                       #(u"2 Cows and a Chicken", u"http://www.gocomics.com/2cowsandachicken"),
-                            ("Agnes", "http://gocomics.com/agnes"),
+                       #(u"9 Chickweed Lane", u"http://www.gocomics.com/9chickweedlane"),
-                            ("Alley Oop", "http://gocomics.com/alley_oop"),
+                       #(u"Adam At Home", u"http://www.gocomics.com/adamathome"),
-                            ("Andy Capp", "http://gocomics.com/andy_capp"),
+                       #(u"Agnes", u"http://www.gocomics.com/agnes"),
-                            ("Arlo & Janis", "http://gocomics.com/arlo&janis"),
+                       #(u"Alley Oop", u"http://www.gocomics.com/alleyoop"),
-                            ("B.C.", "http://gocomics.com/bc"),
+                       #(u"Andy Capp", u"http://www.gocomics.com/andycapp"),
-                            ("Ballard Street", "http://gocomics.com/ballard_street"),
+                       (u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"),
-                            # ("Ben", "http://comics.com/ben"),
+                       #(u"Annie", u"http://www.gocomics.com/annie"),
-                            # ("Betty", "http://comics.com/betty"),
+                       #(u"Arlo & Janis", u"http://www.gocomics.com/arloandjanis"),
-                            # ("Big Nate", "http://comics.com/big_nate"),
+                       #(u"Ask Shagg", u"http://www.gocomics.com/askshagg"),
-                            # ("Brevity", "http://comics.com/brevity"),
+                       (u"B.C.", u"http://www.gocomics.com/bc"),
-                            # ("Candorville", "http://comics.com/candorville"),
+                       #(u"Back in the Day", u"http://www.gocomics.com/backintheday"),
-                            # ("Cheap Thrills", "http://comics.com/cheap_thrills"),
+                       #(u"Bad Reporter", u"http://www.gocomics.com/badreporter"),
-                            # ("Committed", "http://comics.com/committed"),
+                       (u"Baldo", u"http://www.gocomics.com/baldo"),
-                            # ("Cow & Boy", "http://comics.com/cow&boy"),
+                       #(u"Ballard Street", u"http://www.gocomics.com/ballardstreet"),
-                            # ("Daddy's Home", "http://comics.com/daddys_home"),
+                       #(u"Barkeater Lake", u"http://www.gocomics.com/barkeaterlake"),
-                            # ("Dog eat Doug", "http://comics.com/dog_eat_doug"),
+                       #(u"Basic Instructions", u"http://www.gocomics.com/basicinstructions"),
-                            # ("Drabble", "http://comics.com/drabble"),
+                       #(u"Ben", u"http://www.gocomics.com/ben"),
-                            # ("F Minus", "http://comics.com/f_minus"),
+                       #(u"Betty", u"http://www.gocomics.com/betty"),
-                            # ("Family Tree", "http://comics.com/family_tree"),
+                       #(u"Bewley", u"http://www.gocomics.com/bewley"),
-                            # ("Farcus", "http://comics.com/farcus"),
+                       #(u"Big Nate", u"http://www.gocomics.com/bignate"),
-                            # ("Fat Cats Classics", "http://comics.com/fat_cats_classics"),
+                       #(u"Big Top", u"http://www.gocomics.com/bigtop"),
-                            # ("Ferd'nand", "http://comics.com/ferdnand"),
+                       #(u"Biographic", u"http://www.gocomics.com/biographic"),
-                            # ("Flight Deck", "http://comics.com/flight_deck"),
+                       #(u"Birdbrains", u"http://www.gocomics.com/birdbrains"),
-                            # ("Flo & Friends", "http://comics.com/flo&friends"),
+                       #(u"Bleeker: The Rechargeable Dog", u"http://www.gocomics.com/bleeker"),
-                            # ("Fort Knox", "http://comics.com/fort_knox"),
+                       #(u"Bliss", u"http://www.gocomics.com/bliss"),
-                            # ("Frank & Ernest", "http://comics.com/frank&ernest"),
+                       #(u"Bloom County", u"http://www.gocomics.com/bloomcounty"),
-                            # ("Frazz", "http://comics.com/frazz"),
+                       #(u"Bo Nanas", u"http://www.gocomics.com/bonanas"),
-                            # ("Free Range", "http://comics.com/free_range"),
+                       #(u"Bob the Squirrel", u"http://www.gocomics.com/bobthesquirrel"),
-                            # ("Geech Classics", "http://comics.com/geech_classics"),
+                       #(u"Boomerangs", u"http://www.gocomics.com/boomerangs"),
-                            # ("Get Fuzzy", "http://comics.com/get_fuzzy"),
+                       #(u"Bottomliners", u"http://www.gocomics.com/bottomliners"),
-                            # ("Girls & Sports", "http://comics.com/girls&sports"),
+                       (u"Bound and Gagged", u"http://www.gocomics.com/boundandgagged"),
-                            # ("Graffiti", "http://comics.com/graffiti"),
+                       #(u"Brainwaves", u"http://www.gocomics.com/brainwaves"),
-                            # ("Grand Avenue", "http://comics.com/grand_avenue"),
+                       #(u"Brenda Starr", u"http://www.gocomics.com/brendastarr"),
-                            # ("Heathcliff", "http://comics.com/heathcliff"),
+                       #(u"Brevity", u"http://www.gocomics.com/brevity"),
-                            # "Heathcliff, a street-smart and mischievous cat with many adventures."
+                       #(u"Brewster Rockit", u"http://www.gocomics.com/brewsterrockit"),
-                            # ("Herb and Jamaal", "http://comics.com/herb_and_jamaal"),
+                       (u"Broom Hilda", u"http://www.gocomics.com/broomhilda"),
-                            # ("Herman", "http://comics.com/herman"),
+                       (u"Calvin and Hobbes", u"http://www.gocomics.com/calvinandhobbes"),
-                            # ("Home and Away", "http://comics.com/home_and_away"),
+                       #(u"Candorville", u"http://www.gocomics.com/candorville"),
-                            # ("It's All About You", "http://comics.com/its_all_about_you"),
+                       #(u"Cathy", u"http://www.gocomics.com/cathy"),
-                            # ("Jane's World", "http://comics.com/janes_world"),
+                       #(u"C'est la Vie", u"http://www.gocomics.com/cestlavie"),
-                            # ("Jump Start", "http://comics.com/jump_start"),
+                       #(u"Cheap Thrills", u"http://www.gocomics.com/cheapthrills"),
-                            # ("Kit 'N' Carlyle", "http://comics.com/kit_n_carlyle"),
+                       #(u"Chuckle Bros", u"http://www.gocomics.com/chucklebros"),
-                            # ("Li'l Abner Classics", "http://comics.com/lil_abner_classics"),
+                       #(u"Citizen Dog", u"http://www.gocomics.com/citizendog"),
-                            # ("Liberty Meadows", "http://comics.com/liberty_meadows"),
+                       #(u"Cleats", u"http://www.gocomics.com/cleats"),
-                            # ("Little Dog Lost", "http://comics.com/little_dog_lost"),
+                       #(u"Close to Home", u"http://www.gocomics.com/closetohome"),
-                            # ("Lola", "http://comics.com/lola"),
+                       #(u"Committed", u"http://www.gocomics.com/committed"),
-                            # ("Luann", "http://comics.com/luann"),
+                       #(u"Compu-toon", u"http://www.gocomics.com/compu-toon"),
-                            # ("Marmaduke", "http://comics.com/marmaduke"),
+                       #(u"Cornered", u"http://www.gocomics.com/cornered"),
-                            # ("Meg! Classics", "http://comics.com/meg_classics"),
+                       #(u"Cow & Boy", u"http://www.gocomics.com/cow&boy"),
-                            # ("Minimum Security", "http://comics.com/minimum_security"),
+                       #(u"Cul de Sac", u"http://www.gocomics.com/culdesac"),
-                            # ("Moderately Confused", "http://comics.com/moderately_confused"),
+                       #(u"Daddy's Home", u"http://www.gocomics.com/daddyshome"),
-                            # ("Momma", "http://comics.com/momma"),
+                       #(u"Deep Cover", u"http://www.gocomics.com/deepcover"),
-                            # ("Monty", "http://comics.com/monty"),
+                       #(u"Dick Tracy", u"http://www.gocomics.com/dicktracy"),
-                            # ("Motley Classics", "http://comics.com/motley_classics"),
+                       #(u"Dog Eat Doug", u"http://www.gocomics.com/dogeatdoug"),
-                            # ("Nancy", "http://comics.com/nancy"),
+                       #(u"Domestic Abuse", u"http://www.gocomics.com/domesticabuse"),
-                            # ("Natural Selection", "http://comics.com/natural_selection"),
+                       #(u"Doodles", u"http://www.gocomics.com/doodles"),
-                            # ("Nest Heads", "http://comics.com/nest_heads"),
+                       #(u"Doonesbury", u"http://www.gocomics.com/doonesbury"),
-                            # ("Off The Mark", "http://comics.com/off_the_mark"),
+                       #(u"Drabble", u"http://www.gocomics.com/drabble"),
-                            # ("On a Claire Day", "http://comics.com/on_a_claire_day"),
+                       #(u"Eek!", u"http://www.gocomics.com/eek"),
-                            # ("One Big Happy Classics", "http://comics.com/one_big_happy_classics"),
+                       #(u"F Minus", u"http://www.gocomics.com/fminus"),
-                            # ("Over the Hedge", "http://comics.com/over_the_hedge"),
+                       #(u"Family Tree", u"http://www.gocomics.com/familytree"),
-                            # ("PC and Pixel", "http://comics.com/pc_and_pixel"),
+                       #(u"Farcus", u"http://www.gocomics.com/farcus"),
-                            # ("Peanuts", "http://comics.com/peanuts"),
+                       #(u"Fat Cats Classics", u"http://www.gocomics.com/fatcatsclassics"),
-                            # ("Pearls Before Swine", "http://comics.com/pearls_before_swine"),
+                       #(u"Ferd'nand", u"http://www.gocomics.com/ferdnand"),
-                            # ("Pickles", "http://comics.com/pickles"),
+                       #(u"Flight Deck", u"http://www.gocomics.com/flightdeck"),
-                            # ("Prickly City", "http://comics.com/prickly_city"),
+                       #(u"Flo and Friends", u"http://www.gocomics.com/floandfriends"),
-                            # ("Raising Duncan Classics", "http://comics.com/raising_duncan_classics"),
+                       (u"For Better or For Worse", u"http://www.gocomics.com/forbetterorforworse"),
-                            # ("Reality Check", "http://comics.com/reality_check"),
+                       #(u"For Heaven's Sake", u"http://www.gocomics.com/forheavenssake"),
-                            # ("Red & Rover", "http://comics.com/red&rover"),
+                       #(u"Fort Knox", u"http://www.gocomics.com/fortknox"),
-                            # ("Rip Haywire", "http://comics.com/rip_haywire"),
+                       #(u"FoxTrot Classics", u"http://www.gocomics.com/foxtrotclassics"),
-                            # ("Ripley's Believe It or Not!", "http://comics.com/ripleys_believe_it_or_not"),
+                       #(u"FoxTrot", u"http://www.gocomics.com/foxtrot"),
-                            # ("Rose Is Rose", "http://comics.com/rose_is_rose"),
+                       (u"Frank & Ernest", u"http://www.gocomics.com/frankandernest"),
-                            # ("Rubes", "http://comics.com/rubes"),
+                       #(u"Frazz", u"http://www.gocomics.com/frazz"),
-                            # ("Rudy Park", "http://comics.com/rudy_park"),
+                       #(u"Fred Basset", u"http://www.gocomics.com/fredbasset"),
-                            # ("Scary Gary", "http://comics.com/scary_gary"),
+                       #(u"Free Range", u"http://www.gocomics.com/freerange"),
-                            # ("Shirley and Son Classics", "http://comics.com/shirley_and_son_classics"),
+                       #(u"Frog Applause", u"http://www.gocomics.com/frogapplause"),
-                            # ("Soup To Nutz", "http://comics.com/soup_to_nutz"),
+                       #(u"Garfield Minus Garfield", u"http://www.gocomics.com/garfieldminusgarfield"),
-                            # ("Speed Bump", "http://comics.com/speed_bump"),
+                       (u"Garfield", u"http://www.gocomics.com/garfield"),
-                            # ("Spot The Frog", "http://comics.com/spot_the_frog"),
+                       #(u"Gasoline Alley", u"http://www.gocomics.com/gasolinealley"),
-                            # ("State of the Union", "http://comics.com/state_of_the_union"),
+                       #(u"Geech Classics", u"http://www.gocomics.com/geechclassics"),
-                            # ("Strange Brew", "http://comics.com/strange_brew"),
+                       (u"Get Fuzzy", u"http://www.gocomics.com/getfuzzy"),
-                            # ("Tarzan Classics", "http://comics.com/tarzan_classics"),
+                       #(u"Gil Thorp", u"http://www.gocomics.com/gilthorp"),
-                            # ("That's Life", "http://comics.com/thats_life"),
+                       #(u"Ginger Meggs", u"http://www.gocomics.com/gingermeggs"),
-                            # ("The Barn", "http://comics.com/the_barn"),
+                       #(u"Girls & Sports", u"http://www.gocomics.com/girlsandsports"),
-                            # ("The Born Loser", "http://comics.com/the_born_loser"),
+                       #(u"Graffiti", u"http://www.gocomics.com/graffiti"),
-                            # ("The Buckets", "http://comics.com/the_buckets"),
+                       #(u"Grand Avenue", u"http://www.gocomics.com/grandavenue"),
-                            # ("The Dinette Set", "http://comics.com/the_dinette_set"),
+                       #(u"Haiku Ewe", u"http://www.gocomics.com/haikuewe"),
-                            # ("The Grizzwells", "http://comics.com/the_grizzwells"),
+                       #(u"Heart of the City", u"http://www.gocomics.com/heartofthecity"),
-                            # ("The Humble Stumble", "http://comics.com/the_humble_stumble"),
+                       #(u"Herb and Jamaal", u"http://www.gocomics.com/herbandjamaal"),
-                            # ("The Knight Life", "http://comics.com/the_knight_life"),
+                       #(u"Home and Away", u"http://www.gocomics.com/homeandaway"),
-                            # ("The Meaning of Lila", "http://comics.com/the_meaning_of_lila"),
+                       #(u"Housebroken", u"http://www.gocomics.com/housebroken"),
-                            # ("The Other Coast", "http://comics.com/the_other_coast"),
+                       #(u"Hubert and Abby", u"http://www.gocomics.com/hubertandabby"),
-                            # ("The Sunshine Club", "http://comics.com/the_sunshine_club"),
+                       #(u"Imagine This", u"http://www.gocomics.com/imaginethis"),
-                            # ("Unstrange Phenomena", "http://comics.com/unstrange_phenomena"),
+                       #(u"In the Bleachers", u"http://www.gocomics.com/inthebleachers"),
-                            # ("Watch Your Head", "http://comics.com/watch_your_head"),
+                       #(u"In the Sticks", u"http://www.gocomics.com/inthesticks"),
-                            # ("Wizard of Id", "http://comics.com/wizard_of_id"),
+                       #(u"Ink Pen", u"http://www.gocomics.com/inkpen"),
-                            # ("Working Daze", "http://comics.com/working_daze"),
+                       #(u"It's All About You", u"http://www.gocomics.com/itsallaboutyou"),
-                            # ("Working It Out", "http://comics.com/working_it_out"),
+                       #(u"Jane's World", u"http://www.gocomics.com/janesworld"),
-                            # ("Zack Hill", "http://comics.com/zack_hill"),
+                       #(u"Joe Vanilla", u"http://www.gocomics.com/joevanilla"),
-                            # ("(Th)ink", "http://comics.com/think"),
+                       #(u"Jump Start", u"http://www.gocomics.com/jumpstart"),
-                            # "Tackling the political and social issues impacting communities of color."
+                       #(u"Kit 'N' Carlyle", u"http://www.gocomics.com/kitandcarlyle"),
-                            # ("Adam Zyglis", "http://comics.com/adam_zyglis"),
+                       #(u"La Cucaracha", u"http://www.gocomics.com/lacucaracha"),
-                            # "Known for his excellent caricatures, as well as independent and incisive imagery. "
+                       #(u"Last Kiss", u"http://www.gocomics.com/lastkiss"),
-                            # ("Andy Singer", "http://comics.com/andy_singer"),
+                       #(u"Legend of Bill", u"http://www.gocomics.com/legendofbill"),
-                            # ("Bill Day", "http://comics.com/bill_day"),
+                       #(u"Liberty Meadows", u"http://www.gocomics.com/libertymeadows"),
-                            # "Powerful images on sensitive issues."
+                       #(u"Li'l Abner Classics", u"http://www.gocomics.com/lilabnerclassics"),
-                            # ("Bill Schorr", "http://comics.com/bill_schorr"),
+                       #(u"Lio", u"http://www.gocomics.com/lio"),
-                            # ("Bob Englehart", "http://comics.com/bob_englehart"),
+                       #(u"Little Dog Lost", u"http://www.gocomics.com/littledoglost"),
-                            # ("Brian Fairrington", "http://comics.com/brian_fairrington"),
+                       #(u"Little Otto", u"http://www.gocomics.com/littleotto"),
-                            # ("Bruce Beattie", "http://comics.com/bruce_beattie"),
+                       #(u"Lola", u"http://www.gocomics.com/lola"),
-                            # ("Cam Cardow", "http://comics.com/cam_cardow"),
+                       #(u"Love Is...", u"http://www.gocomics.com/loveis"),
-                            # ("Chip Bok", "http://comics.com/chip_bok"),
+                       (u"Luann", u"http://www.gocomics.com/luann"),
-                            # ("Chris Britt", "http://comics.com/chris_britt"),
+                       #(u"Maintaining", u"http://www.gocomics.com/maintaining"),
-                            # ("Chuck Asay", "http://comics.com/chuck_asay"),
+                       #(u"Meg! Classics", u"http://www.gocomics.com/megclassics"),
-                            # ("Clay Bennett", "http://comics.com/clay_bennett"),
+                       #(u"Middle-Aged White Guy", u"http://www.gocomics.com/middleagedwhiteguy"),
-                            # ("Daryl Cagle", "http://comics.com/daryl_cagle"),
+                       #(u"Minimum Security", u"http://www.gocomics.com/minimumsecurity"),
-                            # ("David Fitzsimmons", "http://comics.com/david_fitzsimmons"),
+                       #(u"Moderately Confused", u"http://www.gocomics.com/moderatelyconfused"),
-                            # "David Fitzsimmons is a new editorial cartoons on comics.com.  He is also a staff writer and editorial cartoonist for the Arizona Daily Star. "
+                       (u"Momma", u"http://www.gocomics.com/momma"),
-                            # ("Drew Litton", "http://comics.com/drew_litton"),
+                       #(u"Monty", u"http://www.gocomics.com/monty"),
-                            # "Drew Litton is an artist who is probably best known for his sports cartoons. He received the National Cartoonist Society Sports Cartoon Award for 1993. "
+                       #(u"Motley Classics", u"http://www.gocomics.com/motleyclassics"),
-                            # ("Ed Stein", "http://comics.com/ed_stein"),
+                       #(u"Mutt & Jeff", u"http://www.gocomics.com/muttandjeff"),
-                            # "Winner of the Fischetti Award in 2006 and the Scripps Howard National Journalism Award, 1999, Ed Stein has been the editorial cartoonist for the Rocky Mountain News since 1978. "
+                       #(u"Mythtickle", u"http://www.gocomics.com/mythtickle"),
-                            # ("Eric Allie", "http://comics.com/eric_allie"),
+                       #(u"Nancy", u"http://www.gocomics.com/nancy"),
-                            # "Eric Allie is an editorial cartoonist with the Pioneer Press and CNS News. "
+                       #(u"Natural Selection", u"http://www.gocomics.com/naturalselection"),
-                            # ("Gary Markstein", "http://comics.com/gary_markstein"),
+                       #(u"Nest Heads", u"http://www.gocomics.com/nestheads"),
-                            # ("Gary McCoy", "http://comics.com/gary_mccoy"),
+                       #(u"NEUROTICA", u"http://www.gocomics.com/neurotica"),
-                            # "Gary McCoy is known for his editorial cartoons, humor and inane ramblings. He is a 2 time nominee for  Best  Magazine Cartoonist of the Year by the National Cartoonists Society. He resides in Belleville, IL. "
+                       #(u"New Adventures of Queen Victoria", u"http://www.gocomics.com/thenewadventuresofqueenvictoria"),
-                            # ("Gary Varvel", "http://comics.com/gary_varvel"),
+                       (u"Non Sequitur", u"http://www.gocomics.com/nonsequitur"),
-                            # ("Henry Payne", "http://comics.com/henry_payne"),
+                       #(u"Off The Mark", u"http://www.gocomics.com/offthemark"),
-                            # ("JD Crowe", "http://comics.com/jd_crowe"),
+                       #(u"On A Claire Day", u"http://www.gocomics.com/onaclaireday"),
-                            # ("Jeff Parker", "http://comics.com/jeff_parker"),
+                       #(u"One Big Happy Classics", u"http://www.gocomics.com/onebighappyclassics"),
-                            # ("Jeff Stahler", "http://comics.com/jeff_stahler"),
+                       #(u"One Big Happy", u"http://www.gocomics.com/onebighappy"),
-                            # ("Jerry Holbert", "http://comics.com/jerry_holbert"),
+                       #(u"Out of the Gene Pool Re-Runs", u"http://www.gocomics.com/outofthegenepool"),
-                            # ("John Cole", "http://comics.com/john_cole"),
+                       #(u"Over the Hedge", u"http://www.gocomics.com/overthehedge"),
-                            # ("John Darkow", "http://comics.com/john_darkow"),
+                       #(u"Overboard", u"http://www.gocomics.com/overboard"),
-                            # "John Darkow is a contributing editorial cartoonist for the Humor Times as well as editoiral cartoonist for  the Columbia Daily Tribune, Missouri"
+                       #(u"PC and Pixel", u"http://www.gocomics.com/pcandpixel"),
-                            # ("John Sherffius", "http://comics.com/john_sherffius"),
+                       (u"Peanuts", u"http://www.gocomics.com/peanuts"),
-                            # ("Larry Wright", "http://comics.com/larry_wright"),
+                       (u"Pearls Before Swine", u"http://www.gocomics.com/pearlsbeforeswine"),
-                            # ("Lisa Benson", "http://comics.com/lisa_benson"),
+                       #(u"Pibgorn Sketches", u"http://www.gocomics.com/pibgornsketches"),
-                            # ("Marshall Ramsey", "http://comics.com/marshall_ramsey"),
+                       #(u"Pibgorn", u"http://www.gocomics.com/pibgorn"),
-                            # ("Matt Bors", "http://comics.com/matt_bors"),
+                       #(u"Pickles", u"http://www.gocomics.com/pickles"),
-                            # ("Michael Ramirez", "http://comics.com/michael_ramirez"),
+                       #(u"Pinkerton", u"http://www.gocomics.com/pinkerton"),
-                            # ("Mike Keefe", "http://comics.com/mike_keefe"),
+                       #(u"Pluggers", u"http://www.gocomics.com/pluggers"),
-                            # ("Mike Luckovich", "http://comics.com/mike_luckovich"),
+                       (u"Pooch Cafe", u"http://www.gocomics.com/poochcafe"),
-                            # ("MIke Thompson", "http://comics.com/mike_thompson"),
+                       #(u"PreTeena", u"http://www.gocomics.com/preteena"),
-                            # ("Monte Wolverton", "http://comics.com/monte_wolverton"),
+                       #(u"Prickly City", u"http://www.gocomics.com/pricklycity"),
-                            # "Unique mix of perspectives"
+                       #(u"Rabbits Against Magic", u"http://www.gocomics.com/rabbitsagainstmagic"),
-                            # ("Mr. Fish", "http://comics.com/mr_fish"),
+                       #(u"Raising Duncan Classics", u"http://www.gocomics.com/raisingduncanclassics"),
-                            # "Side effects may include swelling"
+                       #(u"Real Life Adventures", u"http://www.gocomics.com/reallifeadventures"),
-                            # ("Nate Beeler", "http://comics.com/nate_beeler"),
+                       #(u"Reality Check", u"http://www.gocomics.com/realitycheck"),
-                            # "Middle America meets the Beltway."
+                       #(u"Red and Rover", u"http://www.gocomics.com/redandrover"),
-                            # ("Nick Anderson", "http://comics.com/nick_anderson"),
+                       #(u"Red Meat", u"http://www.gocomics.com/redmeat"),
-                            # ("Pat Bagley", "http://comics.com/pat_bagley"),
+                       #(u"Reynolds Unwrapped", u"http://www.gocomics.com/reynoldsunwrapped"),
-                            # "Unfair and Totally Unbalanced."
+                       #(u"Rip Haywire", u"http://www.gocomics.com/riphaywire"),
-                            # ("Paul Szep", "http://comics.com/paul_szep"),
+                       #(u"Ronaldinho Gaucho", u"http://www.gocomics.com/ronaldinhogaucho"),
-                            # ("RJ Matson", "http://comics.com/rj_matson"),
+                       (u"Rose Is Rose", u"http://www.gocomics.com/roseisrose"),
-                            # "Power cartoons from NYC and Capitol Hill"
+                       #(u"Rudy Park", u"http://www.gocomics.com/rudypark"),
-                            # ("Rob Rogers", "http://comics.com/rob_rogers"),
+                       #(u"Scary Gary", u"http://www.gocomics.com/scarygary"),
-                            # "Humorous slant on current events"
+                       #(u"Shirley and Son Classics", u"http://www.gocomics.com/shirleyandsonclassics"),
-                            # ("Robert Ariail", "http://comics.com/robert_ariail"),
+                       (u"Shoe", u"http://www.gocomics.com/shoe"),
-                            # "Clever and unpredictable"
+                       #(u"Shoecabbage", u"http://www.gocomics.com/shoecabbage"),
-                            # ("Scott Stantis", "http://comics.com/scott_stantis"),
+                       #(u"Skin Horse", u"http://www.gocomics.com/skinhorse"),
-                            # ("Signe Wilkinson", "http://comics.com/signe_wilkinson"),
+                       #(u"Slowpoke", u"http://www.gocomics.com/slowpoke"),
-                            # ("Steve Benson", "http://comics.com/steve_benson"),
+                       #(u"Soup To Nutz", u"http://www.gocomics.com/souptonutz"),
-                            # ("Steve Breen", "http://comics.com/steve_breen"),
+                       #(u"Spot The Frog", u"http://www.gocomics.com/spotthefrog"),
-                            # ("Steve Kelley", "http://comics.com/steve_kelley"),
+                       #(u"State of the Union", u"http://www.gocomics.com/stateoftheunion"),
-                            # ("Steve Sack", "http://comics.com/steve_sack"),
+                       #(u"Stone Soup", u"http://www.gocomics.com/stonesoup"),
                       #(u"Sylvia", u"http://www.gocomics.com/sylvia"),
                       #(u"Tank McNamara", u"http://www.gocomics.com/tankmcnamara"),
                       #(u"Tarzan Classics", u"http://www.gocomics.com/tarzanclassics"),
                       #(u"That's Life", u"http://www.gocomics.com/thatslife"),
                       #(u"The Academia Waltz", u"http://www.gocomics.com/academiawaltz"),
                       #(u"The Barn", u"http://www.gocomics.com/thebarn"),
                       #(u"The Boiling Point", u"http://www.gocomics.com/theboilingpoint"),
                       #(u"The Boondocks", u"http://www.gocomics.com/boondocks"),
                       (u"The Born Loser", u"http://www.gocomics.com/thebornloser"),
                       #(u"The Buckets", u"http://www.gocomics.com/thebuckets"),
                       #(u"The City", u"http://www.gocomics.com/thecity"),
                       #(u"The Dinette Set", u"http://www.gocomics.com/dinetteset"),
                       #(u"The Doozies", u"http://www.gocomics.com/thedoozies"),
                       #(u"The Duplex", u"http://www.gocomics.com/duplex"),
                       #(u"The Elderberries", u"http://www.gocomics.com/theelderberries"),
                       #(u"The Flying McCoys", u"http://www.gocomics.com/theflyingmccoys"),
                       #(u"The Fusco Brothers", u"http://www.gocomics.com/thefuscobrothers"),
                       #(u"The Grizzwells", u"http://www.gocomics.com/thegrizzwells"),
                       #(u"The Humble Stumble", u"http://www.gocomics.com/thehumblestumble"),
                       #(u"The Knight Life", u"http://www.gocomics.com/theknightlife"),
                       #(u"The Meaning of Lila", u"http://www.gocomics.com/meaningoflila"),
                       (u"The Middletons", u"http://www.gocomics.com/themiddletons"),
                       #(u"The Norm", u"http://www.gocomics.com/thenorm"),
                       #(u"The Other Coast", u"http://www.gocomics.com/theothercoast"),
                       #(u"The Quigmans", u"http://www.gocomics.com/thequigmans"),
                       #(u"The Sunshine Club", u"http://www.gocomics.com/thesunshineclub"),
                       #(u"Tiny Sepuk", u"http://www.gocomics.com/tinysepuk"),
                       #(u"TOBY", u"http://www.gocomics.com/toby"),
                       #(u"Tom the Dancing Bug", u"http://www.gocomics.com/tomthedancingbug"),
                       #(u"Too Much Coffee Man", u"http://www.gocomics.com/toomuchcoffeeman"),
                       #(u"Unstrange Phenomena", u"http://www.gocomics.com/unstrangephenomena"),
                       #(u"W.T. Duck", u"http://www.gocomics.com/wtduck"),
                       #(u"Watch Your Head", u"http://www.gocomics.com/watchyourhead"),
                       #(u"Wee Pals", u"http://www.gocomics.com/weepals"),
                       #(u"Winnie the Pooh", u"http://www.gocomics.com/winniethepooh"),
                       (u"Wizard of Id", u"http://www.gocomics.com/wizardofid"),
                       #(u"Working Daze", u"http://www.gocomics.com/workingdaze"),
                       #(u"Working It Out", u"http://www.gocomics.com/workingitout"),
                       #(u"Yenny", u"http://www.gocomics.com/yenny"),
                       #(u"Zack Hill", u"http://www.gocomics.com/zackhill"),
                       #(u"Ziggy", u"http://www.gocomics.com/ziggy"),
                       (u"9 to 5", u"http://www.gocomics.com/9to5"),
                       (u"Heathcliff", u"http://www.gocomics.com/heathcliff"),
                       (u"Herman", u"http://www.gocomics.com/herman"),
                       (u"Loose Parts", u"http://www.gocomics.com/looseparts"),
                       (u"Marmaduke", u"http://www.gocomics.com/marmaduke"),
                       (u"Ripley's Believe It or Not!", u"http://www.gocomics.com/ripleysbelieveitornot"),
                       (u"Rubes", u"http://www.gocomics.com/rubes"),
                       (u"Speed Bump", u"http://www.gocomics.com/speedbump"),
                       (u"Strange Brew", u"http://www.gocomics.com/strangebrew"),
                       (u"The Argyle Sweater", u"http://www.gocomics.com/theargylesweater"),
                       #
                       ######## EDITORIAL CARTOONS #####################
                       #(u"Adam Zyglis", u"http://www.gocomics.com/adamzyglis"),
                       #(u"Andy Singer", u"http://www.gocomics.com/andysinger"),
                       #(u"Ben Sargent",u"http://www.gocomics.com/bensargent"),
                       #(u"Bill Day", u"http://www.gocomics.com/billday"),
                       #(u"Bill Schorr", u"http://www.gocomics.com/billschorr"),
                       #(u"Bob Englehart", u"http://www.gocomics.com/bobenglehart"),
                       #(u"Bob Gorrell",u"http://www.gocomics.com/bobgorrell"),
                       #(u"Brian Fairrington", u"http://www.gocomics.com/brianfairrington"),
                       #(u"Bruce Beattie", u"http://www.gocomics.com/brucebeattie"),
                       #(u"Cam Cardow", u"http://www.gocomics.com/camcardow"),
                       #(u"Chan Lowe",u"http://www.gocomics.com/chanlowe"),
                       #(u"Chip Bok",u"http://www.gocomics.com/chipbok"),
                       #(u"Chris Britt",u"http://www.gocomics.com/chrisbritt"),
                       #(u"Chuck Asay",u"http://www.gocomics.com/chuckasay"),
                       #(u"Clay Bennett",u"http://www.gocomics.com/claybennett"),
                       #(u"Clay Jones",u"http://www.gocomics.com/clayjones"),
                       #(u"Dan Wasserman",u"http://www.gocomics.com/danwasserman"),
                       #(u"Dana Summers",u"http://www.gocomics.com/danasummers"),
                       #(u"Daryl Cagle", u"http://www.gocomics.com/darylcagle"),
                       #(u"David Fitzsimmons", u"http://www.gocomics.com/davidfitzsimmons"),
                       #(u"Dick Locher",u"http://www.gocomics.com/dicklocher"),
                       #(u"Don Wright",u"http://www.gocomics.com/donwright"),
                       #(u"Donna Barstow",u"http://www.gocomics.com/donnabarstow"),
                       #(u"Drew Litton", u"http://www.gocomics.com/drewlitton"),
                       #(u"Drew Sheneman",u"http://www.gocomics.com/drewsheneman"),
                       #(u"Ed Stein", u"http://www.gocomics.com/edstein"),
                       #(u"Eric Allie", u"http://www.gocomics.com/ericallie"),
                       #(u"Gary Markstein", u"http://www.gocomics.com/garymarkstein"),
                       #(u"Gary McCoy", u"http://www.gocomics.com/garymccoy"),
                       #(u"Gary Varvel", u"http://www.gocomics.com/garyvarvel"),
                       #(u"Glenn McCoy",u"http://www.gocomics.com/glennmccoy"),
                       #(u"Henry Payne", u"http://www.gocomics.com/henrypayne"),
                       #(u"Jack Ohman",u"http://www.gocomics.com/jackohman"),
                       #(u"JD Crowe", u"http://www.gocomics.com/jdcrowe"),
                       #(u"Jeff Danziger",u"http://www.gocomics.com/jeffdanziger"),
                       #(u"Jeff Parker", u"http://www.gocomics.com/jeffparker"),
                       #(u"Jeff Stahler", u"http://www.gocomics.com/jeffstahler"),
                       #(u"Jerry Holbert", u"http://www.gocomics.com/jerryholbert"),
                       #(u"Jim Morin",u"http://www.gocomics.com/jimmorin"),
                       #(u"Joel Pett",u"http://www.gocomics.com/joelpett"),
                       #(u"John Cole", u"http://www.gocomics.com/johncole"),
                       #(u"John Darkow", u"http://www.gocomics.com/johndarkow"),
                       #(u"John Deering",u"http://www.gocomics.com/johndeering"),
                       #(u"John Sherffius", u"http://www.gocomics.com/johnsherffius"),
                       #(u"Ken Catalino",u"http://www.gocomics.com/kencatalino"),
                       #(u"Kerry Waghorn",u"http://www.gocomics.com/facesinthenews"),
                       #(u"Kevin Kallaugher",u"http://www.gocomics.com/kevinkallaugher"),
                       #(u"Lalo Alcaraz",u"http://www.gocomics.com/laloalcaraz"),
                       #(u"Larry Wright", u"http://www.gocomics.com/larrywright"),
                       #(u"Lisa Benson", u"http://www.gocomics.com/lisabenson"),
                       #(u"Marshall Ramsey", u"http://www.gocomics.com/marshallramsey"),
                       #(u"Matt Bors", u"http://www.gocomics.com/mattbors"),
                       #(u"Matt Davies",u"http://www.gocomics.com/mattdavies"),
                       #(u"Michael Ramirez", u"http://www.gocomics.com/michaelramirez"),
                       #(u"Mike Keefe", u"http://www.gocomics.com/mikekeefe"),
                       #(u"Mike Luckovich", u"http://www.gocomics.com/mikeluckovich"),
                       #(u"MIke Thompson", u"http://www.gocomics.com/mikethompson"),
                       #(u"Monte Wolverton", u"http://www.gocomics.com/montewolverton"),
                       #(u"Mr. Fish", u"http://www.gocomics.com/mrfish"),
                       #(u"Nate Beeler", u"http://www.gocomics.com/natebeeler"),
                       #(u"Nick Anderson", u"http://www.gocomics.com/nickanderson"),
                       #(u"Pat Bagley", u"http://www.gocomics.com/patbagley"),
                       #(u"Pat Oliphant",u"http://www.gocomics.com/patoliphant"),
                       #(u"Paul Conrad",u"http://www.gocomics.com/paulconrad"),
                       #(u"Paul Szep", u"http://www.gocomics.com/paulszep"),
                       #(u"RJ Matson", u"http://www.gocomics.com/rjmatson"),
                       #(u"Rob Rogers", u"http://www.gocomics.com/robrogers"),
                       #(u"Robert Ariail", u"http://www.gocomics.com/robertariail"),
                       #(u"Scott Stantis", u"http://www.gocomics.com/scottstantis"),
                       #(u"Signe Wilkinson", u"http://www.gocomics.com/signewilkinson"),
                       #(u"Small World",u"http://www.gocomics.com/smallworld"),
                       #(u"Steve Benson", u"http://www.gocomics.com/stevebenson"),
                       #(u"Steve Breen", u"http://www.gocomics.com/stevebreen"),
                       #(u"Steve Kelley", u"http://www.gocomics.com/stevekelley"),
                       #(u"Steve Sack", u"http://www.gocomics.com/stevesack"),
                       #(u"Stuart Carlson",u"http://www.gocomics.com/stuartcarlson"),
                       #(u"Ted Rall",u"http://www.gocomics.com/tedrall"),
                       #(u"(Th)ink", u"http://www.gocomics.com/think"),
                       #(u"Tom Toles",u"http://www.gocomics.com/tomtoles"),
                       #(u"Tony Auth",u"http://www.gocomics.com/tonyauth"),
                       #(u"Views of the World",u"http://www.gocomics.com/viewsoftheworld"),
                       #(u"ViewsAfrica",u"http://www.gocomics.com/viewsafrica"),
                       #(u"ViewsAmerica",u"http://www.gocomics.com/viewsamerica"),
                       #(u"ViewsAsia",u"http://www.gocomics.com/viewsasia"),
                       #(u"ViewsBusiness",u"http://www.gocomics.com/viewsbusiness"),
                       #(u"ViewsEurope",u"http://www.gocomics.com/viewseurope"),
                       #(u"ViewsLatinAmerica",u"http://www.gocomics.com/viewslatinamerica"),
                       #(u"ViewsMidEast",u"http://www.gocomics.com/viewsmideast"),
                       #(u"Walt Handelsman",u"http://www.gocomics.com/walthandelsman"),
                       #(u"Wayne Stayskal",u"http://www.gocomics.com/waynestayskal"),
                       #(u"Wit of the World",u"http://www.gocomics.com/witoftheworld"),
                             ]:
            print 'Working on: ', title
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds
    def make_links(self, url):
-        soup = self.index_to_soup(url)
+        title = 'Temp'
        # print 'soup: ', soup
        title = ''
        current_articles = []
-        from datetime import datetime, timedelta
+        pages = range(1, self.num_comics_to_get+1)
-        now = datetime.now()
+        for page in pages:
-        dates = [(now-timedelta(days=d)).strftime('%Y/%m/%d') for d in range(self.num_comics_to_get)]
+            page_soup = self.index_to_soup(url)
-
+            if page_soup:
-        for page in dates:
+                try:
-            page_url = url + '/' + str(page)
+                    strip_title = page_soup.find(name='div', attrs={'class':'top'}).h1.a.string
-            print(page_url)
+                except:
-            soup = self.index_to_soup(page_url)
+                    strip_title = 'Error - no Title found'
-            if soup:
+                try:
-                strip_tag = self.tag_to_string(soup.find('a'))
+                    date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
-                if strip_tag:
+                    if not date_title:
-                  print 'strip_tag: ', strip_tag
+                        date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
-                  title = strip_tag
+                except:
-                  print 'title: ', title
+                    date_title = 'Error - no Date found'
                title = strip_title + ' - ' + date_title
                for i in range(2):
                    try:
                        strip_url_date = page_soup.find(name='div', attrs={'class':'top'}).h1.a['href']
                        break  # success - this is normal exit
                    except:
                        strip_url_date = None
                        continue  # try to get strip_url_date again
                for i in range(2):
                    try:
                        prev_strip_url_date = page_soup.find('a', attrs={'class': 'prev'})['href']
                        break  # success - this is normal exit
                    except:
                        prev_strip_url_date = None
                        continue  # try to get prev_strip_url_date again
                if strip_url_date:
                    page_url = 'http://www.gocomics.com' + strip_url_date
                else:
                    continue
                if prev_strip_url_date:
                    prev_page_url = 'http://www.gocomics.com' + prev_strip_url_date
                else:
                    continue
            current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
            url = prev_page_url
        current_articles.reverse()
        return current_articles
    def preprocess_html(self, soup):
        '''
        Rewrite a downloaded comic page before it is handed on.

        Two things are done, each only when the relevant markup is present:

        * the part of the page ``<title>`` after its first comma is reduced to
          its first few space-separated words (presumably the strip date -
          confirm against a live page) and prepended to the text of the
          ``<span>`` inside the first ``<h1>``;
        * the ``<img>`` inside the link of ``<p class="feature_item">`` has
          its ``src`` replaced by the link's ``href``, its ``width`` set to
          ``self.comic_size`` and its ``height`` set to ``None``.

        Missing elements (no title, no comma in the title, no ``<h1>``, no
        ``feature_item`` paragraph, no image) are skipped rather than raising.

        :param soup: parsed page tree (BeautifulSoup-style object).
        :return: whatever ``self.adeify_images(soup)`` returns.
        '''
        # Default to an empty prefix so the heading rewrite below never hits
        # an unbound name when the title is missing or has no comma.
        comic_date = ''
        title_tag = soup.title
        if title_tag is not None and title_tag.string:
            title_string = title_tag.string.strip()
            pieces = title_string.split(',', 1)
            if len(pieces) > 1:
                # Keep at most the first four space-separated chunks of the
                # text after the comma and drop the trailing remainder.
                comic_date = ' '.join(pieces[1].split(' ', 4)[0:-1])

        heading = soup.h1
        span = heading.span if heading is not None else None
        if span and span.string is not None:
            artist = span.string
            artist.replaceWith(comic_date + artist)

        feature_item = soup.find('p', attrs={'class': 'feature_item'})
        if feature_item is not None and feature_item.a:
            a_tag = feature_item.a
            img_tag = a_tag.img
            if img_tag is not None:
                # Point the image at the link target instead of its own src.
                img_tag["src"] = a_tag["href"]
                img_tag["width"] = self.comic_size
                img_tag["height"] = None
        return self.adeify_images(soup)
    # CSS text stored on the recipe as ``extra_css`` (presumably picked up by
    # the BasicNewsRecipe base class - confirm). It sets sans-serif fonts for
    # h1, h2, p and body, and forces every image to exactly 100% width by
    # pinning both max-width and min-width.
    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                    img {max-width:100%; min-width:100%;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
--- a/recipes/kdefamily_pl.recipe
+++ b/recipes/kdefamily_pl.recipe
@ -12,5 +12,6 @@ class KDEFamilyPl(BasicNewsRecipe):
    max_articles_per_feed = 100
    preprocess_regexps = [(re.compile(r"Podobne wpisy.*", re.IGNORECASE|re.DOTALL), lambda m: '')]
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = True
    feeds = [(u'Wszystko', u'http://kdefamily.pl/feed/')]
--- a/recipes/legeartis.recipe
+++ b/recipes/legeartis.recipe
@ -21,7 +21,7 @@ class LegeArtisRecipe(BasicNewsRecipe):
    no_stylesheets = True
    remove_javascript = True
-
+    remove_empty_feeds = True
    extra_css = '''
            img{clear: both;}
    '''
--- a/recipes/lomza.recipe
+++ b/recipes/lomza.recipe
@ -8,6 +8,7 @@ class Lomza(BasicNewsRecipe):
    language       = 'pl'
    oldest_article = 15
    no_stylesheets = True
    extra_css = '#foto {float: right; max-width: 200px; margin-left: 10px;} #fotogaleria > div {float:left;} .br {clear: both;}'
    max_articles_per_feed = 100
    remove_tags=[dict(name='div', attrs={'class':['bxbanner', 'drukuj', 'wyslijznajomemu']})]
    keep_only_tags=[dict(name='div', attrs={'class':'wiadomosc'})]
--- a/recipes/nrc_next.recipe
+++ b/recipes/nrc_next.recipe
@ -0,0 +1,75 @@
 #!/usr/bin/env  python2
 # -*- coding: utf-8 -*-
 # Based on veezh's original recipe, Kovid Goyal's New York Times recipe and Snaabs nrc Handelsblad recipe
 __license__   = 'GPL v3'
 __copyright__ = '2013, Niels Giesen'
 '''
 www.nrc.nl
 '''
 import os, zipfile
 import time
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ptempfile import PersistentTemporaryFile
 class NRCNext(BasicNewsRecipe):
    '''
    Recipe that fetches the publisher's ready-made EPUB of today's nrc.next
    edition instead of building a book from feeds.

    ``build_index`` downloads ``nn_<YYYYMMDD>.epub`` from
    digitaleeditie.nrc.nl, unpacks it into ``self.output_dir`` and returns the
    path of the unpacked ``metadata.opf``.
    '''
    title = u'nrc•next'
    description = u'De ePaper-versie van nrc•next'
    language = 'nl'
    lang = 'nl-NL'
    needs_subscription = True
    __author__ = 'Niels Giesen'
    conversion_options = {
        'no_default_epub_cover' : True
    }
    def get_browser(self):
        '''
        Return a browser, logged in when credentials are configured.

        When both ``self.username`` and ``self.password`` are set, the first
        form on http://login.nrc.nl/login is filled in and submitted.
        '''
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://login.nrc.nl/login')
            br.select_form(nr=0)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br
    def build_index(self):
        '''
        Download today's EPUB edition and unpack it into ``self.output_dir``.

        :return: path to ``metadata.opf`` inside ``self.output_dir``.
        :raises ValueError: when logging in or opening the edition URL fails.
        '''
        # NOTE(review): `_` is presumably the gettext function calibre
        # installs globally - it is not imported in this file.
        today = time.strftime("%Y%m%d")
        domain = "http://digitaleeditie.nrc.nl"
        url = domain + "/digitaleeditie/helekrant/epub/nn_" + today + ".epub"
        try:
            br = self.get_browser()
            f = br.open(url)
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must still
            # propagate instead of being reported as a missing edition.
            self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
            raise ValueError('Krant van vandaag nog niet beschikbaar')
        tmp = PersistentTemporaryFile(suffix='.epub')
        try:
            try:
                self.report_progress(0,_('downloading epub'))
                tmp.write(f.read())
                # The archive is reopened by file name below, so the bytes
                # must be on disk before that happens.
                tmp.flush()
            finally:
                f.close()
                br.close()
            if zipfile.is_zipfile(tmp.name):
                try:
                    zfile = zipfile.ZipFile(tmp.name, 'r')
                    try:
                        zfile.extractall(self.output_dir)
                    finally:
                        # Always release the archive handle.
                        zfile.close()
                    self.report_progress(0,_('extracting epub'))
                except zipfile.BadZipfile:
                    self.report_progress(0,_('BadZip error, continuing'))
        finally:
            tmp.close()
        index = os.path.join(self.output_dir, 'metadata.opf')
        self.report_progress(1,_('epub downloaded and extracted'))
        return index
--- a/recipes/nsfw_corp.recipe
+++ b/recipes/nsfw_corp.recipe
@ -1,11 +1,9 @@
 __license__   = 'GPL v3'
-__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2012-2013, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.nsfwcorp.com
 '''
 import urllib
 from calibre.web.feeds.news import BasicNewsRecipe
 class NotSafeForWork(BasicNewsRecipe):
@ -20,8 +18,8 @@ class NotSafeForWork(BasicNewsRecipe):
    needs_subscription     = True
    auto_cleanup           = False
    INDEX                  = 'https://www.nsfwcorp.com'
-    LOGIN                  = INDEX + '/login/target/'
+    LOGIN                  = INDEX + '/account/login/?next=%2F'
-    SETTINGS               = INDEX + '/settings/'
+    SETTINGS               = INDEX + '/account/settings/'
    use_embedded_content   = True
    language               = 'en'
    publication_type       = 'magazine'
@ -48,19 +46,20 @@ class NotSafeForWork(BasicNewsRecipe):
    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
-        br.open(self.LOGIN)
+        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
-            data = urllib.urlencode({ 'email':self.username
+            br.open(self.LOGIN)
-                                     ,'password':self.password
+            br.select_form(nr=0)
-                                   })
+            br['email'   ] = self.username
-            br.open(self.LOGIN, data)
+            br['password'] = self.password
            br.submit()
        return br
    def get_feeds(self):
        self.feeds = []
        soup = self.index_to_soup(self.SETTINGS)
        for item in soup.findAll('input', attrs={'type':'text'}):
-            if item.has_key('value') and item['value'].startswith('http://www.nsfwcorp.com/feed/'):
+            if item.has_key('value') and item['value'].startswith('https://www.nsfwcorp.com/feed/'):
               self.feeds.append(item['value'])
               return self.feeds
        return self.feeds
--- a/recipes/pc_lab.recipe
+++ b/recipes/pc_lab.recipe
@ -1,6 +1,6 @@
 #!/usr/bin/env  python
 from calibre.web.feeds.recipes import BasicNewsRecipe
-
+from calibre.ebooks.BeautifulSoup import Comment
 class PCLab(BasicNewsRecipe):
    cover_url             = 'http://pclab.pl/img/logo.png'
    title                 = u"PC Lab"
@ -52,6 +52,9 @@ class PCLab(BasicNewsRecipe):
            pager = soup2.find('div', attrs={'class':'next'})
            pagetext = soup2.find('div', attrs={'class':'substance'})
            pagetext = pagetext.find('div', attrs={'class':'data'})
            comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
            for comment in comments:
                comment.extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
--- a/recipes/swiatkindle.recipe
+++ b/recipes/swiatkindle.recipe
@ -10,7 +10,7 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class swiatczytnikow(BasicNewsRecipe):
-    title          = u'Swiat Czytnikow'
+    title          = u'Świat Czytników'
    description    = u'Czytniki e-książek w Polsce. Jak wybrać, kupić i korzystać z Amazon Kindle i innych'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'
--- a/recipes/weblogs_sl.recipe
+++ b/recipes/weblogs_sl.recipe
@ -3,7 +3,7 @@ __license__     = 'GPL v3'
 __copyright__   = '4 February 2011, desUBIKado'
 __author__      = 'desUBIKado'
 __version__     = 'v0.09'
-__date__        = '02, December 2012'
+__date__        = '14, May 2013'
 '''
 http://www.weblogssl.com/
 '''
@ -56,15 +56,16 @@ class weblogssl(BasicNewsRecipe):
                          ,(u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom')
                          ,(u'Fandemia', u'http://feeds.weblogssl.com/fandemia')
                          ,(u'Tendencias', u'http://feeds.weblogssl.com/trendencias')
-                          ,(u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas')
+                          ,(u'Tendencias Belleza', u'http://feeds.weblogssl.com/trendenciasbelleza')
                          ,(u'Tendencias Hombre', u'http://feeds.weblogssl.com/trendenciashombre')
                          ,(u'Tendencias Shopping', u'http://feeds.weblogssl.com/trendenciasshopping')
                          ,(u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar')
                          ,(u'Compradicci\xf3n', u'http://feeds.weblogssl.com/compradiccion')
                          ,(u'Decoesfera', u'http://feeds.weblogssl.com/decoesfera')
                          ,(u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia')
                          ,(u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica')
                          ,(u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg')
-                          ,(u'Tendencias Belleza', u'http://feeds.weblogssl.com/trendenciasbelleza')
+                          ,(u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas')
                          ,(u'Tendencias Hombre', u'http://feeds.weblogssl.com/trendenciashombre')
                          ,(u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas')
                          ,(u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion')
                          ,(u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1')
@ -119,23 +120,6 @@ class weblogssl(BasicNewsRecipe):
        return soup
    # Para obtener la url original del articulo a partir de la de "feedsportal"
    # El siguiente código es gracias al usuario "bosplans" de www.mobileread.com
    # http://www.mobileread.com/forums/showthread.php?t=130297
    def get_article_url(self, article):
        '''
        Return the URL of the original article for a feed entry.

        ``article`` is the parsed feed entry; only its ``get`` method is used,
        to read the ``link`` and ``guid`` fields.

        * No ``link`` field: returns ``None``.
        * Links whose fourth path segment from the end is ``xataka2``: returns
          the entry's ``guid`` (or ``None`` if it has none).
        * "feedsportal" links ending in ``story01.htm``: the segment before it
          holds the original address in an escaped form, which is decoded and
          returned with an ``http://`` prefix.
        * Anything else: the link is returned unchanged.
        '''
        link = article.get('link', None)
        if link is None:
            # There is no URL to report; returning the entry itself would hand
            # a non-URL object back to the caller.
            return None
        segments = link.split('/')
        # Short links have fewer than four segments, so check the length
        # before looking at segments[-4].
        if len(segments) >= 4 and segments[-4] == "xataka2":
            return article.get('guid', None)
        if len(segments) >= 2 and segments[-1] == "story01.htm":
            encoded = segments[-2]
            # The substitutions are applied one after another and the order
            # matters (e.g. '0N' must be expanded before '0L0S' and '0A').
            escapes = (('0B', '.'), ('0C', '/'), ('0D', '?'), ('0E', '-'),
                       ('0F', '='), ('0G', '&'), ('0N', '.com'),
                       ('0L0S', 'www.'), ('0A', '0'))
            for token, plain in escapes:
                encoded = encoded.replace(token, plain)
            link = "http://" + encoded
        return link
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@ -32,7 +32,7 @@ defaults.
 # Set the use_series_auto_increment_tweak_when_importing tweak to True to
 # use the above values when importing/adding books. If this tweak is set to
 # False (the default) then the series number will be set to 1 if it is not
-# explicitly set to during the import. If set to True, then the
+# explicitly set during the import. If set to True, then the
 # series index will be set according to the series_index_auto_increment setting.
 # Note that the use_series_auto_increment_tweak_when_importing tweak is used
 # only when a value is not provided during import. If the importing regular
@ -536,3 +536,4 @@ many_libraries = 10
 # yellow when using a Virtual Library. By setting this to False, you can turn
 # that off.
 highlight_virtual_library_book_count = True
--- a/setup/installer/linux/freeze2.py
+++ b/setup/installer/linux/freeze2.py
@ -38,7 +38,7 @@ binary_includes = [
                '/lib/libz.so.1',
                '/usr/lib/libtiff.so.5',
                '/lib/libbz2.so.1',
-                '/usr/lib/libpoppler.so.28',
+                '/usr/lib/libpoppler.so.37',
                '/usr/lib/libxml2.so.2',
                '/usr/lib/libopenjpeg.so.2',
                '/usr/lib/libxslt.so.1',
--- a/setup/installer/osx/app/main.py
+++ b/setup/installer/osx/app/main.py
@ -378,7 +378,7 @@ class Py2App(object):
    @flush
    def add_poppler(self):
        info('\nAdding poppler')
-        for x in ('libpoppler.28.dylib',):
+        for x in ('libpoppler.37.dylib',):
            self.install_dylib(os.path.join(SW, 'lib', x))
        for x in ('pdftohtml', 'pdftoppm', 'pdfinfo'):
            self.install_dylib(os.path.join(SW, 'bin', x), False)
--- a/setup/installer/windows/notes.rst
+++ b/setup/installer/windows/notes.rst
@ -116,7 +116,9 @@ tarball. Edit setup.py and set zip_safe=False. Then run::
 Run the following command to install python dependencies::
-    easy_install --always-unzip -U mechanize pyreadline python-dateutil dnspython cssutils clientform pycrypto cssselect
+    easy_install --always-unzip -U mechanize python-dateutil dnspython cssutils clientform pycrypto cssselect
 Install pyreadline from https://pypi.python.org/pypi/pyreadline/2.0
 Install pywin32 and edit win32com\__init__.py setting _frozen = True and
 __gen_path__ to a temp dir (otherwise it tries to set it to a dir in the
--- a/setup/iso_639/ca.po
+++ b/setup/iso_639/ca.po
@ -12,14 +12,14 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2013-04-21 08:00+0000\n"
+"PO-Revision-Date: 2013-05-06 09:36+0000\n"
 "Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
 "Language-Team: Catalan <linux@softcatala.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2013-04-22 05:23+0000\n"
+"X-Launchpad-Export-Date: 2013-05-07 05:28+0000\n"
-"X-Generator: Launchpad (build 16567)\n"
+"X-Generator: Launchpad (build 16598)\n"
 "Language: ca\n"
 #. name for aaa
@ -2024,7 +2024,7 @@ msgstr "Àzeri meridional"
 #. name for aze
 msgid "Azerbaijani"
-msgstr "Serbi"
+msgstr ""
 #. name for azg
 msgid "Amuzgo; San Pedro Amuzgos"
@ -7288,7 +7288,7 @@ msgstr "Epie"
 #. name for epo
 msgid "Esperanto"
-msgstr "Alemany"
+msgstr "Esperanto"
 #. name for era
 msgid "Eravallan"
@ -21816,7 +21816,7 @@ msgstr "Ramoaaina"
 #. name for raj
 msgid "Rajasthani"
-msgstr "Marwari"
+msgstr ""
 #. name for rak
 msgid "Tulu-Bohuai"
--- a/setup/iso_639/cs.po
+++ b/setup/iso_639/cs.po
@ -13762,7 +13762,7 @@ msgstr ""
 #. name for lav
 msgid "Latvian"
-msgstr "litevština"
+msgstr ""
 #. name for law
 msgid "Lauje"
--- a/setup/iso_639/da.po
+++ b/setup/iso_639/da.po
@ -1429,7 +1429,7 @@ msgstr ""
 #. name for arg
 msgid "Aragonese"
-msgstr "Færøsk"
+msgstr ""
 #. name for arh
 msgid "Arhuaco"
--- a/setup/iso_639/de.po
+++ b/setup/iso_639/de.po
@ -18,14 +18,14 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2013-04-11 13:29+0000\n"
+"PO-Revision-Date: 2013-05-06 09:41+0000\n"
 "Last-Translator: Simon Schütte <simonschuette@arcor.de>\n"
 "Language-Team: Ubuntu German Translators\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2013-04-12 05:20+0000\n"
+"X-Launchpad-Export-Date: 2013-05-07 05:29+0000\n"
-"X-Generator: Launchpad (build 16564)\n"
+"X-Generator: Launchpad (build 16598)\n"
 "Language: de\n"
 #. name for aaa
@ -319,7 +319,7 @@ msgstr "Adangme"
 #. name for adb
 msgid "Adabe"
-msgstr "Adangme"
+msgstr "Adabe"
 #. name for add
 msgid "Dzodinka"
@ -367,7 +367,7 @@ msgstr "Adap"
 #. name for adq
 msgid "Adangbe"
-msgstr "Adangme"
+msgstr "Adangbe"
 #. name for adr
 msgid "Adonara"
--- a/setup/iso_639/eu.po
+++ b/setup/iso_639/eu.po
@ -2022,7 +2022,7 @@ msgstr ""
 #. name for aze
 msgid "Azerbaijani"
-msgstr "Turkiera"
+msgstr ""
 #. name for azg
 msgid "Amuzgo; San Pedro Amuzgos"
@ -13126,7 +13126,7 @@ msgstr ""
 #. name for kur
 msgid "Kurdish"
-msgstr "Turkiera"
+msgstr ""
 #. name for kus
 msgid "Kusaal"
@ -16190,7 +16190,7 @@ msgstr ""
 #. name for mlt
 msgid "Maltese"
-msgstr "Koreera"
+msgstr ""
 #. name for mlu
 msgid "To'abaita"
--- a/setup/iso_639/gl.po
+++ b/setup/iso_639/gl.po
@ -13764,7 +13764,7 @@ msgstr "Laba"
 #. name for lav
 msgid "Latvian"
-msgstr "Lituano"
+msgstr ""
 #. name for law
 msgid "Lauje"
@ -22212,7 +22212,7 @@ msgstr "Roglai do norte"
 #. name for roh
 msgid "Romansh"
-msgstr "Romanés"
+msgstr ""
 #. name for rol
 msgid "Romblomanon"
--- a/setup/iso_639/hu.po
+++ b/setup/iso_639/hu.po
@ -20538,7 +20538,7 @@ msgstr ""
 #. name for peo
 msgid "Persian; Old (ca. 600-400 B.C.)"
-msgstr "perzsa"
+msgstr ""
 #. name for pep
 msgid "Kunja"
--- a/setup/iso_639/is.po
+++ b/setup/iso_639/is.po
@ -15049,7 +15049,7 @@ msgstr "Magahi"
 #. name for mah
 msgid "Marshallese"
-msgstr "Maltneska"
+msgstr ""
 #. name for mai
 msgid "Maithili"
--- a/setup/iso_639/ko.po
+++ b/setup/iso_639/ko.po
@ -3742,7 +3742,7 @@ msgstr ""
 #. name for bre
 msgid "Breton"
-msgstr "프랑스어"
+msgstr ""
 #. name for brf
 msgid "Bera"
--- a/setup/iso_639/mr.po
+++ b/setup/iso_639/mr.po
@ -6804,7 +6804,7 @@ msgstr "डोगोन; तेबुल उरे"
 #. name for dua
 msgid "Duala"
-msgstr "ड्युला"
+msgstr ""
 #. name for dub
 msgid "Dubli"
--- a/setup/iso_639/nb.po
+++ b/setup/iso_639/nb.po
@ -27790,7 +27790,7 @@ msgstr ""
 #. name for wln
 msgid "Walloon"
-msgstr "Vietnamesisk"
+msgstr ""
 #. name for wlo
 msgid "Wolio"
--- a/setup/iso_639/oc.po
+++ b/setup/iso_639/oc.po
@ -9862,7 +9862,7 @@ msgstr "Hya"
 #. name for hye
 msgid "Armenian"
-msgstr "Albanés"
+msgstr ""
 #. name for iai
 msgid "Iaai"
@ -13762,7 +13762,7 @@ msgstr "Laba"
 #. name for lav
 msgid "Latvian"
-msgstr "Lituanian"
+msgstr ""
 #. name for law
 msgid "Lauje"
--- a/setup/iso_639/ru.po
+++ b/setup/iso_639/ru.po
@ -2089,7 +2089,7 @@ msgstr "Башкирский"
 #. name for bal
 msgid "Baluchi"
-msgstr "Балийский"
+msgstr ""
 #. name for bam
 msgid "Bambara"
--- a/setup/iso_639/sk.po
+++ b/setup/iso_639/sk.po
@ -13763,7 +13763,7 @@ msgstr ""
 #. name for lav
 msgid "Latvian"
-msgstr "Lotyšský"
+msgstr ""
 #. name for law
 msgid "Lauje"
--- a/setup/iso_639/sv.po
+++ b/setup/iso_639/sv.po
--- a/setup/iso_639/zh_CN.po
+++ b/setup/iso_639/zh_CN.po
@ -1016,7 +1016,7 @@ msgstr ""
 #. name for amh
 msgid "Amharic"
-msgstr "阿拉伯语"
+msgstr ""
 #. name for ami
 msgid "Amis"
--- a/setup/translations.py
+++ b/setup/translations.py
@ -63,7 +63,6 @@ class POT(Command): # {{{
        return '\n'.join(ans)
    def run(self, opts):
        pot_header = textwrap.dedent('''\
        # Translation template file..
@ -117,7 +116,6 @@ class POT(Command): # {{{
                f.write(src)
            self.info('Translations template:', os.path.abspath(pot))
        return pot
 # }}}
@ -134,6 +132,7 @@ class Translations(POT): # {{{
        return locale, os.path.join(self.DEST, locale, 'messages.mo')
    def run(self, opts):
        self.iso639_errors = []
        for f in self.po_files():
            locale, dest = self.mo_file(f)
            base = os.path.dirname(dest)
@ -146,18 +145,46 @@ class Translations(POT): # {{{
                    '%s.po'%iscpo)
            if os.path.exists(iso639):
                self.check_iso639(iso639)
                dest = self.j(self.d(dest), 'iso639.mo')
                if self.newer(dest, iso639):
-                    self.info('\tCopying ISO 639 translations')
+                    self.info('\tCopying ISO 639 translations for %s' % iscpo)
                    subprocess.check_call(['msgfmt', '-o', dest, iso639])
            elif locale not in ('en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc',
                    'ltg', 'nds', 'te', 'yi', 'fo', 'sq', 'ast', 'ml', 'ku',
                    'fr_CA', 'him', 'jv', 'ka', 'fur', 'ber'):
                self.warn('No ISO 639 translations for locale:', locale)
        if self.iso639_errors:
            for err in self.iso639_errors:
                print (err)
            raise SystemExit(1)
        self.write_stats()
        self.freeze_locales()
    def check_iso639(self, path):
        '''
        Look for translated language names in the .po file at ``path`` that
        are used for more than one language.

        Every non-empty ``msgstr`` is remembered together with the ``msgid``
        that preceded it. When the same ``msgstr`` shows up again and both
        English names resolve to different, non-empty language codes, a
        message is appended to ``self.iso639_errors``.
        '''
        from calibre.utils.localization import langnames_to_langcodes
        with open(path, 'rb') as f:
            raw = f.read()
        seen = {}
        current_id = None
        for entry in re.finditer(r'^(msgid|msgstr)\s+"(.*?)"', raw, re.M):
            kind, text = entry.group(1), entry.group(2)
            if kind == 'msgid':
                current_id = text
                continue
            if not text:
                # Untranslated entry, nothing to compare
                continue
            previous_id = seen.get(text, None)
            if previous_id is not None:
                codes = langnames_to_langcodes([previous_id, current_id])
                if codes[current_id] and codes[previous_id] and codes[current_id] != codes[previous_id]:
                    self.iso639_errors.append('In file %s the name %s is used as translation for both %s and %s' % (
                        os.path.basename(path), text, current_id, previous_id))
            # The most recent msgid wins for later comparisons
            seen[text] = current_id
    def freeze_locales(self):
        zf = self.DEST + '.zip'
        from calibre import CurrentDir
@ -191,7 +218,6 @@ class Translations(POT): # {{{
            locale = self.mo_file(f)[0]
            stats[locale] = min(1.0, float(trans)/total)
        import cPickle
        cPickle.dump(stats, open(dest, 'wb'), -1)
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -4,7 +4,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = u'calibre'
-numeric_version = (0, 9, 29)
+numeric_version = (0, 9, 30)
 __version__   = u'.'.join(map(unicode, numeric_version))
 __author__    = u"Kovid Goyal <kovid@kovidgoyal.net>"
@ -66,10 +66,8 @@ else:
            filesystem_encoding = 'utf-8'
            # On linux, unicode arguments to os file functions are coerced to an ascii
            # bytestring if sys.getfilesystemencoding() == 'ascii', which is
-            # just plain dumb. So issue a warning.
+            # just plain dumb. This is fixed by the icu.py module which, when
-            print ('WARNING: You do not have the LANG environment variable set correctly. '
+            # imported changes ascii to utf-8
                    'This will cause problems with non-ascii filenames. '
                    'Set it to something like en_US.UTF-8.\n')
    except:
        filesystem_encoding = 'utf-8'
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -1548,12 +1548,13 @@ class StoreNextoStore(StoreBase):
 class StoreNookUKStore(StoreBase):
    name = 'Nook UK'
-    author = 'John Schember'
+    author = 'Charles Haley'
-    description = u'Barnes & Noble S.Ã  r.l, a subsidiary of Barnes & Noble, Inc., a leading retailer of content, digital media and educational products, is proud to bring the award-winning NOOKÂ® reading experience and a leading digital bookstore to the UK.'  # noqa
+    description = u'Barnes & Noble S.A.R.L, a subsidiary of Barnes & Noble, Inc., a leading retailer of content, digital media and educational products, is proud to bring the award-winning NOOK reading experience and a leading digital bookstore to the UK.'  # noqa
    actual_plugin = 'calibre.gui2.store.stores.nook_uk_plugin:NookUKStore'
    headquarters = 'UK'
    formats = ['NOOK']
    affiliate = True
 class StoreOpenBooksStore(StoreBase):
    name = 'Open Books'
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -240,7 +240,8 @@ class ANDROID(USBMS):
            'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID',
            'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E',
            'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC', 'F', 'MT65XX_MS',
-            'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1', 'GT-S5660M_CARD', 'XT894']
+            'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1', 'GT-S5660M_CARD', 'XT894', '_USB',
    ]
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -251,7 +252,9 @@ class ANDROID(USBMS):
            'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0', 'XT875',
            'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727',
            'USB_FLASH_DRIVER', 'ANDROID', 'MID7042', '7035', 'VIEWPAD_7E',
-            'NOVO7', 'ADVANCED', 'TABLET_PC', 'F', 'E400_SD_CARD', 'ST80208-1', 'XT894']
+            'NOVO7', 'ADVANCED', 'TABLET_PC', 'F', 'E400_SD_CARD', 'ST80208-1', 'XT894',
            '_USB',
    ]
    OSX_MAIN_MEM = 'Android Device Main Memory'
--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
--- a/src/calibre/devices/blackberry/driver.py
+++ b/src/calibre/devices/blackberry/driver.py
@ -19,10 +19,10 @@ class BLACKBERRY(USBMS):
    VENDOR_ID   = [0x0fca]
    PRODUCT_ID  = [0x8004, 0x0004]
-    BCD         = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211, 0x0220]
+    BCD         = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211, 0x0220, 0x232]
    VENDOR_NAME = 'RIM'
-    WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['BLACKBERRY_SD', 'BLACKBERRY']
    MAIN_MEMORY_VOLUME_LABEL  = 'Blackberry SD Card'
--- a/src/calibre/devices/idevice/init.py
+++ b/src/calibre/devices/idevice/init.py
@ -0,0 +1,2 @@
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
--- a/src/calibre/devices/idevice/libimobiledevice.py
+++ b/src/calibre/devices/idevice/libimobiledevice.py
--- a/src/calibre/devices/idevice/parse_xml.py
+++ b/src/calibre/devices/idevice/parse_xml.py
@ -0,0 +1,300 @@
 #!/usr/bin/env python
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 """
 https://github.com/ishikawa/python-plist-parser/blob/master/plist_parser.py
 A `Property Lists`_ is a data representation used in Apple's Mac OS X as
 a convenient way to store standard object types, such as string, number,
 boolean, and container object.
 This file contains a class ``XmlPropertyListParser`` that parses
 a property list file and returns a python native data structure.
    :copyright: 2008 by Takanori Ishikawa <takanori.ishikawa@gmail.com>
    :license: MIT (See LICENSE file for more details)
 .. _Property Lists: http://developer.apple.com/documentation/Cocoa/Conceptual/PropertyLists/
 """
 class PropertyListParseError(Exception):
    """Signals that a property list could not be parsed."""
 class XmlPropertyListParser(object):
    """
    Convert XML `Property Lists`_ into native Python objects.

    The instance implements the SAX2 ``ContentHandler`` callbacks itself and
    builds the result on an internal stack of open containers.
    Property list objects include ``string``, ``unicode``,
    ``list``, ``dict``, ``datetime``, and ``int`` or ``float``;
    ``<true/>``/``<false/>`` become ``bool`` and ``<data>`` is base64-decoded.

        :copyright: 2008 by Takanori Ishikawa <takanori.ishikawa@gmail.com>
        :license: MIT License

    .. _Property Lists: http://developer.apple.com/documentation/Cocoa/Conceptual/PropertyLists/
    """

    def _assert(self, test, message):
        """Raise ``PropertyListParseError(message)`` unless ``test`` is truthy."""
        if not test:
            raise PropertyListParseError(message)

    # ------------------------------------------------
    # SAX2: ContentHandler
    # ------------------------------------------------
    # The callbacks below are part of the handler interface but are not
    # needed for property lists, so they do nothing.
    def setDocumentLocator(self, locator):
        pass

    def startPrefixMapping(self, prefix, uri):
        pass

    def endPrefixMapping(self, prefix):
        pass

    def startElementNS(self, name, qname, attrs):
        pass

    def endElementNS(self, name, qname):
        pass

    def ignorableWhitespace(self, whitespace):
        pass

    def processingInstruction(self, target, data):
        pass

    def skippedEntity(self, name):
        pass

    def startDocument(self):
        """Reset the parser state at the start of a document."""
        self.__stack = []
        self.__plist = self.__key = self.__characters = None
        # For reducing runtime type checking,
        # the parser caches top level object type.
        self.__in_dict = False

    def endDocument(self):
        """
        Check the final state: a top level value must have been parsed and
        every container that was opened must have been closed again.

        Raises ``PropertyListParseError`` otherwise.
        """
        self._assert(self.__plist is not None, "A top level element must be <plist>.")
        # Emptiness test instead of an identity comparison with the int 0.
        self._assert(
            not self.__stack,
            "multiple objects at top level.")

    def startElement(self, name, attributes):
        """Run the start callback for ``name`` and, for elements with text
        content, start buffering character data."""
        if name in XmlPropertyListParser.START_CALLBACKS:
            XmlPropertyListParser.START_CALLBACKS[name](self, name, attributes)
        if name in XmlPropertyListParser.PARSE_CALLBACKS:
            self.__characters = []

    def endElement(self, name):
        """Run the end callback for ``name`` and, for elements with text
        content, hand the buffered text to the matching parse callback."""
        if name in XmlPropertyListParser.END_CALLBACKS:
            XmlPropertyListParser.END_CALLBACKS[name](self, name)
        if name in XmlPropertyListParser.PARSE_CALLBACKS:
            # Creates character string from buffered characters.
            content = ''.join(self.__characters)
            # For compatibility with ``xml.etree`` and ``plistlib``,
            # convert text string to ascii, if possible
            try:
                content = content.encode('ascii')
            except (UnicodeError, AttributeError):
                pass
            XmlPropertyListParser.PARSE_CALLBACKS[name](self, name, content)
            self.__characters = None

    def characters(self, content):
        """Buffer character data while inside an element with text content."""
        if self.__characters is not None:
            self.__characters.append(content)

    # ------------------------------------------------
    # XmlPropertyListParser private
    # ------------------------------------------------
    def _push_value(self, value):
        """
        Store a parsed ``value``: as the top level result when no container
        is open, otherwise in the innermost open container (under the pending
        key for a dict, appended for a list).

        Raises ``PropertyListParseError`` for a second top level value or a
        dict value without a preceding key.
        """
        if not self.__stack:
            self._assert(self.__plist is None, "Multiple objects at top level")
            self.__plist = value
        else:
            # Only dicts and lists are ever pushed on the stack.
            top = self.__stack[-1]
            if self.__in_dict:
                k = self.__key
                if k is None:
                    raise PropertyListParseError("Missing key for dictionary.")
                top[k] = value
                self.__key = None
            else:
                top.append(value)

    def _push_stack(self, value):
        """Open the container ``value`` and cache whether it is a dict."""
        self.__stack.append(value)
        self.__in_dict = isinstance(value, dict)

    def _pop_stack(self):
        """Close the innermost container and refresh the cached dict flag."""
        self.__stack.pop()
        # bool() keeps the flag a real boolean when the stack becomes empty.
        self.__in_dict = bool(self.__stack) and isinstance(self.__stack[-1], dict)

    def _start_plist(self, name, attrs):
        """Accept a single ``<plist>`` element whose version, if given, is 1.0."""
        self._assert(not self.__stack and self.__plist is None, "<plist> more than once.")
        self._assert(attrs.get('version', '1.0') == '1.0',
                     "version 1.0 is only supported, but was '%s'." % attrs.get('version'))

    def _start_array(self, name, attrs):
        v = list()
        self._push_value(v)
        self._push_stack(v)

    def _start_dict(self, name, attrs):
        v = dict()
        self._push_value(v)
        self._push_stack(v)

    def _end_array(self, name):
        self._pop_stack()

    def _end_dict(self, name):
        """Close a dict; a key that never received a value is an error."""
        if self.__key is not None:
            raise PropertyListParseError("Missing value for key '%s'" % self.__key)
        self._pop_stack()

    def _start_true(self, name, attrs):
        self._push_value(True)

    def _start_false(self, name, attrs):
        self._push_value(False)

    def _parse_key(self, name, content):
        """Remember ``content`` as the key for the next dict value."""
        if not self.__in_dict:
            # Deliberately lenient: a stray <key> is reported and skipped
            # instead of aborting the whole parse.
            print("XmlPropertyListParser() WARNING: ignoring <key>%s</key> (<key> elements must be contained in <dict> element)" % content)
        else:
            self.__key = content

    def _parse_string(self, name, content):
        self._push_value(content)

    def _parse_data(self, name, content):
        """Store the base64-decoded content of a ``<data>`` element."""
        import base64
        self._push_value(base64.b64decode(content))

    # http://www.apple.com/DTDs/PropertyList-1.0.dtd says:
    #
    # Contents should conform to a subset of ISO 8601
    # (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'.
    # Smaller units may be omitted with a loss of precision)
    import re
    DATETIME_PATTERN = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z$")

    def _parse_date(self, name, content):
        """
        Store the content of a ``<date>`` element as a ``datetime.datetime``.

        Units are read from year down to second until the first missing one;
        a missing month or day is filled in with 1.

        Raises ``PropertyListParseError`` if ``content`` does not match
        ``DATETIME_PATTERN``.
        """
        import datetime
        units = ('year', 'month', 'day', 'hour', 'minute', 'second', )
        pattern = XmlPropertyListParser.DATETIME_PATTERN
        match = pattern.match(content)
        if not match:
            raise PropertyListParseError("Failed to parse datetime '%s'" % content)
        groups, components = match.groupdict(), []
        for key in units:
            value = groups[key]
            if value is None:
                break
            components.append(int(value))
        # datetime() needs at least year, month and day.
        while len(components) < 3:
            components.append(1)
        d = datetime.datetime(*components)
        self._push_value(d)

    def _parse_real(self, name, content):
        self._push_value(float(content))

    def _parse_integer(self, name, content):
        self._push_value(int(content))

    # Element name -> handler run when the element starts.
    START_CALLBACKS = {
        'plist': _start_plist,
        'array': _start_array,
        'dict': _start_dict,
        'true': _start_true,
        'false': _start_false,
    }
    # Element name -> handler run when the element ends.
    END_CALLBACKS = {
        'array': _end_array,
        'dict': _end_dict,
    }
    # Element name -> handler that receives the element's text content.
    PARSE_CALLBACKS = {
        'key': _parse_key,
        'string': _parse_string,
        'data': _parse_data,
        'date': _parse_date,
        'real': _parse_real,
        'integer': _parse_integer,
    }

    # ------------------------------------------------
    # XmlPropertyListParser
    # ------------------------------------------------
    def _to_stream(self, io_or_string):
        """
        Return a file-like object for ``io_or_string``.

        Strings are wrapped in a ``StringIO``; objects with a callable
        ``read`` attribute are returned as they are. Anything else raises
        ``TypeError``.
        """
        if isinstance(io_or_string, basestring):
            # Creates a string stream for in-memory contents.
            from cStringIO import StringIO
            return StringIO(io_or_string)
        elif hasattr(io_or_string, 'read') and callable(getattr(io_or_string, 'read')):
            return io_or_string
        else:
            raise TypeError('Can\'t convert %s to file-like-object' % type(io_or_string))

    def _parse_using_etree(self, xml_input):
        """
        Parse ``xml_input`` with ``cElementTree.iterparse``, feeding start and
        end events to the same callback tables the SAX handler uses.

        Raises ``PropertyListParseError`` when the parser reports a
        ``SyntaxError``.
        """
        from xml.etree.cElementTree import iterparse
        parser = iterparse(self._to_stream(xml_input), events=(b'start', b'end'))
        self.startDocument()
        try:
            for action, element in parser:
                name = element.tag
                if action == 'start':
                    if name in XmlPropertyListParser.START_CALLBACKS:
                        XmlPropertyListParser.START_CALLBACKS[name](self, element.tag, element.attrib)
                elif action == 'end':
                    if name in XmlPropertyListParser.END_CALLBACKS:
                        XmlPropertyListParser.END_CALLBACKS[name](self, name)
                    if name in XmlPropertyListParser.PARSE_CALLBACKS:
                        XmlPropertyListParser.PARSE_CALLBACKS[name](self, name, element.text or "")
                    # The element has been consumed; drop its contents.
                    element.clear()
        except SyntaxError as e:
            raise PropertyListParseError(e)
        self.endDocument()
        return self.__plist

    def _parse_using_sax_parser(self, xml_input):
        """
        Parse ``xml_input`` with a SAX reader that uses this instance as its
        content handler.

        Raises ``PropertyListParseError`` on a ``SAXParseException``.
        """
        from xml.sax import make_parser, xmlreader, SAXParseException
        source = xmlreader.InputSource()
        source.setByteStream(self._to_stream(xml_input))
        reader = make_parser()
        reader.setContentHandler(self)
        try:
            reader.parse(source)
        except SAXParseException as e:
            raise PropertyListParseError(e)
        return self.__plist

    def parse(self, xml_input):
        """
        Parse the property list (`.plist`, `.xml`, for example) ``xml_input``,
        which can be either a string or a file-like object.

        >>> parser = XmlPropertyListParser()
        >>> parser.parse(r'<plist version="1.0">'
        ...              r'<dict><key>Python</key><string>.py</string></dict>'
        ...              r'</plist>')
        {'Python': '.py'}
        """
        try:
            return self._parse_using_etree(xml_input)
        except ImportError:
            # No xml.etree.cElementTree found.
            return self._parse_using_sax_parser(xml_input)
--- a/src/calibre/devices/interface.py
+++ b/src/calibre/devices/interface.py
@ -107,6 +107,12 @@ class DevicePlugin(Plugin):
    #: :meth:`set_user_blacklisted_devices`
    ASK_TO_ALLOW_CONNECT = False
    #: Set this to a dictionary of the form {'title':title, 'msg':msg, 'det_msg':detailed_msg} to have calibre popup
    #: a message to the user after some callbacks are run (currently only upload_books).
    #: Be careful to not spam the user with too many messages. This variable is checked after *every* callback,
    #: so only set it when you really need to.
    user_feedback_after_callback = None
    @classmethod
    def get_gui_name(cls):
        if hasattr(cls, 'gui_name'):
@ -165,8 +171,7 @@ class DevicePlugin(Plugin):
                                            'rev_')[-1].replace(':', 'a'), 16)
                            except:
                                bcd = None
-                           return True, (vendor_id, product_id, bcd, None,
+                            return True, (vendor_id, product_id, bcd, None, None, None)
                                   None, None)
        return False, None
    def test_bcd(self, bcdDevice, bcd):
@ -638,7 +643,6 @@ class DevicePlugin(Plugin):
        '''
        device_prefs.set_overrides()
    # Dynamic control interface.
    # The following methods are probably called on the GUI thread. Any driver
    # that implements these methods must take pains to be thread safe, because
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@ -35,7 +35,7 @@ class KOBO(USBMS):
    gui_name = 'Kobo Reader'
    description = _('Communicate with the Kobo Reader')
    author = 'Timothy Legge and David Forrester'
-    version = (2, 0, 9)
+    version = (2, 0, 10)
    dbversion = 0
    fwversion = 0
@ -45,6 +45,7 @@ class KOBO(USBMS):
    supported_platforms = ['windows', 'osx', 'linux']
    booklist_class = CollectionsBookList
    book_class = Book
    # Ordered list of supported formats
    FORMATS     = ['epub', 'pdf', 'txt', 'cbz', 'cbr']
@ -115,7 +116,6 @@ class KOBO(USBMS):
    def initialize(self):
        # Run the USBMS base-class initialisation first, then set the
        # book_class and dbversion attributes on this instance.
        USBMS.initialize(self)
        self.book_class = Book
        self.dbversion = 7
    def books(self, oncard=None, end_session=True):
@ -1213,7 +1213,7 @@ class KOBOTOUCH(KOBO):
    min_dbversion_archive           = 71
    min_dbversion_images_on_sdcard  = 77
-    max_supported_fwversion         = (2,5,1)
+    max_supported_fwversion         = (2,5,3)
    min_fwversion_images_on_sdcard  = (2,4,1)
    has_kepubs = True
@ -1237,11 +1237,9 @@ class KOBOTOUCH(KOBO):
            _('Keep cover aspect ratio') +
            ':::'+_('When uploading covers, do not change the aspect ratio when resizing for the device.'
                    ' This is for firmware versions 2.3.1 and later.'),
-            _('Show expired books') +
+            _('Show archived books') +
-            ':::'+_('A bug in an earlier version left non kepubs book records'
+            ':::'+_('Archived books are listed on the device but need to be downloaded to read.'
-                ' in the database.  With this option Calibre will show the '
+                    ' Use this option to show these books and match them with books in the calibre library.'),
                'expired records and allow you to delete them with '
                'the new delete logic.'),
            _('Show Previews') +
            ':::'+_('Kobo previews are included on the Touch and some other versions'
                ' by default they are no longer displayed as there is no good reason to '
@ -1289,7 +1287,7 @@ class KOBOTOUCH(KOBO):
    OPT_UPLOAD_COVERS               = 3
    OPT_UPLOAD_GRAYSCALE_COVERS     = 4
    OPT_KEEP_COVER_ASPECT_RATIO     = 5
-    OPT_SHOW_EXPIRED_BOOK_RECORDS   = 6
+    OPT_SHOW_ARCHIVED_BOOK_RECORDS  = 6
    OPT_SHOW_PREVIEWS               = 7
    OPT_SHOW_RECOMMENDATIONS        = 8
    OPT_UPDATE_SERIES_DETAILS       = 9
@ -1347,6 +1345,10 @@ class KOBOTOUCH(KOBO):
        self.set_device_name()
        return super(KOBOTOUCH, self).get_device_information(end_session)
    def device_database_path(self):
        '''Return the normalized path of ``.kobo/KoboReader.sqlite`` below the main prefix.'''
        raw_path = self._main_prefix + '.kobo/KoboReader.sqlite'
        return self.normalize_path(raw_path)
    def books(self, oncard=None, end_session=True):
        debug_print("KoboTouch:books - oncard='%s'"%oncard)
        from calibre.ebooks.metadata.meta import path_to_ext
@ -1599,9 +1601,7 @@ class KOBOTOUCH(KOBO):
        self.debug_index = 0
        import sqlite3 as sqlite
-        with closing(sqlite.connect(
+        with closing(sqlite.connect(self.device_database_path())) as connection:
            self.normalize_path(self._main_prefix +
                '.kobo/KoboReader.sqlite'))) as connection:
            debug_print("KoboTouch:books - reading device database")
            # return bytestrings if the content cannot the decoded as unicode
@ -1618,7 +1618,21 @@ class KOBOTOUCH(KOBO):
            debug_print("KoboTouch:books - shelf list:", self.bookshelvelist)
            opts = self.settings()
-            if self.supports_series():
+            if self.supports_kobo_archive():
                query= ("select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, " \
                    "ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility, " \
                    "IsDownloaded, Series, SeriesNumber, ___UserID " \
                    " from content " \
                    " where BookID is Null " \
                    " and ((Accessibility = -1 and IsDownloaded in ('true', 1 )) or (Accessibility in (1,2) %(expiry)s) " \
                    "    %(previews)s %(recomendations)s )" \
                    " and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) and ContentType = 6)") % \
                        dict(\
                             expiry="" if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else "and IsDownloaded in ('true', 1)", \
                             previews=" or (Accessibility in (6) and ___UserID <> '')" if opts.extra_customization[self.OPT_SHOW_PREVIEWS] else "", \
                             recomendations=" or (Accessibility in (-1, 4, 6) and ___UserId = '')" if opts.extra_customization[self.OPT_SHOW_RECOMMENDATIONS] else "" \
                             )
            elif self.supports_series():
                query= ("select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, " \
                    "ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility, " \
                    "IsDownloaded, Series, SeriesNumber, ___UserID " \
@ -1627,7 +1641,7 @@ class KOBOTOUCH(KOBO):
                    " and ((Accessibility = -1 and IsDownloaded in ('true', 1)) or (Accessibility in (1,2)) %(previews)s %(recomendations)s )" \
                    " and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s") % \
                        dict(\
-                             expiry=" and ContentType = 6)" if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ")", \
+                             expiry=" and ContentType = 6)" if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else ")", \
                             previews=" or (Accessibility in (6) and ___UserID <> '')" if opts.extra_customization[self.OPT_SHOW_PREVIEWS] else "", \
                             recomendations=" or (Accessibility in (-1, 4, 6) and ___UserId = '')" if opts.extra_customization[self.OPT_SHOW_RECOMMENDATIONS] else "" \
                             )
@ -1638,7 +1652,7 @@ class KOBOTOUCH(KOBO):
                    ' from content ' \
                    ' where BookID is Null %(previews)s %(recomendations)s and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % \
                        dict(\
-                             expiry=' and ContentType = 6)' if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')', \
+                             expiry=' and ContentType = 6)' if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else ')', \
                             previews=' and Accessibility <> 6' if opts.extra_customization[self.OPT_SHOW_PREVIEWS] == False else '', \
                             recomendations=' and IsDownloaded in (\'true\', 1)' if opts.extra_customization[self.OPT_SHOW_RECOMMENDATIONS] == False else ''\
                             )
@ -1648,7 +1662,7 @@ class KOBOTOUCH(KOBO):
                    '"1" as IsDownloaded, null as Series, null as SeriesNumber, ___UserID' \
                    ' from content where ' \
                    'BookID is Null and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % dict(expiry=' and ContentType = 6)' \
-                    if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')')
+                    if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else ')')
            else:
                query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
                    'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility, ' \
@ -2586,7 +2600,7 @@ class KOBOTOUCH(KOBO):
    def modify_database_check(self, function):
        # Checks to see whether the database version is supported
        # and whether the user has chosen to support the firmware version
-#        debug_print("KoboTouch:modify_database_check - self.fwversion <= self.max_supported_fwversion=", self.fwversion > self.max_supported_fwversion)
+#        debug_print("KoboTouch:modify_database_check - self.fwversion > self.max_supported_fwversion=", self.fwversion > self.max_supported_fwversion)
        if self.dbversion > self.supported_dbversion or self.fwversion > self.max_supported_fwversion:
            # Unsupported database
            opts = self.settings()
--- a/src/calibre/devices/prst1/driver.py
+++ b/src/calibre/devices/prst1/driver.py
@ -50,10 +50,10 @@ class PRST1(USBMS):
    VENDOR_NAME        = 'SONY'
    WINDOWS_MAIN_MEM   = re.compile(
-            r'(PRS-T(1|2)&)'
+            r'(PRS-T(1|2|2N)&)'
            )
    WINDOWS_CARD_A_MEM = re.compile(
-            r'(PRS-T(1|2)__SD&)'
+            r'(PRS-T(1|2|2N)__SD&)'
            )
    MAIN_MEMORY_VOLUME_LABEL = 'SONY Reader Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'SONY Reader Storage Card'
@ -66,7 +66,7 @@ class PRST1(USBMS):
    EXTRA_CUSTOMIZATION_MESSAGE = [
        _('Comma separated list of metadata fields '
-            'to turn into collections on the device. Possibilities include: ')+\
+            'to turn into collections on the device. Possibilities include: ')+
                    'series, tags, authors',
        _('Upload separate cover thumbnails for books') +
             ':::'+_('Normally, the SONY readers get the cover image from the'
@ -194,11 +194,11 @@ class PRST1(USBMS):
                time_offsets = {}
                for i, row in enumerate(cursor):
                    try:
-                        comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000);
+                        comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000)
                    except (OSError, IOError, TypeError):
                        # In case the db has incorrect path info
                        continue
-                    device_date = int(row[1]);
+                    device_date = int(row[1])
                    offset = device_date - comp_date
                    time_offsets.setdefault(offset, 0)
                    time_offsets[offset] = time_offsets[offset] + 1
@ -345,7 +345,7 @@ class PRST1(USBMS):
        # Insert the sequence Id if it doesn't already exist
        query = ('INSERT INTO sqlite_sequence (name, seq) '
                'SELECT ?, ? '
-                'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)');
+                'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)')
        cursor.execute(query, (table, sequence_id, table,))
        cursor.close()
--- a/src/calibre/devices/smart_device_app/driver.py
+++ b/src/calibre/devices/smart_device_app/driver.py
@ -875,6 +875,9 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
            self.client_device_kind = result.get('deviceKind', '')
            self._debug('Client device kind', self.client_device_kind)
            self.client_device_name = result.get('deviceName', self.client_device_kind)
            self._debug('Client device name', self.client_device_name)
            self.max_book_packet_len = result.get('maxBookContentPacketLen',
                                                  self.BASE_PACKET_LEN)
            self._debug('max_book_packet_len', self.max_book_packet_len)
@ -946,6 +949,8 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
        return False
    def get_gui_name(self):
        '''
        Return the name to show for this device in the GUI.

        The client supplied device name is preferred, then the client device
        kind; either one is combined with ``gui_name`` via
        ``gui_name_template``. If neither is set (or both are empty), the plain
        ``gui_name`` is returned.
        '''
        for attr in ('client_device_name', 'client_device_kind'):
            client_label = getattr(self, attr, None)
            if client_label:
                return self.gui_name_template % (self.gui_name, client_label)
        return self.gui_name
--- a/src/calibre/ebooks/conversion/plugins/txt_input.py
+++ b/src/calibre/ebooks/conversion/plugins/txt_input.py
@ -91,14 +91,15 @@ class TXTInput(InputFormatPlugin):
            log.debug('Using user specified input encoding of %s' % ienc)
        else:
            det_encoding = detect(txt)
            det_encoding, confidence = det_encoding['encoding'], det_encoding['confidence']
            if det_encoding and det_encoding.lower().replace('_', '-').strip() in (
                    'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
                    'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
                # Microsoft Word exports to HTML with encoding incorrectly set to
                # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
                det_encoding = 'gbk'
-            ienc = det_encoding['encoding']
+            ienc = det_encoding
-            log.debug('Detected input encoding as %s with a confidence of %s%%' % (ienc, det_encoding['confidence'] * 100))
+            log.debug('Detected input encoding as %s with a confidence of %s%%' % (ienc, confidence * 100))
        if not ienc:
            ienc = 'utf-8'
            log.debug('No input encoding specified and could not auto detect using %s' % ienc)
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -77,7 +77,7 @@ class Plumber(object):
    def __init__(self, input, output, log, report_progress=DummyReporter(),
            dummy=False, merge_plugin_recs=True, abort_after_input_dump=False,
-            override_input_metadata=False):
+            override_input_metadata=False, for_regex_wizard=False):
        '''
        :param input: Path to input file.
        :param output: Path to output file/directory
@ -87,6 +87,7 @@ class Plumber(object):
        if isbytestring(output):
            output = output.decode(filesystem_encoding)
        self.original_input_arg = input
        self.for_regex_wizard = for_regex_wizard
        self.input = os.path.abspath(input)
        self.output = os.path.abspath(output)
        self.log = log
@ -123,7 +124,7 @@ OptionRecommendation(name='input_profile',
                   'conversion system information on how to interpret '
                   'various information in the input document. For '
                   'example resolution dependent lengths (i.e. lengths in '
-                   'pixels). Choices are:')+\
+                   'pixels). Choices are:')+
                        ', '.join([x.short_name for x in input_profiles()])
        ),
@ -135,7 +136,7 @@ OptionRecommendation(name='output_profile',
                   'created document for the specified device. In some cases, '
                   'an output profile is required to produce documents that '
                   'will work on a device. For example EPUB on the SONY reader. '
-                   'Choices are:') + \
+                   'Choices are:') +
                           ', '.join([x.short_name for x in output_profiles()])
        ),
@ -490,7 +491,7 @@ OptionRecommendation(name='asciiize',
            'cases where there are multiple representations of a character '
            '(characters shared by Chinese and Japanese for instance) the '
            'representation based on the current calibre interface language will be '
-            'used.')%\
+            'used.')%
            u'\u041c\u0438\u0445\u0430\u0438\u043b '
            u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432'
 )
@ -711,7 +712,6 @@ OptionRecommendation(name='search_replace',
        self.input_fmt = input_fmt
        self.output_fmt = output_fmt
        self.all_format_options = set()
        self.input_options = set()
        self.output_options = set()
@ -783,8 +783,6 @@ OptionRecommendation(name='search_replace',
                    return f, os.path.splitext(f)[1].lower()[1:]
        return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
    def get_option_by_name(self, name):
        for group in (self.input_options, self.pipeline_options,
                      self.output_options, self.all_format_options):
@ -956,7 +954,6 @@ OptionRecommendation(name='search_replace',
        self.log.info('Input debug saved to:', out_dir)
    def run(self):
        '''
        Run the conversion pipeline
@ -965,6 +962,8 @@ OptionRecommendation(name='search_replace',
        self.setup_options()
        if self.opts.verbose:
            self.log.filter_level = self.log.DEBUG
        if self.for_regex_wizard and hasattr(self.opts, 'no_process'):
            self.opts.no_process = True
        self.flush()
        import cssutils, logging
        cssutils.log.setLevel(logging.WARN)
@ -1003,6 +1002,8 @@ OptionRecommendation(name='search_replace',
        self.ui_reporter(0.01, _('Converting input to HTML...'))
        ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
        self.input_plugin.report_progress = ir
        if self.for_regex_wizard:
            self.input_plugin.for_viewer = True
        with self.input_plugin:
            self.oeb = self.input_plugin(stream, self.opts,
                                        self.input_fmt, self.log,
@ -1014,8 +1015,12 @@ OptionRecommendation(name='search_replace',
            if self.input_fmt in ('recipe', 'downloaded_recipe'):
                self.opts_to_mi(self.user_metadata)
            if not hasattr(self.oeb, 'manifest'):
-                self.oeb = create_oebbook(self.log, self.oeb, self.opts,
+                self.oeb = create_oebbook(
-                        encoding=self.input_plugin.output_encoding)
+                    self.log, self.oeb, self.opts,
                    encoding=self.input_plugin.output_encoding,
                    for_regex_wizard=self.for_regex_wizard)
            if self.for_regex_wizard:
                return
            self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
            self.opts.is_image_collection = self.input_plugin.is_image_collection
            pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
@ -1081,7 +1086,6 @@ OptionRecommendation(name='search_replace',
            self.dump_oeb(self.oeb, out_dir)
            self.log('Structured HTML written to:', out_dir)
        if self.opts.extra_css and os.path.exists(self.opts.extra_css):
            self.opts.extra_css = open(self.opts.extra_css, 'rb').read()
@ -1161,13 +1165,20 @@ OptionRecommendation(name='search_replace',
        self.log(self.output_fmt.upper(), 'output written to', self.output)
        self.flush()
 # This has to be global as create_oebbook can be called from other locations
 # (for example in the html input plugin)
 regex_wizard_callback = None
 def set_regex_wizard_callback(f):
    '''
    Set the module level ``regex_wizard_callback`` to ``f``.

    create_oebbook() hands the current value to every HTMLPreProcessor it
    creates, so this affects all subsequent conversions in the process.
    '''
    global regex_wizard_callback
    regex_wizard_callback = f
 def create_oebbook(log, path_or_stream, opts, reader=None,
-        encoding='utf-8', populate=True):
+        encoding='utf-8', populate=True, for_regex_wizard=False):
    '''
    Create an OEBBook.
    '''
    from calibre.ebooks.oeb.base import OEBBook
-    html_preprocessor = HTMLPreProcessor(log, opts)
+    html_preprocessor = HTMLPreProcessor(log, opts, regex_wizard_callback=regex_wizard_callback)
    if not encoding:
        encoding = None
    oeb = OEBBook(log, html_preprocessor,
@ -1182,3 +1193,4 @@ def create_oebbook(log, path_or_stream, opts, reader=None,
    reader()(oeb, path_or_stream)
    return oeb
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -200,7 +200,7 @@ class Dehyphenator(object):
        # Add common suffixes to the regex below to increase the likelihood of a match -
        # don't add suffixes which are also complete words, such as 'able' or 'sex'
        # only remove if it's not already the point of hyphenation
-        self.suffix_string = "((ed)?ly|'?e?s||a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$"
+        self.suffix_string = "((ed)?ly|'?e?s||a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$"  # noqa
        self.suffixes = re.compile(r"^%s" % self.suffix_string, re.IGNORECASE)
        self.removesuffixes = re.compile(r"%s" % self.suffix_string, re.IGNORECASE)
        # remove prefixes if the prefix was not already the point of hyphenation
@ -265,19 +265,18 @@ class Dehyphenator(object):
        self.html = html
        self.format = format
        if format == 'html':
-            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)\s*(?=<)(?P<wraptags>(</span>)?\s*(</[iubp]>\s*){1,2}(?P<up2threeblanks><(p|div)[^>]*>\s*(<p[^>]*>\s*</p>\s*)?</(p|div)>\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(<span[^>]*>)?)\s*(?P<secondpart>[\w\d]+)' % length)
+            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)\s*(?=<)(?P<wraptags>(</span>)?\s*(</[iubp]>\s*){1,2}(?P<up2threeblanks><(p|div)[^>]*>\s*(<p[^>]*>\s*</p>\s*)?</(p|div)>\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(<span[^>]*>)?)\s*(?P<secondpart>[\w\d]+)' % length)  # noqa
        elif format == 'pdf':
            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)\s*(?P<wraptags><p>|</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)'% length)
        elif format == 'txt':
-            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\w\d]+)'% length)
+            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\w\d]+)'% length)  # noqa
        elif format == 'individual_words':
            intextmatch = re.compile(u'(?!<)(?P<firstpart>[^\W\-]+)(-|‐)\s*(?P<secondpart>\w+)(?![^<]*?>)')
        elif format == 'html_cleanup':
-            intextmatch = re.compile(u'(?P<firstpart>[^\W\-]+)(-|‐)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)')
+            intextmatch = re.compile(u'(?P<firstpart>[^\W\-]+)(-|‐)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)')  # noqa
        elif format == 'txt_cleanup':
            intextmatch = re.compile(u'(?P<firstpart>[^\W\-]+)(-|‐)(?P<wraptags>\s+)(?P<secondpart>[\w\d]+)')
        html = intextmatch.sub(self.dehyphenate, html)
        return html
@ -498,9 +497,11 @@ class HTMLPreProcessor(object):
                     (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
                      lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
                     ]
-    def __init__(self, log=None, extra_opts=None):
+    def __init__(self, log=None, extra_opts=None, regex_wizard_callback=None):
        self.log = log
        self.extra_opts = extra_opts
        self.regex_wizard_callback = regex_wizard_callback
        self.current_href = None
    def is_baen(self, src):
        return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
@ -581,12 +582,15 @@ class HTMLPreProcessor(object):
                end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
                end_rules.append(
                    # Un wrap using punctuation
-                    (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
+                    (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),  # noqa
                )
        for rule in self.PREPROCESS + start_rules:
            html = rule[0].sub(rule[1], html)
        if self.regex_wizard_callback is not None:
            self.regex_wizard_callback(self.current_href, html)
        if get_preprocess_html:
            return html
--- a/src/calibre/ebooks/docx/block_styles.py
+++ b/src/calibre/ebooks/docx/block_styles.py
@ -0,0 +1,290 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 from collections import OrderedDict
 from calibre.ebooks.docx.names import XPath, get
 class Inherit:
    '''Type of the ``inherit`` sentinel, carries no state of its own.'''
    pass
 # Singleton marking a property that was not specified on the element being
 # read. It is always compared by identity (``is inherit``).
 inherit = Inherit()
 def binary_property(parent, name):
    '''
    Read the on/off property ``w:<name>`` that is a direct child of ``parent``.

    Returns ``inherit`` when no such child exists, otherwise True if the
    ``w:val`` of the first match is one of on/1/true and False for anything
    else. A missing ``w:val`` is looked up with a default of 'on'.
    '''
    matches = XPath('./w:%s' % name)(parent)
    if matches:
        flag = get(matches[0], 'w:val', 'on')
        return flag in {'on', '1', 'true'}
    return inherit
 def simple_color(col, auto='black'):
    '''
    Convert a color value to a CSS color.

    Any value that is empty, equal to 'auto' or not exactly six characters
    long yields ``auto``; everything else is returned prefixed with '#'. The
    characters themselves are not validated.
    '''
    is_explicit = bool(col) and col != 'auto' and len(col) == 6
    return '#' + col if is_explicit else auto
 def simple_float(val, mult=1.0):
    '''
    Return ``float(val) * mult``, or None if the conversion or the
    multiplication raises ValueError, TypeError, AttributeError or KeyError.
    '''
    try:
        number = float(val)
        scaled = number * mult
    except (ValueError, TypeError, AttributeError, KeyError):
        scaled = None
    return scaled
 # Maps the ``w:val`` of a border element to the CSS border-style used for it.
 # Callers look values up with ``LINE_STYLES.get(style, 'solid')``, so any name
 # missing from this table is rendered as a solid line.
 LINE_STYLES = {  # {{{
    'basicBlackDashes': 'dashed',
    'basicBlackDots': 'dotted',
    'basicBlackSquares': 'dashed',
    'basicThinLines': 'solid',
    'dashDotStroked': 'groove',
    'dashed': 'dashed',
    'dashSmallGap': 'dashed',
    'dotDash': 'dashed',
    'dotDotDash': 'dashed',
    'dotted': 'dotted',
    'double': 'double',
    'inset': 'inset',
    'nil': 'none',
    'none': 'none',
    'outset': 'outset',
    'single': 'solid',
    'thick': 'solid',
    'thickThinLargeGap': 'double',
    'thickThinMediumGap': 'double',
    'thickThinSmallGap' : 'double',
    'thinThickLargeGap': 'double',
    'thinThickMediumGap': 'double',
    'thinThickSmallGap': 'double',
    'thinThickThinLargeGap': 'double',
    'thinThickThinMediumGap': 'double',
    'thinThickThinSmallGap': 'double',
    'threeDEmboss': 'ridge',
    'threeDEngrave': 'groove',
    'triple': 'double',
 }  # }}}
 # Read from XML {{{
 def read_border(parent, dest):
    '''
    Read the paragraph borders (``w:pBdr`` children of ``parent``) and store
    them as attributes on ``dest``.

    For every edge (left, top, right, bottom) the attributes
    ``padding_<edge>``, ``border_<edge>_width``, ``border_<edge>_style`` and
    ``border_<edge>_color`` are set. Anything not specified in the markup, or
    whose value cannot be parsed as a number, is left as ``inherit``.
    '''
    edges = ('left', 'top', 'right', 'bottom')
    tvals = {'padding_%s':inherit, 'border_%s_width':inherit,
            'border_%s_style':inherit, 'border_%s_color':inherit}
    vals = {}
    for edge in edges:
        vals.update({k % edge:v for k, v in tvals.items()})
    for border in XPath('./w:pBdr')(parent):
        for edge in edges:
            # The expression has to be evaluated against the w:pBdr element;
            # iterating over the un-called XPath object never yields the
            # edge elements.
            for elem in XPath('./w:%s' % edge)(border):
                color = get(elem, 'w:color')
                if color is not None:
                    vals['border_%s_color' % edge] = simple_color(color)
                style = get(elem, 'w:val')
                if style is not None:
                    vals['border_%s_style' % edge] = LINE_STYLES.get(style, 'solid')
                space = get(elem, 'w:space')
                if space is not None:
                    try:
                        vals['padding_%s' % edge] = float(space)
                    except (ValueError, TypeError):
                        pass
                sz = get(elem, 'w:sz')
                if sz is not None:
                    # we don't care about art borders (they are only used for page borders)
                    try:
                        # Clamp to the range 2..96 before dividing by 8
                        vals['border_%s_width' % edge] = min(96, max(2, float(sz))) / 8
                    except (ValueError, TypeError):
                        pass
    for key, val in vals.items():
        setattr(dest, key, val)
 def read_indent(parent, dest):
    '''
    Read paragraph indentation (``w:ind`` children of ``parent``) and store
    ``margin_left``, ``margin_right`` and ``text_indent`` on ``dest``.

    Each measurement has a ``*Chars`` variant (scaled by 0.01, emitted in em)
    that takes precedence over the plain variant (scaled by 0.05, emitted in
    pt). A hanging indent takes precedence over a first line indent and is
    emitted as a negative text-indent. Unspecified or unparseable values are
    left as ``inherit``.
    '''
    def measure(chars, plain):
        # Returns (number or None, unit); the Chars variant wins when present
        if chars is not None:
            return simple_float(chars, 0.01), 'em'
        if plain is not None:
            return simple_float(plain, 0.05), 'pt'
        return None, 'pt'
    margin_left = margin_right = text_indent = inherit
    for indent in XPath('./w:ind')(parent):
        left, left_chars = get(indent, 'w:left'), get(indent, 'w:leftChars')
        amount, unit = measure(left_chars, left)
        if amount is not None:
            margin_left = '%.3g%s' % (amount, unit)
        right, right_chars = get(indent, 'w:right'), get(indent, 'w:rightChars')
        amount, unit = measure(right_chars, right)
        if amount is not None:
            margin_right = '%.3g%s' % (amount, unit)
        hanging, hanging_chars = get(indent, 'w:hanging'), get(indent, 'w:hangingChars')
        first, first_chars = get(indent, 'w:firstLine'), get(indent, 'w:firstLineChars')
        if hanging is not None:
            hanging = '-' + hanging
        if hanging_chars is not None:
            hanging_chars = '-' + hanging_chars
        if hanging_chars is not None or hanging is not None:
            # A hanging indent, even an unparseable one, hides firstLine
            amount, unit = measure(hanging_chars, hanging)
        else:
            amount, unit = measure(first_chars, first)
        if amount is not None:
            text_indent = '%.3g%s' % (amount, unit)
    dest.margin_left = margin_left
    dest.margin_right = margin_right
    dest.text_indent = text_indent
 def read_justification(parent, dest):
    '''
    Read the paragraph alignment (``w:jc``) and store it on ``dest`` as
    ``text_align``.

    'both', 'distribute' and any value containing 'thai' or 'kashida' become
    'justify'; 'left', 'center' and 'right' are used as is. Any other value
    leaves the result untouched, which is ``inherit`` if nothing matched.
    '''
    alignment = inherit
    for jc in XPath('./w:jc[@w:val]')(parent):
        val = get(jc, 'w:val')
        if val:
            if val in ('both', 'distribute') or 'thai' in val or 'kashida' in val:
                alignment = 'justify'
            if val in ('left', 'center', 'right'):
                alignment = val
    dest.text_align = alignment
 def read_spacing(parent, dest):
    '''
    Read paragraph spacing (``w:spacing`` children of ``parent``) and store
    ``margin_top``, ``margin_bottom`` and ``line_height`` on ``dest``.

    Space before/after is skipped when the matching autospacing flag is on.
    Otherwise the ``*Lines`` variant (scaled by 0.02, emitted in ex) takes
    precedence over the plain variant (scaled by 0.05, emitted in pt). The
    line height is emitted in pt for the 'exactly' and 'atLeast' line rules
    and as a unitless multiple (value / 240) for any other rule. Unspecified
    or unparseable values are left as ``inherit``.
    '''
    padding_top = padding_bottom = line_height = inherit
    for s in XPath('./w:spacing')(parent):
        a, al, aa = get(s, 'w:after'), get(s, 'w:afterLines'), get(s, 'w:afterAutospacing')
        pb = None if aa in {'on', '1', 'true'} else simple_float(al, 0.02) if al is not None else simple_float(a, 0.05) if a is not None else None
        if pb is not None:
            padding_bottom = '%.3g%s' % (pb, 'ex' if al is not None else 'pt')
        b, bl, bb = get(s, 'w:before'), get(s, 'w:beforeLines'), get(s, 'w:beforeAutospacing')
        pt = None if bb in {'on', '1', 'true'} else simple_float(bl, 0.02) if bl is not None else simple_float(b, 0.05) if b is not None else None
        if pt is not None:
            padding_top = '%.3g%s' % (pt, 'ex' if bl is not None else 'pt')
        l, lr = get(s, 'w:line'), get(s, 'w:lineRule', 'auto')
        if l is not None:
            fixed = lr in {'exactly', 'atLeast'}
            lh = simple_float(l, 0.05) if fixed else simple_float(l, 1/240.0)
            # simple_float() returns None for a value that is not a number,
            # formatting that with %g would raise a TypeError
            if lh is not None:
                line_height = '%.3g%s' % (lh, 'pt' if fixed else '')
    setattr(dest, 'margin_top', padding_top)
    setattr(dest, 'margin_bottom', padding_bottom)
    setattr(dest, 'line_height', line_height)
 def read_direction(parent, dest):
    '''
    Read the text flow (``w:textFlow``) and store it on ``dest`` as
    ``direction``: 'rtl' when a value contains 'rl' (case insensitively),
    otherwise ``inherit``.
    '''
    flow = inherit
    for text_flow in XPath('./w:textFlow[@w:val]')(parent):
        val = get(text_flow, 'w:val')
        if val and 'rl' in val.lower():
            flow = 'rtl'
    dest.direction = flow
 def read_shd(parent, dest):
    '''
    Read the shading fill (``w:shd`` with a ``w:fill`` attribute) and store it
    on ``dest`` as ``background_color``. Values that simple_color() does not
    accept become 'transparent'; with no non-empty fill the result is
    ``inherit``.
    '''
    background = inherit
    for shading in XPath('./w:shd[@w:fill]')(parent):
        fill = get(shading, 'w:fill')
        if not fill:
            continue
        background = simple_color(fill, auto='transparent')
    dest.background_color = background
 def read_numbering(parent, dest):
    '''
    Read the numbering properties (``w:numPr``) and store them on ``dest`` as
    ``numbering``: a ``(num_id, level)`` tuple when at least one of the two
    was found, otherwise ``inherit``. The level is an int (a level that is not
    an integer is ignored), the num_id is kept as read.
    '''
    level = number_id = None
    for num_pr in XPath('./w:numPr')(parent):
        for ilvl in XPath('./w:ilvl[@w:val]')(num_pr):
            try:
                level = int(get(ilvl, 'w:val'))
            except (ValueError, TypeError):
                pass
        for num in XPath('./w:numId[@w:val]')(num_pr):
            number_id = get(num, 'w:val')
    if number_id is None and level is None:
        numbering = inherit
    else:
        numbering = (number_id, level)
    dest.numbering = numbering
 # }}}
 class ParagraphStyle(object):
    '''
    Paragraph level formatting, read from a paragraph properties element.

    Every name in ``all_properties`` is an attribute of the instance. Its
    value is either a concrete setting or the ``inherit`` sentinel, meaning
    the property was not specified. ``linked_style`` is the ``w:val`` of the
    ``w:pStyle`` child, or None.
    '''
    # On/off properties, each read with binary_property()
    binary_properties = (
        'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
        'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
        'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
        'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',
    )
    all_properties = binary_properties + (
        # Border margins padding
        'border_left_width', 'border_left_style', 'border_left_color', 'padding_left',
        'border_top_width', 'border_top_style', 'border_top_color', 'padding_top',
        'border_right_width', 'border_right_style', 'border_right_color', 'padding_right',
        'border_bottom_width', 'border_bottom_style', 'border_bottom_color', 'padding_bottom',
        'margin_left', 'margin_top', 'margin_right', 'margin_bottom',
        # Misc.
        'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
        'numbering', 'font_family', 'font_size',
    )
    def __init__(self, pPr=None):
        '''
        :param pPr: The paragraph properties element to read from. If None,
                    every property is set to ``inherit``.
        '''
        self.linked_style = None
        if pPr is None:
            for p in self.all_properties:
                setattr(self, p, inherit)
        else:
            for p in self.binary_properties:
                setattr(self, p, binary_property(pPr, p))
            # Each read_* function sets its own group of attributes on self
            for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering'):
                f = globals()['read_%s' % x]
                f(pPr, self)
            for s in XPath('./w:pStyle[@w:val]')(pPr):
                self.linked_style = get(s, 'w:val')
            # The fonts are not read from pPr
            self.font_family = self.font_size = inherit
        self._css = None
    def update(self, other):
        '''
        Override the properties of this style with every property of ``other``
        that is not ``inherit``, and take over its linked style, if any.
        '''
        for prop in self.all_properties:
            nval = getattr(other, prop)
            if nval is not inherit:
                setattr(self, prop, nval)
        if other.linked_style is not None:
            self.linked_style = other.linked_style
    def resolve_based_on(self, parent):
        '''
        Fill in every property that is still ``inherit`` with the value of
        that property on ``parent``.
        '''
        for p in self.all_properties:
            val = getattr(self, p)
            if val is inherit:
                setattr(self, p, getattr(parent, p))
    @property
    def css(self):
        '''
        The CSS properties for this style as an OrderedDict. It is built on
        first access and cached, so later changes to the style are not
        reflected in it. Border widths, paddings and the font size are
        emitted in pt.
        '''
        if self._css is None:
            self._css = c = OrderedDict()
            if self.keepLines is True:
                c['page-break-inside'] = 'avoid'
            if self.pageBreakBefore is True:
                c['page-break-before'] = 'always'
            for edge in ('left', 'top', 'right', 'bottom'):
                val = getattr(self, 'border_%s_width' % edge)
                if val is not inherit:
                    # Every edge gets its own width property
                    c['border-%s-width' % edge] = '%.3gpt' % val
                for x in ('style', 'color'):
                    val = getattr(self, 'border_%s_%s' % (edge, x))
                    if val is not inherit:
                        c['border-%s-%s' % (edge, x)] = val
                val = getattr(self, 'padding_%s' % edge)
                if val is not inherit:
                    c['padding-%s' % edge] = '%.3gpt' % val
                val = getattr(self, 'margin_%s' % edge)
                if val is not inherit:
                    c['margin-%s' % edge] = val
            if self.line_height not in {inherit, '1'}:
                c['line-height'] = self.line_height
            for x in ('text_indent', 'text_align', 'background_color', 'font_family', 'font_size'):
                val = getattr(self, x)
                if val is not inherit:
                    if x == 'font_size':
                        val = '%.3gpt' % val
                    c[x.replace('_', '-')] = val
        return self._css
        # TODO: keepNext must be done at markup level
--- a/src/calibre/ebooks/docx/char_styles.py
+++ b/src/calibre/ebooks/docx/char_styles.py
@ -0,0 +1,249 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 from collections import OrderedDict
 from calibre.ebooks.docx.block_styles import (  # noqa
    inherit, simple_color, LINE_STYLES, simple_float, binary_property, read_shd)
 from calibre.ebooks.docx.names import XPath, get
 # Read from XML {{{
 def read_text_border(parent, dest):
    '''
    Read the run border (w:bdr children of parent) and store the result on
    dest as border_color, border_style, border_width and padding. With no
    w:bdr child all four are set to inherit. With one, color/style/width
    start from auto/solid/1 and padding stays inherit unless w:space is
    given.
    '''
    bdr_elems = XPath('./w:bdr')(parent)
    if bdr_elems:
        color, style, width = simple_color('auto'), 'solid', 1
    else:
        color = style = width = inherit
    pad = inherit
    for bdr in bdr_elems:
        raw_color = get(bdr, 'w:color')
        if raw_color is not None:
            color = simple_color(raw_color)
        raw_style = get(bdr, 'w:val')
        if raw_style is not None:
            style = LINE_STYLES.get(raw_style, 'solid')
        raw_space = get(bdr, 'w:space')
        if raw_space is not None:
            try:
                pad = float(raw_space)
            except (ValueError, TypeError):
                pass
        raw_sz = get(bdr, 'w:sz')
        if raw_sz is not None:
            # We don't care about art borders (they are only used for page
            # borders), so w:sz is always clamped to [2, 96] and divided by 8
            try:
                width = min(96, max(2, float(raw_sz))) / 8
            except (ValueError, TypeError):
                pass
    dest.border_color = color
    dest.border_style = style
    dest.border_width = width
    dest.padding = pad
 def read_color(parent, dest):
    '''
    Set dest.color from the w:color children of parent. The last child
    with a non-empty w:val wins; without one the value is inherit.
    '''
    result = inherit
    for node in XPath('./w:color[@w:val]')(parent):
        raw = get(node, 'w:val')
        if raw:
            result = simple_color(raw)
    dest.color = result
 def read_highlight(parent, dest):
    '''
    Set dest.highlight from the w:highlight children of parent. The value
    'none' is stored as 'transparent', any other non-empty value is stored
    unchanged. Without a usable child the value is inherit.
    '''
    result = inherit
    for node in XPath('./w:highlight[@w:val]')(parent):
        name = get(node, 'w:val')
        if name:
            result = 'transparent' if name == 'none' else name
    dest.highlight = result
 def read_lang(parent, dest):
    '''
    Set dest.lang from the w:lang children of parent. A value that parses
    as a hexadecimal number is looked up in the lcid table and only used
    when the table has a language for it. Any other non-empty value is
    stored unchanged. Without a usable child the value is inherit.
    '''
    result = inherit
    for node in XPath('./w:lang[@w:val]')(parent):
        raw = get(node, 'w:val')
        if not raw:
            continue
        try:
            code = int(raw, 16)
        except (ValueError, TypeError):
            # Not a number, keep the value as it is
            result = raw
            continue
        from calibre.ebooks.docx.lcid import lcid
        mapped = lcid.get(code, None)
        if mapped:
            result = mapped
    dest.lang = result
 def read_letter_spacing(parent, dest):
    '''
    Set dest.letter_spacing from the w:spacing children of parent, using
    simple_float() with a factor of 0.05. The last child that yields a
    value wins; without one the value is inherit.
    '''
    found = [simple_float(get(node, 'w:val'), 0.05) for node in XPath('./w:spacing[@w:val]')(parent)]
    found = [x for x in found if x is not None]
    dest.letter_spacing = found[-1] if found else inherit
 def read_sz(parent, dest):
    '''
    Set dest.font_size from the w:sz children of parent, using
    simple_float() with a factor of 0.5. The last child that yields a
    value wins; without one the value is inherit.
    '''
    found = [simple_float(get(node, 'w:val'), 0.5) for node in XPath('./w:sz[@w:val]')(parent)]
    found = [x for x in found if x is not None]
    dest.font_size = found[-1] if found else inherit
 def read_underline(parent, dest):
    '''
    Set dest.text_decoration from the w:u children of parent. A w:val of
    'none' is stored as 'none', every other non-empty value is stored as
    'underline'. Without a usable child the value is inherit.
    '''
    ans = inherit
    for col in XPath('./w:u[@w:val]')(parent):
        val = get(col, 'w:val')
        if val:
            # w:val="none" explicitly turns underlining off, it must not be
            # reported as an underline
            ans = 'none' if val == 'none' else 'underline'
    setattr(dest, 'text_decoration', ans)
 def read_vert_align(parent, dest):
    '''
    Set dest.vert_align from the w:vertAlign children of parent. Only the
    values baseline, subscript and superscript are accepted; the last
    accepted one wins. Without one the value is inherit.
    '''
    accepted = {'baseline', 'subscript', 'superscript'}
    result = inherit
    for node in XPath('./w:vertAlign[@w:val]')(parent):
        candidate = get(node, 'w:val')
        if candidate in accepted:
            result = candidate
    dest.vert_align = result
 def read_font_family(parent, dest):
    '''
    Set dest.font_family to the w:ascii attribute of the last w:rFonts
    child of parent that has a non-empty one, or to inherit.
    '''
    names = [get(node, 'w:ascii') for node in XPath('./w:rFonts[@w:ascii]')(parent)]
    names = [x for x in names if x]
    dest.font_family = names[-1] if names else inherit
 # }}}
 class RunStyle(object):
    '''
    Character (run) level formatting, read from a w:rPr element. Every
    property named in all_properties holds either a concrete value or the
    inherit sentinel, which means the property is not specified here.
    '''
    all_properties = {
        'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint',
        'rtl', 'shadow', 'smallCaps', 'strike', 'vanish',
        'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color',
        'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang', 'font_family'
    }
    toggle_properties = {
        'b', 'bCs', 'caps', 'emboss', 'i', 'iCs', 'imprint', 'shadow', 'smallCaps', 'strike', 'dstrike', 'vanish',
    }
    def __init__(self, rPr=None):
        '''
        :param rPr: The w:rPr element to read from. If None, every property
                    is initialised to inherit.
        '''
        self.linked_style = None
        if rPr is None:
            for p in self.all_properties:
                setattr(self, p, inherit)
        else:
            for p in (
                'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow',
                'smallCaps', 'strike', 'vanish',
            ):
                setattr(self, p, binary_property(rPr, p))
            # Each reader stores its result directly on self
            for reader in (
                read_text_border, read_color, read_highlight, read_shd, read_letter_spacing,
                read_sz, read_underline, read_vert_align, read_lang, read_font_family,
            ):
                reader(rPr, self)
            for s in XPath('./w:rStyle[@w:val]')(rPr):
                self.linked_style = get(s, 'w:val')
        self._css = None
    def update(self, other):
        ' Copy every property that other actually specifies onto this style '
        for prop in self.all_properties:
            nval = getattr(other, prop)
            if nval is not inherit:
                setattr(self, prop, nval)
        if other.linked_style is not None:
            self.linked_style = other.linked_style
        # The cached CSS no longer matches the properties
        self._css = None
    def resolve_based_on(self, parent):
        ' Replace every property that is still inherit with the value from parent '
        for p in self.all_properties:
            val = getattr(self, p)
            if val is inherit:
                setattr(self, p, getattr(parent, p))
        # The cached CSS no longer matches the properties
        self._css = None
    def get_border_css(self, ans):
        ' Add the border-color/style/width declarations of this style to the mapping ans '
        for x in ('color', 'style', 'width'):
            val = getattr(self, 'border_'+x)
            if x == 'width' and val is not inherit:
                val = '%.3gpt' % val
            if val is not inherit:
                ans['border-%s' % x] = val
    def clear_border_css(self):
        ' Reset the border color, style and width of this style to inherit '
        for x in ('color', 'style', 'width'):
            setattr(self, 'border_'+x, inherit)
        # Otherwise a previously computed css would still contain the border
        self._css = None
    @property
    def css(self):
        '''
        The CSS declarations for this style as an ordered mapping of CSS
        property name to value. Built on first access and cached until the
        properties are changed through update(), resolve_based_on() or
        clear_border_css().
        '''
        if self._css is None:
            c = self._css = OrderedDict()
            # On/off properties are compared with `is True` throughout, so
            # that an unresolved inherit sentinel is never taken to mean on
            td = []
            if self.text_decoration is not inherit:
                td.append(self.text_decoration)
            if self.strike is True or self.dstrike is True:
                # 'none' cannot be combined with another decoration
                td = [x for x in td if x != 'none'] + ['line-through']
            if td:
                c['text-decoration'] = ' '.join(td)
            if self.caps is True:
                c['text-transform'] = 'uppercase'
            if self.i is True:
                c['font-style'] = 'italic'
            if self.shadow is True:
                c['text-shadow'] = '2px 2px'
            if self.smallCaps is True:
                c['font-variant'] = 'small-caps'
            if self.vanish is True:
                c['display'] = 'none'
            self.get_border_css(c)
            if self.padding is not inherit:
                c['padding'] = '%.3gpt' % self.padding
            for x in ('color', 'background_color'):
                val = getattr(self, x)
                if val is not inherit:
                    c[x.replace('_', '-')] = val
            for x in ('letter_spacing', 'font_size'):
                val = getattr(self, x)
                if val is not inherit:
                    c[x.replace('_', '-')] = '%.3gpt' % val
            # A highlight takes precedence over the background_color set above
            if self.highlight is not inherit and self.highlight != 'transparent':
                c['background-color'] = self.highlight
            if self.b is True:
                c['font-weight'] = 'bold'
            if self.font_family is not inherit:
                c['font-family'] = self.font_family
        return self._css
    def same_border(self, other):
        '''
        Return True only if both this style and other have at least one
        border declaration in their css and all three border declarations
        (color, style, width) are equal.
        '''
        for x in (self, other):
            has_border = False
            for y in ('color', 'style', 'width'):
                if ('border-%s' % y) in x.css:
                    has_border = True
                    break
            if not has_border:
                return False
        s = tuple(self.css.get('border-%s' % y, None) for y in ('color', 'style', 'width'))
        o = tuple(other.css.get('border-%s' % y, None) for y in ('color', 'style', 'width'))
        return s == o
--- a/src/calibre/ebooks/docx/container.py
+++ b/src/calibre/ebooks/docx/container.py
@ -105,6 +105,9 @@ class DOCX(object):
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f
    def exists(self, name):
        ' Return True if name is one of the names known to this container '
        known = self.names
        return name in known
    def read(self, name):
        if hasattr(self, 'zipf'):
            return self.zipf.open(name).read()
@ -149,14 +152,41 @@ class DOCX(object):
            self.relationships_rmap[target] = typ
    @property
-    def document(self):
+    def document_name(self):
        name = self.relationships.get(DOCUMENT, None)
        if name is None:
            names = tuple(n for n in self.names if n == 'document.xml' or n.endswith('/document.xml'))
            if not names:
                raise InvalidDOCX('The file %s docx file has no main document' % self.name)
            name = names[0]
-        return fromstring(self.read(name))
+        return name
    @property
    def document(self):
        ' The main document part (see document_name), parsed with fromstring() '
        raw = self.read(self.document_name)
        return fromstring(raw)
    @property
    def document_relationships(self):
        ' The (by_id, by_type) relationship maps of the main document part '
        main_part = self.document_name
        return self.get_relationships(main_part)
    def get_relationships(self, name):
        '''
        Read the relationships of the part called name, which are stored in
        the part <directory of name>/_rels/<basename of name>.rels

        :return: A 2-tuple of dicts ``(by_id, by_type)`` mapping the Id and
                 the Type of every relationship to its target. Internal
                 targets are resolved against the directory of name.
                 External targets (TargetMode="External", for example
                 hyperlink URLs) are returned unchanged. Both dicts are
                 empty if the part has no relationships part.
        '''
        parts = name.split('/')
        base = '/'.join(parts[:-1])
        rels_name = '/'.join(parts[:-1] + ['_rels', parts[-1] + '.rels'])
        by_id, by_type = {}, {}
        try:
            raw = self.read(rels_name)
        except KeyError:
            # No relationships part for this name
            return by_id, by_type
        root = fromstring(raw)
        for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
            target = item.get('Target')
            if item.get('TargetMode', None) != 'External':
                target = target.lstrip('/')
                # Names in the container have no leading slash, so only
                # prefix the directory when there is one
                if base:
                    target = '/'.join((base, target))
            typ = item.get('Type')
            Id = item.get('Id')
            by_id[Id] = by_type[typ] = target
        return by_id, by_type
    @property
    def metadata(self):
--- a/src/calibre/ebooks/docx/dump.py
+++ b/src/calibre/ebooks/docx/dump.py
@ -0,0 +1,37 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 import sys, os, shutil
 from lxml import etree
 from calibre import walk
 from calibre.utils.zipfile import ZipFile
 def dump(path):
    '''
    Extract the zip file at path into the directory <basename>_extracted
    (relative to the current directory, replacing it if it already exists)
    and re-write every .xml and .rels file in it pretty printed.
    '''
    stem = os.path.splitext(os.path.basename(path))[0]
    dest = stem + '_extracted'
    if os.path.exists(dest):
        shutil.rmtree(dest)
    with ZipFile(path) as zf:
        zf.extractall(dest)
    for f in walk(dest):
        if not (f.endswith('.xml') or f.endswith('.rels')):
            continue
        with open(f, 'r+b') as stream:
            root = etree.fromstring(stream.read())
            # Overwrite the file in place with the pretty printed XML
            stream.seek(0)
            stream.truncate()
            stream.write(etree.tostring(root, pretty_print=True, encoding='utf-8', xml_declaration=True))
    print (path, 'dumped to', dest)
 if __name__ == '__main__':
    # When run as a script, dump the file named by the last command line argument
    dump(sys.argv[-1])
--- a/src/calibre/ebooks/docx/fonts.py
+++ b/src/calibre/ebooks/docx/fonts.py
@ -0,0 +1,132 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 import os, re
 from collections import namedtuple
 from calibre.ebooks.docx.block_styles import binary_property, inherit
 from calibre.ebooks.docx.names import XPath, get
 from calibre.utils.filenames import ascii_filename
 from calibre.utils.fonts.scanner import font_scanner, NoFonts
 from calibre.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font
 # One embedded font file: name is the relationship target the font was found
 # under, key is the value of the w:fontKey attribute (may be None) and
 # subsetted is True if the w:subsetted attribute is on
 Embed = namedtuple('Embed', 'name key subsetted')
 def has_system_fonts(name):
    '''
    Return True if the font scanner reports at least one font for the
    family name. Return False if it reports none or raises NoFonts.
    '''
    try:
        found = font_scanner.fonts_for_family(name)
    except NoFonts:
        return False
    return bool(found)
 def get_variant(bold=False, italic=False):
    ' Return the variant name (Regular, Italic, Bold or BoldItalic) for the bold/italic combination '
    by_bold = {
        False: {False: 'Regular', True: 'Italic'},
        True: {False: 'Bold', True: 'BoldItalic'},
    }
    return by_bold[bold][italic]
 class Family(object):
    '''
    The data read from a single w:font element: its name, alternate names,
    embedded font files, generic family and panose classification.
    '''
    def __init__(self, elem, embed_relationships):
        '''
        :param elem: The w:font element
        :param embed_relationships: Mapping of relationship id to target,
                                    used to locate the embedded font files
        '''
        self.name = self.family_name = get(elem, 'w:name')
        self.alt_names = tuple(get(alt, 'w:val') for alt in XPath('./w:altName')(elem))
        # If the font itself is not installed, use the first alternate name
        # that is, as the family name
        if self.alt_names and not has_system_fonts(self.name):
            for alt in self.alt_names:
                if has_system_fonts(alt):
                    self.family_name = alt
                    break
        self.embedded = {}
        for variant in ('Regular', 'Bold', 'Italic', 'BoldItalic'):
            for node in XPath('./w:embed%s[@r:id]' % variant)(elem):
                rid = get(node, 'r:id')
                if rid not in embed_relationships:
                    continue
                is_subset = get(node, 'w:subsetted') in {'1', 'true', 'on'}
                self.embedded[variant] = Embed(embed_relationships[rid], get(node, 'w:fontKey'), is_subset)
        self.generic_family = 'auto'
        for node in XPath('./w:family[@w:val]')(elem):
            self.generic_family = get(node, 'w:val', 'auto')
        not_ttf = binary_property(elem, 'notTrueType')
        self.is_ttf = not_ttf is inherit or not not_ttf
        self.panose1 = self.panose_name = None
        for node in XPath('./w:panose1[@w:val]')(elem):
            try:
                digits = get(node, 'w:val')
                # The value is a string of two character hexadecimal numbers
                parsed = tuple(int(digits[i:i+2], 16) for i in xrange(0, len(digits), 2))
            except (TypeError, ValueError, IndexError):
                continue
            self.panose1 = parsed
            self.panose_name = panose_to_css_generic_family(parsed)
        # Preference order: declared generic family, panose based guess, serif
        generic_map = {'roman':'serif', 'swiss':'sans-serif', 'modern':'monospace',
                       'decorative':'fantasy', 'script':'cursive'}
        declared = generic_map.get(self.generic_family, None)
        self.css_generic_family = declared or self.panose_name or 'serif'
 class Fonts(object):
    '''
    The font table of a document. Call the instance with the root of the
    font table to load it, use family_for() to get CSS font-family values
    and embed_fonts() to write out the embedded fonts that were used.
    '''
    def __init__(self):
        self.fonts = {}     # font name -> Family
        self.used = set()   # (font name, variant) pairs requested via family_for()
    def __call__(self, root, embed_relationships, docx, dest_dir):
        ' Read every named w:font element under root. docx and dest_dir are not used. '
        for elem in XPath('//w:font[@w:name]')(root):
            self.fonts[get(elem, 'w:name')] = Family(elem, embed_relationships)
    def family_for(self, name, bold=False, italic=False):
        '''
        Return the CSS font-family value for the font called name and
        record that the bold/italic variant of it is used. Fonts that are
        not in the font table yield 'serif' and are not recorded.
        '''
        f = self.fonts.get(name, None)
        if f is None:
            return 'serif'
        variant = get_variant(bold, italic)
        self.used.add((name, variant))
        # An embedded variant is referred to by the font's own name,
        # otherwise the (possibly alternate) family name is used
        name = f.name if variant in f.embedded else f.family_name
        return '"%s", %s' % (name.replace('"', ''), f.css_generic_family)
    def embed_fonts(self, dest_dir, docx):
        '''
        Write every used, embedded font variant into the directory
        dest_dir/fonts and return the matching @font-face rules as a
        string (empty if nothing was embedded).
        '''
        defs = []
        dest_dir = os.path.join(dest_dir, 'fonts')
        # Sorted, so that the generated CSS does not depend on set ordering
        for name, variant in sorted(self.used):
            f = self.fonts[name]
            if variant not in f.embedded:
                continue
            if not os.path.exists(dest_dir):
                os.mkdir(dest_dir)
            fname = self.write(name, dest_dir, docx, variant)
            if fname is None:
                continue
            decls = [('font-family', '"%s"' % name.replace('"', '')), ('src', 'url("fonts/%s")' % fname)]
            if 'Bold' in variant:
                decls.append(('font-weight', 'bold'))
            if 'Italic' in variant:
                decls.append(('font-style', 'italic'))
            d = ';\n\t'.join('%s: %s' % (k, v) for k, v in decls)
            defs.append('@font-face {\n\t%s\n}\n' % d)
        return '\n'.join(defs)
    def write(self, name, dest_dir, docx, variant):
        '''
        Write the embedded font for the given variant of the font called
        name into dest_dir. If the font has a key, the first 32 bytes are
        XORed with the reversed key bytes first.

        :return: The name of the file written, or None if the data is not
                 recognised as a TrueType/OpenType font.
        '''
        f = self.fonts[name]
        ef = f.embedded[variant]
        raw = docx.read(ef.name)
        prefix = raw[:32]
        if ef.key:
            key = re.sub(r'[^A-Fa-f0-9]', '', ef.key)
            key = bytearray(reversed(tuple(int(key[i:i+2], 16) for i in xrange(0, len(key), 2))))
            # A key without any hexadecimal digits cannot be applied
            if key:
                prefix = bytearray(prefix)
                prefix = bytes(bytearray(prefix[i]^key[i % len(key)] for i in xrange(len(prefix))))
        if not is_truetype_font(prefix):
            return None
        ext = 'otf' if prefix.startswith(b'OTTO') else 'ttf'
        fname = ascii_filename('%s - %s.%s' % (name, variant, ext))
        with open(os.path.join(dest_dir, fname), 'wb') as dest:
            dest.write(prefix)
            dest.write(raw[32:])
        return fname
--- a/src/calibre/ebooks/docx/lcid.py
+++ b/src/calibre/ebooks/docx/lcid.py
@ -0,0 +1,233 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 lcid = {
    1078: 'af',  # Afrikaans - South Africa
    1052: 'sq',  # Albanian - Albania
    1118: 'am',  # Amharic - Ethiopia
    1025: 'ar',  # Arabic - Saudi Arabia
    5121: 'ar',  # Arabic - Algeria
    15361: 'ar',  # Arabic - Bahrain
    3073: 'ar',  # Arabic - Egypt
    2049: 'ar',  # Arabic - Iraq
    11265: 'ar',  # Arabic - Jordan
    13313: 'ar',  # Arabic - Kuwait
    12289: 'ar',  # Arabic - Lebanon
    4097: 'ar',  # Arabic - Libya
    6145: 'ar',  # Arabic - Morocco
    8193: 'ar',  # Arabic - Oman
    16385: 'ar',  # Arabic - Qatar
    10241: 'ar',  # Arabic - Syria
    7169: 'ar',  # Arabic - Tunisia
    14337: 'ar',  # Arabic - U.A.E.
    9217: 'ar',  # Arabic - Yemen
    1067: 'hy',  # Armenian - Armenia
    1101: 'as',  # Assamese
    2092: 'az',  # Azeri (Cyrillic)
    1068: 'az',  # Azeri (Latin)
    1069: 'eu',  # Basque
    1059: 'be',  # Belarusian
    1093: 'bn',  # Bengali (India)
    2117: 'bn',  # Bengali (Bangladesh)
    5146: 'bs',  # Bosnian (Bosnia/Herzegovina)
    1026: 'bg',  # Bulgarian
    1109: 'my',  # Burmese
    1027: 'ca',  # Catalan
    1116: 'chr',  # Cherokee - United States
    2052: 'zh',  # Chinese - People's Republic of China
    4100: 'zh',  # Chinese - Singapore
    1028: 'zh',  # Chinese - Taiwan
    3076: 'zh',  # Chinese - Hong Kong SAR
    5124: 'zh',  # Chinese - Macao SAR
    1050: 'hr',  # Croatian
    4122: 'hr',  # Croatian (Bosnia/Herzegovina)
    1029: 'cs',  # Czech
    1030: 'da',  # Danish
    1125: 'dv',  # Divehi
    1043: 'nl',  # Dutch - Netherlands
    2067: 'nl',  # Dutch - Belgium
    1126: 'bin',  # Edo
    1033: 'en',  # English - United States
    2057: 'en',  # English - United Kingdom
    3081: 'en',  # English - Australia
    10249: 'en',  # English - Belize
    4105: 'en',  # English - Canada
    9225: 'en',  # English - Caribbean
    15369: 'en',  # English - Hong Kong SAR
    16393: 'en',  # English - India
    14345: 'en',  # English - Indonesia
    6153: 'en',  # English - Ireland
    8201: 'en',  # English - Jamaica
    17417: 'en',  # English - Malaysia
    5129: 'en',  # English - New Zealand
    13321: 'en',  # English - Philippines
    18441: 'en',  # English - Singapore
    7177: 'en',  # English - South Africa
    11273: 'en',  # English - Trinidad
    12297: 'en',  # English - Zimbabwe
    1061: 'et',  # Estonian
    1080: 'fo',  # Faroese
    1065: None,  # TODO: Farsi
    1124: 'fil',  # Filipino
    1035: 'fi',  # Finnish
    1036: 'fr',  # French - France
    2060: 'fr',  # French - Belgium
    11276: 'fr',  # French - Cameroon
    3084: 'fr',  # French - Canada
    9228: 'fr',  # French - Democratic Rep. of Congo
    12300: 'fr',  # French - Cote d'Ivoire
    15372: 'fr',  # French - Haiti
    5132: 'fr',  # French - Luxembourg
    13324: 'fr',  # French - Mali
    6156: 'fr',  # French - Monaco
    14348: 'fr',  # French - Morocco
    58380: 'fr',  # French - North Africa
    8204: 'fr',  # French - Reunion
    10252: 'fr',  # French - Senegal
    4108: 'fr',  # French - Switzerland
    7180: 'fr',  # French - West Indies
    1122: 'fy',  # Frisian - Netherlands
    1127: None,  # TODO: Fulfulde - Nigeria
    1071: 'mk',  # FYRO Macedonian
    2108: 'ga',  # Gaelic (Ireland)
    1084: 'gd',  # Gaelic (Scotland)
    1110: 'gl',  # Galician
    1079: 'ka',  # Georgian
    1031: 'de',  # German - Germany
    3079: 'de',  # German - Austria
    5127: 'de',  # German - Liechtenstein
    4103: 'de',  # German - Luxembourg
    2055: 'de',  # German - Switzerland
    1032: 'el',  # Greek
    1140: 'gn',  # Guarani - Paraguay
    1095: 'gu',  # Gujarati
    1128: 'ha',  # Hausa - Nigeria
    1141: 'haw',  # Hawaiian - United States
    1037: 'he',  # Hebrew
    1081: 'hi',  # Hindi
    1038: 'hu',  # Hungarian
    1129: None,  # TODO: Ibibio - Nigeria
    1039: 'is',  # Icelandic
    1136: 'ig',  # Igbo - Nigeria
    1057: 'id',  # Indonesian
    1117: 'iu',  # Inuktitut
    1040: 'it',  # Italian - Italy
    2064: 'it',  # Italian - Switzerland
    1041: 'ja',  # Japanese
    1099: 'kn',  # Kannada
    1137: 'kr',  # Kanuri - Nigeria
    2144: 'ks',  # Kashmiri
    1120: 'ks',  # Kashmiri (Arabic)
    1087: 'kk',  # Kazakh
    1107: 'km',  # Khmer
    1111: 'kok',  # Konkani
    1042: 'ko',  # Korean
    1088: 'ky',  # Kyrgyz (Cyrillic)
    1108: 'lo',  # Lao
    1142: 'la',  # Latin
    1062: 'lv',  # Latvian
    1063: 'lt',  # Lithuanian
    1086: 'ms',  # Malay - Malaysia
    2110: 'ms',  # Malay - Brunei Darussalam
    1100: 'ml',  # Malayalam
    1082: 'mt',  # Maltese
    1112: 'mni',  # Manipuri
    1153: 'mi',  # Maori - New Zealand
    1102: 'mr',  # Marathi
    1104: 'mn',  # Mongolian (Cyrillic)
    2128: 'mn',  # Mongolian (Mongolian)
    1121: 'ne',  # Nepali
    2145: 'ne',  # Nepali - India
    1044: 'no',  # Norwegian (Bokmål)
    2068: 'no',  # Norwegian (Nynorsk)
    1096: 'or',  # Oriya
    1138: 'om',  # Oromo
    1145: 'pap',  # Papiamentu
    1123: 'ps',  # Pashto
    1045: 'pl',  # Polish
    1046: 'pt',  # Portuguese - Brazil
    2070: 'pt',  # Portuguese - Portugal
    1094: 'pa',  # Punjabi
    2118: 'pa',  # Punjabi (Pakistan)
    1131: 'qu',  # Quechua - Bolivia
    2155: 'qu',  # Quechua - Ecuador
    3179: 'qu',  # Quechua - Peru
    1047: 'rm',  # Rhaeto-Romanic
    1048: 'ro',  # Romanian
    2072: 'ro',  # Romanian - Moldova
    1049: 'ru',  # Russian
    2073: 'ru',  # Russian - Moldova
    1083: 'se',  # Sami (Lappish)
    1103: 'sa',  # Sanskrit
    1132: 'nso',  # Sepedi
    3098: 'sr',  # Serbian (Cyrillic)
    2074: 'sr',  # Serbian (Latin)
    1113: 'sd',  # Sindhi - India
    2137: 'sd',  # Sindhi - Pakistan
    1115: 'si',  # Sinhalese - Sri Lanka
    1051: 'sk',  # Slovak
    1060: 'sl',  # Slovenian
    1143: 'so',  # Somali
    1070: 'wen',  # Sorbian
    3082: 'es',  # Spanish - Spain (Modern Sort)
    1034: 'es',  # Spanish - Spain (Traditional Sort)
    11274: 'es',  # Spanish - Argentina
    16394: 'es',  # Spanish - Bolivia
    13322: 'es',  # Spanish - Chile
    9226: 'es',  # Spanish - Colombia
    5130: 'es',  # Spanish - Costa Rica
    7178: 'es',  # Spanish - Dominican Republic
    12298: 'es',  # Spanish - Ecuador
    17418: 'es',  # Spanish - El Salvador
    4106: 'es',  # Spanish - Guatemala
    18442: 'es',  # Spanish - Honduras
    58378: 'es',  # Spanish - Latin America
    2058: 'es',  # Spanish - Mexico
    19466: 'es',  # Spanish - Nicaragua
    6154: 'es',  # Spanish - Panama
    15370: 'es',  # Spanish - Paraguay
    10250: 'es',  # Spanish - Peru
    20490: 'es',  # Spanish - Puerto Rico
    21514: 'es',  # Spanish - United States
    14346: 'es',  # Spanish - Uruguay
    8202: 'es',  # Spanish - Venezuela
    1072: None,  # TODO: Sutu
    1089: 'sw',  # Swahili
    1053: 'sv',  # Swedish
    2077: 'sv',  # Swedish - Finland
    1114: 'syr',  # Syriac
    1064: 'tg',  # Tajik
    1119: None,  # TODO: Tamazight (Arabic)
    2143: None,  # TODO: Tamazight (Latin)
    1097: 'ta',  # Tamil
    1092: 'tt',  # Tatar
    1098: 'te',  # Telugu
    1054: 'th',  # Thai
    2129: 'bo',  # Tibetan - Bhutan
    1105: 'bo',  # Tibetan - People's Republic of China
    2163: 'ti',  # Tigrigna - Eritrea
    1139: 'ti',  # Tigrigna - Ethiopia
    1073: 'ts',  # Tsonga
    1074: 'tn',  # Tswana
    1055: 'tr',  # Turkish
    1090: 'tk',  # Turkmen
    1152: 'ug',  # Uighur - China
    1058: 'uk',  # Ukrainian
    1056: 'ur',  # Urdu
    2080: 'ur',  # Urdu - India
    2115: 'uz',  # Uzbek (Cyrillic)
    1091: 'uz',  # Uzbek (Latin)
    1075: 've',  # Venda
    1066: 'vi',  # Vietnamese
    1106: 'cy',  # Welsh
    1076: 'xh',  # Xhosa
    1144: 'ii',  # Yi
    1085: 'yi',  # Yiddish
    1130: 'yo',  # Yoruba
    1077: 'zu'  # Zulu
 }
--- a/src/calibre/ebooks/docx/names.py
+++ b/src/calibre/ebooks/docx/names.py
@ -11,6 +11,9 @@ from lxml.etree import XPath as X
 # Relationship type URIs. DOCUMENT is looked up in the container's
 # relationships to find the main document part; the others are presumably
 # used the same way to find the corresponding parts (verify against callers).
 DOCUMENT  = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument'
 DOCPROPS  = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties'
 APPPROPS  = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
 STYLES    = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
 NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering'
 FONTS     = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable'
 namespaces = {
    'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
@ -20,6 +23,7 @@ namespaces = {
    'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
    'w10': 'urn:schemas-microsoft-com:office:word',
    'wne': 'http://schemas.microsoft.com/office/word/2006/wordml',
    'xml': 'http://www.w3.org/XML/1998/namespace',
    # Drawing
    'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
    'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math',
@ -42,6 +46,26 @@ namespaces = {
    'dcterms': 'http://purl.org/dc/terms/'
 }
-def XPath(expr):
+xpath_cache = {}
-    return X(expr, namespaces=namespaces)
+
 def XPath(expr):
    '''
    Return the compiled XPath object for expr, bound to the namespaces
    map. Compiled expressions are kept in xpath_cache and re-used.
    '''
    compiled = xpath_cache.get(expr, None)
    if compiled is not None:
        return compiled
    xpath_cache[expr] = compiled = X(expr, namespaces=namespaces)
    return compiled
 def is_tag(x, q):
    '''
    Return True if x (an element, or a tag name) has the tag q, where q is
    a prefixed name such as w:p whose prefix is resolved via namespaces.
    '''
    prefix, _, local = q.partition(':')
    expected = '{%s}%s' % (namespaces.get(prefix, None), local)
    actual = getattr(x, 'tag', x)
    return expected == actual
 def barename(x):
    ' Return the part of x after the last } character, or all of x if there is none '
    _, _, local = x.rpartition('}')
    return local
 def XML(x):
    ' Return the name x qualified with the xml namespace, in {namespace}name form '
    xml_ns = namespaces['xml']
    return '{%s}%s' % (xml_ns, x)
 def get(x, attr, default=None):
    '''
    Return the value of the attribute attr of the element x, or default
    if x has no such attribute. attr is a prefixed name such as w:val,
    whose prefix must be a key of namespaces.
    '''
    prefix, _, local = attr.partition(':')
    qualified = '{%s}%s' % (namespaces[prefix], local)
    return x.attrib.get(qualified, default)
--- a/src/calibre/ebooks/docx/numbering.py
+++ b/src/calibre/ebooks/docx/numbering.py
@ -0,0 +1,300 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 import re
 from collections import Counter
 from lxml.html.builder import OL, UL, SPAN
 from calibre.ebooks.docx.block_styles import ParagraphStyle
 from calibre.ebooks.docx.char_styles import RunStyle
 from calibre.ebooks.docx.names import XPath, get
 # Maps the value of w:numFmt to the format name stored in Level.fmt (the
 # names look like CSS list-style-type keywords). Values that are not listed
 # here fall back to 'decimal' in Level.read_from_xml()
 STYLE_MAP = {
    'aiueo': 'hiragana',
    'aiueoFullWidth': 'hiragana',
    'hebrew1': 'hebrew',
    'iroha': 'katakana-iroha',
    'irohaFullWidth': 'katakana-iroha',
    'lowerLetter': 'lower-alpha',
    'lowerRoman': 'lower-roman',
    'none': 'none',
    'upperLetter': 'upper-alpha',
    'upperRoman': 'upper-roman',
    'chineseCounting': 'cjk-ideographic',
    'decimalZero': 'decimal-leading-zero',
 }
 class Level(object):
    '''
    The definition of a single list level, read from a w:lvl element:
    numbering format, start value, restart rule, number template and the
    paragraph/character styles attached to the level.
    '''
    # The attributes that copy() transfers to the new object
    _copied = ('restart', 'start', 'fmt', 'para_link', 'paragraph_style',
               'character_style', 'is_numbered', 'num_template')
    def __init__(self, lvl=None):
        ' :param lvl: The w:lvl element to read from, or None for an empty level '
        self.restart = None
        self.start = 0
        self.fmt = 'decimal'
        self.para_link = None
        self.paragraph_style = self.character_style = None
        self.is_numbered = False
        self.num_template = None
        if lvl is not None:
            self.read_from_xml(lvl)
    def copy(self):
        ' Return a new Level with the same attribute values (the style objects are not duplicated) '
        duplicate = Level()
        for field in self._copied:
            setattr(duplicate, field, getattr(self, field))
        return duplicate
    def format_template(self, counter, ilvl):
        '''
        Expand the %N placeholders in num_template using counter (a
        mapping of level number to count) for a paragraph at level ilvl.
        %N refers to level N-1. Placeholders for levels deeper than ilvl
        or missing from counter expand to nothing, levels above ilvl use
        their count minus one. The result has trailing whitespace replaced
        by a single non-breaking space.
        '''
        def expand(match):
            level = int(match.group(1)) - 1
            if level > ilvl or level not in counter:
                return ''
            offset = 0 if level == ilvl else 1
            return '%d' % (counter[level] - offset)
        expanded = re.sub(r'%(\d+)', expand, self.num_template)
        return expanded.rstrip() + '\xa0'
    def read_from_xml(self, lvl, override=False):
        '''
        Update this level from the w:lvl element lvl. Only the settings
        present in lvl are changed. override is not used.
        '''
        def int_values(expr):
            # The w:val of every matching child that parses as an integer
            for node in XPath(expr)(lvl):
                try:
                    yield int(get(node, 'w:val'))
                except (TypeError, ValueError):
                    pass
        for number in int_values('./w:lvlRestart[@w:val]'):
            self.restart = number
        for number in int_values('./w:start[@w:val]'):
            self.start = number
        level_text = None
        for node in XPath('./w:lvlText[@w:val]')(lvl):
            level_text = get(node, 'w:val')
        for node in XPath('./w:numFmt[@w:val]')(lvl):
            num_fmt = get(node, 'w:val')
            if num_fmt == 'bullet':
                self.is_numbered = False
                # The bullet shape is chosen based on the level text
                self.fmt = {'\uf0a7':'square', 'o':'circle'}.get(level_text, 'disc')
                continue
            self.is_numbered = True
            self.fmt = STYLE_MAP.get(num_fmt, 'decimal')
            # A template is only stored for level text that is not of the
            # plain "%N." form
            if level_text and re.match(r'%\d+\.$', level_text) is None:
                self.num_template = level_text
        for node in XPath('./w:pStyle[@w:val]')(lvl):
            self.para_link = get(node, 'w:val')
        for pPr in XPath('./w:pPr')(lvl):
            block_style = ParagraphStyle(pPr)
            if self.paragraph_style is None:
                self.paragraph_style = block_style
            else:
                self.paragraph_style.update(block_style)
        for rPr in XPath('./w:rPr')(lvl):
            run_style = RunStyle(rPr)
            if self.character_style is None:
                self.character_style = run_style
            else:
                self.character_style.update(run_style)
 class NumberingDefinition(object):
    ' A set of list levels, stored in self.levels as a mapping of level number to Level '
    def __init__(self, parent=None):
        ' :param parent: The element whose w:lvl children are read, or None for an empty definition '
        self.levels = {}
        if parent is None:
            return
        for lvl in XPath('./w:lvl')(parent):
            try:
                number = int(get(lvl, 'w:ilvl', 0))
            except (TypeError, ValueError):
                # An unparseable level number is treated as level 0
                number = 0
            self.levels[number] = Level(lvl)
    def copy(self):
        ' Return a new definition whose levels are copies of the levels of this one '
        duplicate = NumberingDefinition()
        duplicate.levels.update((number, lvl.copy()) for number, lvl in self.levels.iteritems())
        return duplicate
 class Numbering(object):
    def __init__(self):
        # All three are filled in by __call__()
        self.definitions = {}  # w:abstractNumId -> NumberingDefinition
        self.instances = {}    # w:numId -> NumberingDefinition
        self.counters = {}     # w:numId -> Counter of level number -> current count
    def __call__(self, root, styles):
        '''
        Read all numbering style definitions.

        :param root: The element containing the w:abstractNum and w:num
                     elements
        :param styles: The document styles, its numbering_style_links
                       attribute maps a style id to a w:numId and is used
                       to resolve w:numStyleLink

        Fills in self.definitions, self.instances and self.counters.
        '''
        # Imported here as it is only needed in this method
        from copy import copy as shallow_copy
        def as_int(val):
            try:
                return int(val)
            except (TypeError, ValueError):
                return None
        lazy_load = {}
        for an in XPath('./w:abstractNum[@w:abstractNumId]')(root):
            an_id = get(an, 'w:abstractNumId')
            nsl = XPath('./w:numStyleLink[@w:val]')(an)
            if nsl:
                # Defined by a style, resolved once the instances are known
                lazy_load[an_id] = get(nsl[0], 'w:val')
            else:
                nd = NumberingDefinition(an)
                self.definitions[an_id] = nd
        def create_instance(n, definition):
            nd = definition.copy()
            for lo in XPath('./w:lvlOverride')(n):
                ilvl = as_int(get(lo, 'w:ilvl'))
                for lvl in XPath('./w:lvl')(lo)[:1]:
                    if ilvl is None:
                        ilvl = as_int(get(lvl, 'w:ilvl'))
                    if ilvl is None:
                        # Same fallback as NumberingDefinition uses
                        ilvl = 0
                    # The levels are keyed by integers, so the lookup has
                    # to use an integer as well
                    alvl = nd.levels.get(ilvl, None)
                    if alvl is None:
                        # Store the new level, otherwise the override is lost
                        alvl = nd.levels[ilvl] = Level()
                    else:
                        # Level.copy() shares the style objects with the
                        # definition and read_from_xml() updates them in
                        # place, so give this instance its own copies
                        for attr in ('paragraph_style', 'character_style'):
                            shared = getattr(alvl, attr)
                            if shared is not None:
                                setattr(alvl, attr, shallow_copy(shared))
                    alvl.read_from_xml(lvl, override=True)
            return nd
        next_pass = {}
        for n in XPath('./w:num[@w:numId]')(root):
            an_id = None
            num_id = get(n, 'w:numId')
            for an in XPath('./w:abstractNumId[@w:val]')(n):
                an_id = get(an, 'w:val')
            d = self.definitions.get(an_id, None)
            if d is None:
                # Probably a style linked definition, try again below
                next_pass[num_id] = (an_id, n)
                continue
            self.instances[num_id] = create_instance(n, d)
        numbering_links = styles.numbering_style_links
        for an_id, style_link in lazy_load.iteritems():
            try:
                linked = self.instances[numbering_links[style_link]]
            except KeyError:
                # Dangling link, the definition stays undefined and
                # instances that use it are skipped below
                continue
            self.definitions[an_id] = linked.copy()
        for num_id, (an_id, n) in next_pass.iteritems():
            d = self.definitions.get(an_id, None)
            if d is not None:
                self.instances[num_id] = create_instance(n, d)
        for num_id, d in self.instances.iteritems():
            self.counters[num_id] = Counter({lvl:d.levels[lvl].start for lvl in d.levels})
    def get_pstyle(self, num_id, style_id):
        d = self.instances.get(num_id, None)
        if d is not None:
            for ilvl, lvl in d.levels.iteritems():
                if lvl.para_link == style_id:
                    return ilvl
    def get_para_style(self, num_id, lvl):
        d = self.instances.get(num_id, None)
        if d is not None:
            lvl = d.levels.get(lvl, None)
            return getattr(lvl, 'paragraph_style', None)
    def update_counter(self, counter, levelnum, levels):
        counter[levelnum] += 1
        for ilvl, lvl in levels.iteritems():
            restart = lvl.restart
            if (restart is None and ilvl == levelnum + 1) or restart == levelnum + 1:
                counter[ilvl] = lvl.start
    def apply_markup(self, items, body, styles, object_map):
        '''
        Turn the numbered paragraphs into HTML list markup.

        :param items: Iterable of (element, num_id, level index) tuples
        :param body: Element whose descendant <li> elements are grouped
        :param styles: Object providing register() and para_cache
        :param object_map: Maps the <li> elements to the keys of styles.para_cache
        '''
        # Pass 1: tag every numbered element as <li> and record its counter
        # value, level, list id and (if the level has one) formatted template
        for p, num_id, ilvl in items:
            d = self.instances.get(num_id, None)
            if d is not None:
                lvl = d.levels.get(ilvl, None)
                if lvl is not None:
                    counter = self.counters[num_id]
                    p.tag = 'li'
                    p.set('value', '%s' % counter[ilvl])
                    p.set('list-lvl', str(ilvl))
                    p.set('list-id', num_id)
                    if lvl.num_template is not None:
                        val = lvl.format_template(counter, ilvl)
                        p.set('list-template', val)
                    self.update_counter(counter, ilvl, d.levels)
        # Longest template text seen so far, keyed by list id + level
        templates = {}
        def commit(current_run):
            # Wrap the <li> elements collected in current_run in a single
            # <ol>/<ul> inserted at the position of the first one, then empty
            # current_run in place
            if not current_run:
                return
            start = current_run[0]
            parent = start.getparent()
            idx = parent.index(start)
            d = self.instances[start.get('list-id')]
            ilvl = int(start.get('list-lvl'))
            lvl = d.levels[ilvl]
            lvlid = start.get('list-id') + start.get('list-lvl')
            wrap = (OL if lvl.is_numbered else UL)('\n\t')
            has_template = 'list-template' in start.attrib
            if has_template:
                # Marked for conversion into a table-like <div> in the last pass
                wrap.set('lvlid', lvlid)
            else:
                wrap.set('class', styles.register({'list-style-type': lvl.fmt}, 'list'))
            parent.insert(idx, wrap)
            last_val = None
            for child in current_run:
                wrap.append(child)
                child.tail = '\n\t'
                if has_template:
                    # Put the item's own content into one <span> and insert a
                    # second <span> holding the template text before it
                    span = SPAN()
                    span.text = child.text
                    child.text = None
                    for gc in child:
                        span.append(gc)
                    child.append(span)
                    span = SPAN(child.get('list-template'))
                    last = templates.get(lvlid, '')
                    if span.text and len(span.text) > len(last):
                        templates[lvlid] = span.text
                    child.insert(0, span)
                for attr in ('list-lvl', 'list-id', 'list-template'):
                    child.attrib.pop(attr, None)
                val = int(child.get('value'))
                # An explicit value is only kept where the numbering does not
                # simply continue from the previous item of a numbered list
                if last_val == val - 1 or wrap.tag == 'ul':
                    child.attrib.pop('value')
                last_val = val
            current_run[-1].tail = '\n'
            del current_run[:]
        # Pass 2: for every parent of an <li>, group consecutive <li> children
        # that share list id and level
        parents = set()
        for child in body.iterdescendants('li'):
            parents.add(child.getparent())
        for parent in parents:
            current_run = []
            # NOTE(review): commit() inserts into and moves children out of
            # parent while it is being iterated here
            for child in parent:
                if child.tag == 'li':
                    if current_run:
                        last = current_run[-1]
                        if (last.get('list-id') , last.get('list-lvl')) != (child.get('list-id'), child.get('list-lvl')):
                            commit(current_run)
                    current_run.append(child)
                else:
                    commit(current_run)
            commit(current_run)
        # Pass 3: lists that use a template become <div>s styled as a table,
        # one row per item and one cell per child of the item
        for wrap in body.xpath('//ol[@lvlid]'):
            lvlid = wrap.attrib.pop('lvlid')
            wrap.tag = 'div'
            text = ''
            maxtext = templates.get(lvlid, '').replace('.', '')[:-1]
            # NOTE(review): text is computed here but not used afterwards
            for li in wrap.iterchildren('li'):
                t = li[0].text
                if t and len(t) > len(text):
                    text = t
            for i, li in enumerate(wrap.iterchildren('li')):
                li.tag = 'div'
                li.attrib.pop('value', None)
                li.set('style', 'display:table-row')
                obj = object_map[li]
                bs = styles.para_cache[obj]
                if i == 0:
                    m = len(maxtext)  # Move the table left to simulate the behavior of a list (number is to the left of text margin)
                    wrap.set('style', 'display:table; margin-left: -%dem; padding-left: %s' % (m, bs.css.get('margin-left', 0)))
                # The left margin of the first item was moved to the wrapper above
                bs.css.pop('margin-left', None)
                for child in li:
                    child.set('style', 'display:table-cell')
--- a/src/calibre/ebooks/docx/styles.py
+++ b/src/calibre/ebooks/docx/styles.py
@ -0,0 +1,365 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 import textwrap
 from collections import OrderedDict, Counter
 from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit
 from calibre.ebooks.docx.char_styles import RunStyle
 from calibre.ebooks.docx.names import XPath, get
 class Style(object):
    '''
    Class representing a <w:style> element. Can contain block, character, etc. styles.
    '''
    name_path = XPath('./w:name[@w:val]')
    based_on_path = XPath('./w:basedOn[@w:val]')
    def __init__(self, elem):
        self.resolved = False
        self.style_id = get(elem, 'w:styleId')
        self.style_type = get(elem, 'w:type')
        is_numbering = self.style_type == 'numbering'
        names = self.name_path(elem)
        # With several <w:name> children the last one is used
        self.name = get(names[-1], 'w:val') if names else None
        parents = self.based_on_path(elem)
        self.based_on = get(parents[0], 'w:val') if parents else None
        if is_numbering:
            # basedOn is not honoured for numbering styles
            self.based_on = None
        self.is_default = get(elem, 'w:default') in {'1', 'on', 'true'}
        self.paragraph_style = self.character_style = None
        if self.style_type == 'paragraph':
            # Merge all <w:pPr> children, later ones updating earlier ones
            for pPr in XPath('./w:pPr')(elem):
                block = ParagraphStyle(pPr)
                if self.paragraph_style is None:
                    self.paragraph_style = block
                else:
                    self.paragraph_style.update(block)
        if self.style_type in {'paragraph', 'character'}:
            # Run properties are read for paragraph and character styles alike
            for rPr in XPath('./w:rPr')(elem):
                inline = RunStyle(rPr)
                if self.character_style is None:
                    self.character_style = inline
                else:
                    self.character_style.update(inline)
        if is_numbering:
            # Only numbering styles get this attribute
            self.numbering_style_link = None
            for num_id in XPath('./w:pPr/w:numPr/w:numId[@w:val]')(elem):
                self.numbering_style_link = get(num_id, 'w:val')
    def resolve_based_on(self, parent):
        ' Resolve the paragraph and character styles of this style against those of parent '
        inherited_block = parent.paragraph_style
        if inherited_block is not None:
            if self.paragraph_style is None:
                self.paragraph_style = ParagraphStyle()
            self.paragraph_style.resolve_based_on(inherited_block)
        inherited_inline = parent.character_style
        if inherited_inline is not None:
            if self.character_style is None:
                self.character_style = RunStyle()
            self.character_style.resolve_based_on(inherited_inline)
 class Styles(object):
    '''
    Collection of all styles defined in the document. Used to get the final styles applicable to elements in the document markup.
    '''
    def __init__(self):
        self.id_map = OrderedDict()
        self.para_cache = {}
        self.para_char_cache = {}
        self.run_cache = {}
        self.classes = {}
        self.counter = Counter()
        self.default_styles = {}
        self.numbering_style_links = {}
    def __iter__(self):
        for s in self.id_map.itervalues():
            yield s
    def __getitem__(self, key):
        return self.id_map[key]
    def __len__(self):
        return len(self.id_map)
    def get(self, key, default=None):
        return self.id_map.get(key, default)
    def __call__(self, root, fonts):
        '''
        Read every <w:style> and the <w:docDefaults> found via root, then
        resolve the basedOn chain of every style that was read.
        '''
        self.fonts = fonts
        for elem in XPath('//w:style')(root):
            style = Style(elem)
            if style.style_id:
                self.id_map[style.style_id] = style
            if style.is_default:
                self.default_styles[style.style_type] = style
            if style.style_type == 'numbering' and style.numbering_style_link:
                self.numbering_style_links[style.style_id] = style.numbering_style_link
        self.default_paragraph_style = self.default_character_style = None
        for defaults in XPath('./w:docDefaults')(root):
            # Document wide paragraph defaults
            for holder in XPath('./w:pPrDefault')(defaults):
                for pPr in XPath('./w:pPr')(holder):
                    block = ParagraphStyle(pPr)
                    if self.default_paragraph_style is None:
                        self.default_paragraph_style = block
                    else:
                        self.default_paragraph_style.update(block)
            # Document wide run defaults
            for holder in XPath('./w:rPrDefault')(defaults):
                for rPr in XPath('./w:rPr')(holder):
                    inline = RunStyle(rPr)
                    if self.default_character_style is None:
                        self.default_character_style = inline
                    else:
                        self.default_character_style.update(inline)
        def resolve(style, parent):
            # A parent must itself be resolved before a style is resolved
            # against it
            if parent is not None:
                if not parent.resolved:
                    resolve(parent, self.get(parent.based_on))
                style.resolve_based_on(parent)
            style.resolved = True
        for style in self:
            if style.resolved:
                continue
            resolve(style, self.get(style.based_on))
    def para_val(self, parent_styles, direct_formatting, attr):
        '''
        Return attr from direct_formatting unless it is inherit, in which case
        the value of the last style in parent_styles that defines it is used.
        '''
        direct = getattr(direct_formatting, attr)
        if direct is not inherit:
            return direct
        for style in reversed(parent_styles):
            candidate = getattr(style, attr)
            if candidate is not inherit:
                return candidate
        # Defined nowhere, stays inherit
        return direct
    def run_val(self, parent_styles, direct_formatting, attr):
        '''
        Return attr from direct_formatting unless it is inherit. Otherwise a
        toggle property is True when an odd number of parent_styles have it set
        to True, and any other property takes the value of the last style in
        parent_styles that defines it.
        '''
        direct = getattr(direct_formatting, attr)
        if direct is not inherit:
            return direct
        if attr in direct_formatting.toggle_properties:
            enabled = sum(1 for style in parent_styles if getattr(style, attr) is True)
            return enabled % 2 == 1
        for style in reversed(parent_styles):
            candidate = getattr(style, attr)
            if candidate is not inherit:
                return candidate
        return direct
    def resolve_paragraph(self, p):
        '''
        Return the effective ParagraphStyle for the paragraph element p. The
        result is computed on first use and cached in self.para_cache.
        '''
        ans = self.para_cache.get(p, None)
        if ans is None:
            ans = self.para_cache[p] = ParagraphStyle()
            ans.style_name = None
            # Formatting specified directly on the paragraph, all <w:pPr>
            # children merged
            direct_formatting = None
            for pPr in XPath('./w:pPr')(p):
                ps = ParagraphStyle(pPr)
                if direct_formatting is None:
                    direct_formatting = ps
                else:
                    direct_formatting.update(ps)
            if direct_formatting is None:
                direct_formatting = ParagraphStyle()
            # para_val() searches this list from the end, so styles appended
            # later take precedence
            parent_styles = []
            if self.default_paragraph_style is not None:
                parent_styles.append(self.default_paragraph_style)
            default_para = self.default_styles.get('paragraph', None)
            if direct_formatting.linked_style is not None:
                ls = self.get(direct_formatting.linked_style)
                if ls is not None:
                    ans.style_name = ls.name
                    ps = ls.paragraph_style
                    if ps is not None:
                        parent_styles.append(ps)
                    # Remembered for resolve_run() on the runs of this paragraph
                    if ls.character_style is not None:
                        self.para_char_cache[p] = ls.character_style
            elif default_para is not None:
                # No explicit style, use the default paragraph style
                if default_para.paragraph_style is not None:
                    parent_styles.append(default_para.paragraph_style)
                if default_para.character_style is not None:
                    self.para_char_cache[p] = default_para.character_style
            is_numbering = direct_formatting.numbering is not inherit
            if is_numbering:
                num_id, lvl = direct_formatting.numbering
                if num_id is not None:
                    # Stored on the element as 'level:numId'
                    p.set('calibre_num_id', '%s:%s' % (lvl, num_id))
                if num_id is not None and lvl is not None:
                    # self.numbering is set by resolve_numbering()
                    ps = self.numbering.get_para_style(num_id, lvl)
                    if ps is not None:
                        parent_styles.append(ps)
            for attr in ans.all_properties:
                if not (is_numbering and attr == 'text_indent'):  # skip text-indent for lists
                    setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr))
        return ans
    def resolve_run(self, r):
        '''
        Return the effective RunStyle for the run element r. The result is
        computed on first use and cached in self.run_cache.

        A run that references a style id which is not defined is treated as
        having no linked character style.
        '''
        ans = self.run_cache.get(r, None)
        if ans is None:
            p = r.getparent()
            ans = self.run_cache[r] = RunStyle()
            # Formatting specified directly on the run, all <w:rPr> children
            # merged
            direct_formatting = None
            for rPr in XPath('./w:rPr')(r):
                rs = RunStyle(rPr)
                if direct_formatting is None:
                    direct_formatting = rs
                else:
                    direct_formatting.update(rs)
            if direct_formatting is None:
                direct_formatting = RunStyle()
            # run_val() searches this list from the end for non toggle
            # properties, so styles appended later take precedence
            parent_styles = []
            default_char = self.default_styles.get('character', None)
            if self.default_character_style is not None:
                parent_styles.append(self.default_character_style)
            # Character style recorded by resolve_paragraph() for the parent
            pstyle = self.para_char_cache.get(p, None)
            if pstyle is not None:
                parent_styles.append(pstyle)
            if direct_formatting.linked_style is not None:
                # self.get() returns None for an unknown style id
                ls = getattr(self.get(direct_formatting.linked_style), 'character_style', None)
                if ls is not None:
                    parent_styles.append(ls)
            elif default_char is not None and default_char.character_style is not None:
                parent_styles.append(default_char.character_style)
            for attr in ans.all_properties:
                setattr(ans, attr, self.run_val(parent_styles, direct_formatting, attr))
            if ans.font_family is not inherit:
                ans.font_family = self.fonts.family_for(ans.font_family, ans.b, ans.i)
        return ans
    def resolve(self, obj):
        if obj.tag.endswith('}p'):
            return self.resolve_paragraph(obj)
        if obj.tag.endswith('}r'):
            return self.resolve_run(obj)
    def cascade(self, layers):
        '''
        Move font family and font size up the hierarchy: from the runs of a
        paragraph to the paragraph and from the paragraphs to the body, setting
        the value to inherit wherever it equals the promoted one.

        :param layers: Maps every paragraph element to the list of its runs
        '''
        # Defaults, used when no paragraph defines a family/size
        self.body_font_family = 'serif'
        self.body_font_size = '10pt'
        for p, runs in layers.iteritems():
            char_styles = [self.resolve_run(r) for r in runs]
            block_style = self.resolve_paragraph(p)
            # The most common font family of the runs becomes the family of
            # the paragraph
            c = Counter()
            for s in char_styles:
                if s.font_family is not inherit:
                    c[s.font_family] += 1
            if c:
                family = c.most_common(1)[0][0]
                block_style.font_family = family
                for s in char_styles:
                    if s.font_family == family:
                        s.font_family = inherit
            # For the size, the first size found among the runs is used
            sizes = [s.font_size for s in char_styles if s.font_size is not inherit]
            if sizes:
                sz = block_style.font_size = sizes[0]
                for s in char_styles:
                    if s.font_size == sz:
                        s.font_size = inherit
        # Same again one level up: the most common family and size of the
        # paragraphs become those of the body
        block_styles = [self.resolve_paragraph(p) for p in layers]
        c = Counter()
        for s in block_styles:
            if s.font_family is not inherit:
                c[s.font_family] += 1
        if c:
            self.body_font_family = family = c.most_common(1)[0][0]
            for s in block_styles:
                if s.font_family == family:
                    s.font_family = inherit
        c = Counter()
        for s in block_styles:
            if s.font_size is not inherit:
                c[s.font_size] += 1
        if c:
            sz = c.most_common(1)[0][0]
            for s in block_styles:
                if s.font_size == sz:
                    s.font_size = inherit
            self.body_font_size = '%.3gpt' % sz
    def resolve_numbering(self, numbering):
        # When a numPr element appears inside a paragraph style, the lvl info
        # must be discarded and pStyle used instead.
        self.numbering = numbering
        for style in self:
            block = style.paragraph_style
            if block is None or block.numbering is inherit:
                continue
            num_id = block.numbering[0]
            level = numbering.get_pstyle(num_id, style.style_id)
            # No level of that numbering links to this style: drop the numbering
            block.numbering = inherit if level is None else (num_id, level)
    def register(self, css, prefix):
        h = hash(frozenset(css.iteritems()))
        ans, _ = self.classes.get(h, (None, None))
        if ans is None:
            self.counter[prefix] += 1
            ans = '%s_%d' % (prefix, self.counter[prefix])
            self.classes[h] = (ans, css)
        return ans
    def generate_classes(self):
        for bs in self.para_cache.itervalues():
            css = bs.css
            if css:
                self.register(css, 'block')
        for bs in self.run_cache.itervalues():
            css = bs.css
            if css:
                self.register(css, 'text')
    def class_name(self, css):
        h = hash(frozenset(css.iteritems()))
        return self.classes.get(h, (None, None))[0]
    def generate_css(self, dest_dir, docx):
        ef = self.fonts.embed_fonts(dest_dir, docx)
        prefix = textwrap.dedent(
            '''\
            body { font-family: %s; font-size: %s }
            p { text-indent: 1.5em }
            ul, ol, p { margin: 0; padding: 0 }
            ''') % (self.body_font_family, self.body_font_size)
        if ef:
            prefix = ef + '\n' + prefix
        ans = []
        for (cls, css) in sorted(self.classes.itervalues(), key=lambda x:x[0]):
            b = ('\t%s: %s;' % (k, v) for k, v in css.iteritems())
            b = '\n'.join(b)
            ans.append('.%s {\n%s\n}\n' % (cls, b.rstrip(';')))
        return prefix + '\n' + '\n'.join(ans)
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@ -6,36 +6,250 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
-import sys, os
+import sys, os, re
 from collections import OrderedDict
 from lxml import html
-from lxml.html.builder import (HTML, HEAD, TITLE, BODY, LINK, META)
+from lxml.html.builder import (
    HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
-from calibre.ebooks.docx.container import Container
+from calibre.ebooks.docx.container import DOCX, fromstring
 from calibre.ebooks.docx.names import XPath, is_tag, XML, STYLES, NUMBERING, FONTS
 from calibre.ebooks.docx.styles import Styles, inherit
 from calibre.ebooks.docx.numbering import Numbering
 from calibre.ebooks.docx.fonts import Fonts
 from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
 class Text:
    ' Buffers text fragments and flushes them into an attribute of an element '
    def __init__(self, elem, attr, buf):
        self.elem = elem
        self.attr = attr
        self.buf = buf
    def add_elem(self, elem):
        ' Flush the buffer into the current target, then collect into the tail of elem '
        flushed = ''.join(self.buf)
        setattr(self.elem, self.attr, flushed)
        self.elem = elem
        self.attr = 'tail'
        self.buf = []
 class Convert(object):
    def __init__(self, path_or_stream, dest_dir=None, log=None):
        # Open the input and build the empty HTML skeleton that the converted
        # content is appended to. dest_dir defaults to the current directory.
-        self.container = Container(path_or_stream, log=log)
+        self.docx = DOCX(path_or_stream, log=log)
-        self.log = self.container.log
+        self.log = self.docx.log
        self.dest_dir = dest_dir or os.getcwdu()
        self.mi = self.docx.metadata
        self.body = BODY()
        self.styles = Styles()
        # Maps every generated HTML element to the element it was created from
        self.object_map = OrderedDict()
        self.html = HTML(
            HEAD(
                META(charset='utf-8'),
-                TITLE('TODO: read from metadata'),
+                TITLE(self.mi.title or _('Unknown')),
                LINK(rel='stylesheet', type='text/css', href='docx.css'),
            ),
            self.body
        )
        # Whitespace between the elements of the skeleton, purely for the
        # readability of the serialized output
        self.html.text='\n\t'
        self.html[0].text='\n\t\t'
        self.html[0].tail='\n'
        for child in self.html[0]:
            child.tail = '\n\t\t'
        self.html[0][-1].tail = '\n\t'
        self.html[1].text = self.html[1].tail = '\n'
        # Set the lang attribute of <html> from the metadata language, when it
        # is known and has a two letter code
        lang = canonicalize_lang(self.mi.language)
        if lang and lang != 'und':
            lang = lang_as_iso639_1(lang)
            if lang:
                self.html.set('lang', lang)
    def __call__(self):
        ' Run the conversion: paragraphs, style cascade, list markup, CSS classes, then write the output '
        doc = self.docx.document
        relationships_by_id, relationships_by_type = self.docx.document_relationships
        self.read_styles(relationships_by_type)
        self.layers = OrderedDict()
        for wp in XPath('//w:p')(doc):
            self.body.append(self.convert_p(wp))
        # TODO: tables <w:tbl> child of <w:body> (nested tables?)
        # TODO: Last section properties <w:sectPr> child of <w:body>
        self.styles.cascade(self.layers)
        # Collect the elements that resolve_paragraph() marked as numbered;
        # the mark has the form 'level:numId'
        numbered = []
        for html_obj, obj in self.object_map.iteritems():
            mark = obj.get('calibre_num_id', None)
            if mark is None:
                continue
            level, _sep, num_id = mark.partition(':')
            try:
                level = int(level)
            except (TypeError, ValueError):
                level = 0
            numbered.append((html_obj, num_id, level))
        self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map)
        if len(self.body) > 0:
            # One top level element per line in the output
            self.body.text = '\n\t'
            for top in self.body:
                top.tail = '\n\t'
            self.body[-1].tail = '\n'
        self.styles.generate_classes()
        for html_obj, obj in self.object_map.iteritems():
            style = self.styles.resolve(obj)
            if style is None:
                continue
            css = style.css
            if not css:
                continue
            cls = self.styles.class_name(css)
            if cls:
                html_obj.set('class', cls)
        self.write()
    def read_styles(self, relationships_by_type):
        '''
        Locate and read the font table, styles and numbering parts, creating
        self.numbering and self.fonts and populating self.styles.

        :param relationships_by_type: Maps a relationship type to the name of
                                      the part it points to
        '''
        def get_name(rtype, defname):
            # Name of the part for rtype. Without a relationship, fall back to
            # a part called defname next to the main document, if there is one.
            name = relationships_by_type.get(rtype, None)
            if name is None:
                cname = self.docx.document_name.split('/')
                cname[-1] = defname
                candidate = '/'.join(cname)
                if self.docx.exists(candidate):
                    name = candidate
            return name
        nname = get_name(NUMBERING, 'numbering.xml')
        sname = get_name(STYLES, 'styles.xml')
        fname = get_name(FONTS, 'fontTable.xml')
        numbering = self.numbering = Numbering()
        fonts = self.fonts = Fonts()
        # Fonts are read first as the styles need them, and the styles before
        # the numbering as the numbering needs the styles
        if fname is not None:
            embed_relationships = self.docx.get_relationships(fname)[0]
            try:
                raw = self.docx.read(fname)
            except KeyError:
                self.log.warn('Fonts table %s does not exist' % fname)
            else:
                fonts(fromstring(raw), embed_relationships, self.docx, self.dest_dir)
        if sname is not None:
            try:
                raw = self.docx.read(sname)
            except KeyError:
                self.log.warn('Styles %s do not exist' % sname)
            else:
                self.styles(fromstring(raw), fonts)
        if nname is not None:
            try:
                raw = self.docx.read(nname)
            except KeyError:
                self.log.warn('Numbering styles %s do not exist' % nname)
            else:
                numbering(fromstring(raw), self.styles)
        self.styles.resolve_numbering(numbering)
    def write(self):
        ' Write index.html and, when any CSS was generated, docx.css into dest_dir '
        markup = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
        index_path = os.path.join(self.dest_dir, 'index.html')
        with open(index_path, 'wb') as out:
            out.write(markup)
        stylesheet = self.styles.generate_css(self.dest_dir, self.docx)
        if not stylesheet:
            return
        css_path = os.path.join(self.dest_dir, 'docx.css')
        with open(css_path, 'wb') as out:
            out.write(stylesheet.encode('utf-8'))
    def convert_p(self, p):
        '''
        Convert the paragraph element p into a <p> element (or <h1>-<h6> when
        its style is named "heading N") containing one converted element per
        run. Consecutive runs that share the same border are wrapped in a
        common <span> that carries the border.
        '''
        dest = P()
        self.object_map[dest] = p
        style = self.styles.resolve_paragraph(p)
        self.layers[p] = []
        for run in XPath('descendant::w:r')(p):
            span = self.convert_run(run)
            dest.append(span)
            self.layers[p].append(run)
        m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
        if m is not None:
            # HTML only has h1 to h6, clamp the level into that range
            n = min(6, max(1, int(m.group(1))))
            dest.tag = 'h%d' % n
        if style.direction == 'rtl':
            dest.set('dir', 'rtl')
        # Split the runs into groups of consecutive runs with the same border
        border_runs = []
        common_borders = []
        for span in dest:
            run = self.object_map[span]
            style = self.styles.resolve_run(run)
            if border_runs and not border_runs[-1][1].same_border(style):
                # The border changes: close the current group, this run
                # starts the next one
                if len(border_runs) > 1:
                    common_borders.append(border_runs)
                border_runs = []
            border_runs.append((span, style))
        # The group that reaches the end of the paragraph
        if len(border_runs) > 1:
            common_borders.append(border_runs)
        for border_run in common_borders:
            spans = []
            bs = {}
            for span, style in border_run:
                # Move the border from the individual runs to the wrapper
                style.get_border_css(bs)
                style.clear_border_css()
                spans.append(span)
            if bs:
                cls = self.styles.register(bs, 'text_border')
                wrapper = SPAN()
                self.wrap_elems(spans, wrapper)
                wrapper.set('class', cls)
        return dest
    def wrap_elems(self, elems, wrapper):
        '''
        Move elems into wrapper, which is inserted into their parent at the
        position of the first of them. The tail of the last element is
        transferred to the wrapper.

        :param elems: Non empty list of elements, children of the parent of elems[0]
        :return: wrapper
        '''
        p = elems[0].getparent()
        idx = p.index(elems[0])
        p.insert(idx, wrapper)
        # Text following the last element must now follow the wrapper
        wrapper.tail = elems[-1].tail
        elems[-1].tail = None
        for elem in elems:
            p.remove(elem)
            wrapper.append(elem)
        return wrapper
    def convert_run(self, run):
        '''
        Convert the run element into a <span> (<sub>/<sup> for subscript and
        superscript runs) holding its text, with <br> elements for the breaks
        and a nested <span> for text whose whitespace must be preserved.
        '''
        ans = SPAN()
        self.object_map[ans] = run
        # Text is buffered and flushed into the text of ans or the tail of the
        # most recently added child
        text = Text(ans, 'text', [])
        for child in run:
            if is_tag(child, 'w:t'):
                if not child.text:
                    continue
                space = child.get(XML('space'), None)
                if space == 'preserve':
                    text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                    ans.append(text.elem)
                else:
                    text.buf.append(child.text)
            elif is_tag(child, 'w:cr'):
                text.add_elem(BR())
                ans.append(text.elem)
            elif is_tag(child, 'w:br'):
                # NOTE(review): 'type' and 'clear' are looked up without a
                # namespace here; confirm that is how they appear on <w:br>
                typ = child.get('type', None)
                if typ in {'column', 'page'}:
                    br = BR(style='page-break-after:always')
                else:
                    clear = child.get('clear', None)
                    if clear in {'all', 'left', 'right'}:
                        br = BR(style='clear:%s'%('both' if clear == 'all' else clear))
                    else:
                        br = BR()
                text.add_elem(br)
                ans.append(text.elem)
        if text.buf:
            # Flush whatever text follows the last child
            setattr(text.elem, text.attr, ''.join(text.buf))
        style = self.styles.resolve_run(run)
        if style.vert_align in {'superscript', 'subscript'}:
            ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
        if style.lang is not inherit:
            # Must be an attribute of the element to reach the output
            ans.set('lang', style.lang)
        return ans
 if __name__ == '__main__':
    # Command line use: convert the file named by the last argument
-    Convert(sys.argv[-1])()
+    from calibre.utils.logging import default_log
    default_log.filter_level = default_log.DEBUG
    Convert(sys.argv[-1], log=default_log)()
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -136,7 +136,7 @@ class FB2MLizer(object):
            metadata['author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(author_last)
            metadata['author'] += '</author>'
        if not metadata['author']:
-            metadata['author'] = u'<author><first-name></first-name><last-name><last-name></author>'
+            metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>'
        metadata['keywords'] = u''
        tags = list(map(unicode, self.oeb_book.metadata.subject))
--- a/src/calibre/ebooks/mobi/debug/mobi8.py
+++ b/src/calibre/ebooks/mobi/debug/mobi8.py
@ -163,7 +163,8 @@ class MOBIFile(object):
            ext = 'dat'
            prefix = 'binary'
            suffix = ''
-            if sig in {b'HUFF', b'CDIC', b'INDX'}: continue
+            if sig in {b'HUFF', b'CDIC', b'INDX'}:
                continue
            # TODO: Ignore CNCX records as well
            if sig == b'FONT':
                font = read_font_record(rec.raw)
@ -196,7 +197,6 @@ class MOBIFile(object):
            vals = list(index)[:-1] + [None, None, None, None]
            entry_map.append(Entry(*(vals[:12])))
        indexing_data = collect_indexing_data(entry_map, list(map(len,
            self.text_records)))
        self.indexing_data = [DOC + '\n' +textwrap.dedent('''\
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@ -16,7 +16,8 @@ from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
 from calibre.utils.magick.draw import identify_data
 MBP_NS = 'http://mobipocket.com/ns/mbp'
-def MBP(name): return '{%s}%s' % (MBP_NS, name)
+def MBP(name):
    return '{%s}%s' % (MBP_NS, name)
 MOBI_NSMAP = {None: XHTML_NS, 'mbp': MBP_NS}
@ -413,7 +414,7 @@ class MobiMLizer(object):
                        # img sizes in units other than px
                        # See #7520 for test case
                        try:
-                            pixs = int(round(float(value) / \
+                            pixs = int(round(float(value) /
                                (72./self.profile.dpi)))
                        except:
                            continue
@ -488,8 +489,6 @@ class MobiMLizer(object):
        if elem.text:
            if istate.preserve:
                text = elem.text
            elif len(elem) > 0 and isspace(elem.text):
                text = None
            else:
                text = COLLAPSE.sub(' ', elem.text)
        valign = style['vertical-align']
--- a/src/calibre/ebooks/mobi/reader/headers.py
+++ b/src/calibre/ebooks/mobi/reader/headers.py
@ -181,9 +181,9 @@ class BookHeader(object):
                self.codec = 'cp1252' if not user_encoding else user_encoding
                log.warn('Unknown codepage %d. Assuming %s' % (self.codepage,
                    self.codec))
-            # Some KF8 files have header length == 256 (generated by kindlegen
+            # Some KF8 files have header length == 264 (generated by kindlegen
-            # 2.7?). See https://bugs.launchpad.net/bugs/1067310
+            # 2.9?). See https://bugs.launchpad.net/bugs/1179144
-            max_header_length = 0x100
+            max_header_length = 500  # We choose 500 for future versions of kindlegen
            if (ident == 'TEXTREAD' or self.length < 0xE4 or
                    self.length > max_header_length or
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -871,6 +871,7 @@ class Manifest(object):
            orig_data = data
            fname = urlunquote(self.href)
            self.oeb.log.debug('Parsing', fname, '...')
            self.oeb.html_preprocessor.current_href = self.href
            try:
                data = parse_html(data, log=self.oeb.log,
                        decoder=self.oeb.decode,
@ -1312,9 +1313,9 @@ class Guide(object):
                         ('notes', __('Notes')),
                         ('preface', __('Preface')),
                         ('text', __('Main Text'))]
-        TYPES = set(t for t, _ in _TYPES_TITLES)
+        TYPES = set(t for t, _ in _TYPES_TITLES)  # noqa
        TITLES = dict(_TYPES_TITLES)
-        ORDER = dict((t, i) for i, (t, _) in enumerate(_TYPES_TITLES))
+        ORDER = dict((t, i) for i, (t, _) in enumerate(_TYPES_TITLES))  # noqa
        def __init__(self, oeb, type, title, href):
            self.oeb = oeb
--- a/src/calibre/ebooks/oeb/iterator/init.py
+++ b/src/calibre/ebooks/oeb/iterator/init.py
@ -7,7 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-import os, re
+import sys, os, re
 from calibre.customize.ui import available_input_formats
@ -26,17 +26,18 @@ def EbookIterator(*args, **kwargs):
    from calibre.ebooks.oeb.iterator.book import EbookIterator
    return EbookIterator(*args, **kwargs)
-def get_preprocess_html(path_to_ebook, output):
+def get_preprocess_html(path_to_ebook, output=None):
-    from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
+    from calibre.ebooks.conversion.plumber import set_regex_wizard_callback, Plumber
-    iterator = EbookIterator(path_to_ebook)
+    from calibre.utils.logging import DevNull
-    iterator.__enter__(only_input_plugin=True, run_char_count=False,
+    from calibre.ptempfile import TemporaryDirectory
-            read_anchor_map=False)
+    raw = {}
-    preprocessor = HTMLPreProcessor(None, False)
+    set_regex_wizard_callback(raw.__setitem__)
-    with open(output, 'wb') as out:
+    with TemporaryDirectory('_regex_wiz') as tdir:
-        for path in iterator.spine:
+        pl = Plumber(path_to_ebook, os.path.join(tdir, 'a.epub'), DevNull(), for_regex_wizard=True)
-            with open(path, 'rb') as f:
+        pl.run()
-                html = f.read().decode('utf-8', 'replace')
+        items = [raw[item.href] for item in pl.oeb.spine if item.href in raw]
-            html = preprocessor(html, get_preprocess_html=True)
+
    with (sys.stdout if output is None else open(output, 'wb')) as out:
        for html in items:
            out.write(html.encode('utf-8'))
            out.write(b'\n\n' + b'-'*80 + b'\n\n')
--- a/src/calibre/ebooks/oeb/iterator/book.py
+++ b/src/calibre/ebooks/oeb/iterator/book.py
@ -25,7 +25,7 @@ from calibre.ebooks.oeb.transforms.cover import CoverManager
 from calibre.ebooks.oeb.iterator.spine import (SpineItem, create_indexing_data)
 from calibre.ebooks.oeb.iterator.bookmarks import BookmarksMixin
-TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
+TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(
        '__ar__', 'none').replace('__viewbox__', '0 0 600 800'
        ).replace('__width__', '600').replace('__height__', '800')
--- a/src/calibre/ebooks/oeb/parse_utils.py
+++ b/src/calibre/ebooks/oeb/parse_utils.py
@ -44,8 +44,10 @@ META_XP = XPath('/h:html/h:head/h:meta[@http-equiv="Content-Type"]')
 def merge_multiple_html_heads_and_bodies(root, log=None):
    heads, bodies = xpath(root, '//h:head'), xpath(root, '//h:body')
-    if not (len(heads) > 1 or len(bodies) > 1): return root
+    if not (len(heads) > 1 or len(bodies) > 1):
-    for child in root: root.remove(child)
+        return root
    for child in root:
        root.remove(child)
    head = root.makeelement(XHTML('head'))
    body = root.makeelement(XHTML('body'))
    for h in heads:
@ -368,8 +370,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
        meta.getparent().remove(meta)
    meta = etree.SubElement(head, XHTML('meta'),
        attrib={'http-equiv': 'Content-Type'})
-    meta.set('content', 'text/html; charset=utf-8') # Ensure content is second
+    meta.set('content', 'text/html; charset=utf-8')  # Ensure content is second attribute
                                                    # attribute
    # Ensure has a <body/>
    if not xpath(data, '/h:html/h:body'):
--- a/src/calibre/ebooks/oeb/polish/toc.py
+++ b/src/calibre/ebooks/oeb/polish/toc.py
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
 import re
 from urlparse import urlparse
-from collections import deque
+from collections import deque, Counter
 from functools import partial
 from lxml import etree
@ -29,7 +29,8 @@ class TOC(object):
    def __init__(self, title=None, dest=None, frag=None):
        self.title, self.dest, self.frag = title, dest, frag
        self.dest_exists = self.dest_error = None
-        if self.title: self.title = self.title.strip()
+        if self.title:
            self.title = self.title.strip()
        self.parent = None
        self.children = []
@ -326,11 +327,13 @@ def create_ncx(toc, to_href, btitle, lang, uid):
    navmap = etree.SubElement(ncx, NCX('navMap'))
    spat = re.compile(r'\s+')
-    def process_node(xml_parent, toc_parent, play_order=0):
+    play_order = Counter()
    def process_node(xml_parent, toc_parent):
        for child in toc_parent:
-            play_order += 1
+            play_order['c'] += 1
            point = etree.SubElement(xml_parent, NCX('navPoint'), id=uuid_id(),
-                            playOrder=str(play_order))
+                            playOrder=str(play_order['c']))
            label = etree.SubElement(point, NCX('navLabel'))
            title = child.title
            if title:
@ -341,7 +344,7 @@ def create_ncx(toc, to_href, btitle, lang, uid):
                if child.frag:
                    href += '#'+child.frag
                etree.SubElement(point, NCX('content'), src=href)
-            process_node(point, child, play_order)
+            process_node(point, child)
    process_node(navmap, toc)
    return ncx
--- a/src/calibre/ebooks/pdf/render/links.py
+++ b/src/calibre/ebooks/pdf/render/links.py
@ -45,11 +45,15 @@ class Links(object):
            href, page, rect = link
            p, frag = href.partition('#')[0::2]
            try:
-                link = ((path, p, frag or None), self.pdf.get_pageref(page).obj, Array(rect))
+                pref = self.pdf.get_pageref(page).obj
            except IndexError:
-                self.log.warn('Unable to find page for link: %r, ignoring it' % link)
+                try:
                    pref = self.pdf.get_pageref(page-1).obj
                except IndexError:
                    self.pdf.debug('Unable to find page for link: %r, ignoring it' % link)
                    continue
-            self.links.append(link)
+                self.pdf.debug('The link %s points to non-existent page, moving it one page back' % href)
            self.links.append(((path, p, frag or None), pref, Array(rect)))
    def add_links(self):
        for link in self.links:
--- a/src/calibre/gui2/actions/show_quickview.py
+++ b/src/calibre/gui2/actions/show_quickview.py
@ -38,6 +38,13 @@ class ShowQuickviewAction(InterfaceAction):
                Quickview(self.gui, self.gui.library_view, index)
            self.current_instance.show()
    def change_quickview_column(self, idx):
        '''
        Call show_quickview() and then, if there is a Quickview instance
        that has not been closed, forward ``idx`` to it through its
        change_quickview_column signal.
        '''
        self.show_quickview()
        qv = self.current_instance
        if qv and not qv.is_closed:
            qv.change_quickview_column.emit(idx)
    def library_changed(self, db):
        if self.current_instance and not self.current_instance.is_closed:
            self.current_instance.set_database(db)
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -122,7 +122,8 @@ def device_name_for_plugboards(device_class):
 class DeviceManager(Thread): # {{{
    def __init__(self, connected_slot, job_manager, open_feedback_slot,
-            open_feedback_msg, allow_connect_slot, sleep_time=2):
+                 open_feedback_msg, allow_connect_slot,
                 after_callback_feedback_slot, sleep_time=2):
        '''
        :sleep_time: Time to sleep between device probes in secs
        '''
@ -150,6 +151,7 @@ class DeviceManager(Thread): # {{{
        self.ejected_devices  = set([])
        self.mount_connection_requests = Queue.Queue(0)
        self.open_feedback_slot = open_feedback_slot
        self.after_callback_feedback_slot = after_callback_feedback_slot
        self.open_feedback_msg = open_feedback_msg
        self._device_information = None
        self.current_library_uuid = None
@ -392,6 +394,10 @@ class DeviceManager(Thread): # {{{
                        self.device.set_progress_reporter(job.report_progress)
                    self.current_job.run()
                    self.current_job = None
                    feedback = getattr(self.device, 'user_feedback_after_callback', None)
                    if feedback is not None:
                        self.device.user_feedback_after_callback = None
                        self.after_callback_feedback_slot(feedback)
                else:
                    break
            if do_sleep:
@ -850,7 +856,7 @@ class DeviceMixin(object): # {{{
        self.device_manager = DeviceManager(FunctionDispatcher(self.device_detected),
                self.job_manager, Dispatcher(self.status_bar.show_message),
                Dispatcher(self.show_open_feedback),
-                FunctionDispatcher(self.allow_connect))
+                FunctionDispatcher(self.allow_connect), Dispatcher(self.after_callback_feedback))
        self.device_manager.start()
        self.device_manager.devices_initialized.wait()
        if tweaks['auto_connect_to_folder']:
@ -862,6 +868,10 @@ class DeviceMixin(object): # {{{
                name, show_copy_button=False,
                override_icon=QIcon(icon))
    def after_callback_feedback(self, feedback):
        '''
        Show, in an information dialog, the feedback that a device driver
        left behind after a job callback.

        :param feedback: Mapping with the keys ``title``, ``msg`` and
            ``det_msg``.
        '''
        # Look the values up by key. Unpacking the mapping would only yield
        # its keys, and would fail unless it held exactly three entries.
        info_dialog(self, feedback['title'], feedback['msg'],
                    det_msg=feedback['det_msg']).show()
    def debug_detection(self, done):
        self.debug_detection_callback = weakref.ref(done)
        self.device_manager.debug_detection(FunctionDispatcher(self.debug_detection_done))
@ -1116,7 +1126,7 @@ class DeviceMixin(object): # {{{
            return
        dm = self.iactions['Remove Books'].delete_memory
-        if dm.has_key(job):
+        if job in dm:
            paths, model = dm.pop(job)
            self.device_manager.remove_books_from_metadata(paths,
                    self.booklists())
@ -1141,7 +1151,7 @@ class DeviceMixin(object): # {{{
    def dispatch_sync_event(self, dest, delete, specific):
        rows = self.library_view.selectionModel().selectedRows()
        if not rows or len(rows) == 0:
-            error_dialog(self, _('No books'), _('No books')+' '+\
+            error_dialog(self, _('No books'), _('No books')+' '+
                    _('selected to send')).exec_()
            return
@ -1160,7 +1170,7 @@ class DeviceMixin(object): # {{{
                if fmts:
                    for f in fmts.split(','):
                        f = f.lower()
-                        if format_count.has_key(f):
+                        if f in format_count:
                            format_count[f] += 1
                        else:
                            format_count[f] = 1
--- a/src/calibre/gui2/device_drivers/configwidget.py
+++ b/src/calibre/gui2/device_drivers/configwidget.py
@ -28,7 +28,10 @@ class ConfigWidget(QWidget, Ui_ConfigWidget):
        all_formats = set(all_formats)
        self.calibre_known_formats = device.FORMATS
        try:
            self.device_name = device.get_gui_name()
        except TypeError:
            self.device_name = getattr(device, 'gui_name', None) or _('Device')
        if device.USER_CAN_ADD_NEW_FORMATS:
            all_formats = set(all_formats) | set(BOOK_EXTENSIONS)
--- a/src/calibre/gui2/dialogs/quickview.py
+++ b/src/calibre/gui2/dialogs/quickview.py
@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
 from PyQt4.Qt import (Qt, QDialog, QAbstractItemView, QTableWidgetItem,
                      QListWidgetItem, QByteArray, QCoreApplication,
-                      QApplication)
+                      QApplication, pyqtSignal)
 from calibre.customize.ui import find_plugin
 from calibre.gui2 import gprefs
@ -44,6 +44,8 @@ class TableItem(QTableWidgetItem):
 class Quickview(QDialog, Ui_Quickview):
    change_quickview_column   = pyqtSignal(object)
    def __init__(self, gui, view, row):
        QDialog.__init__(self, gui, flags=Qt.Window)
        Ui_Quickview.__init__(self)
@ -105,6 +107,7 @@ class Quickview(QDialog, Ui_Quickview):
        self.refresh(row)
        self.view.clicked.connect(self.slave)
        self.change_quickview_column.connect(self.slave)
        QCoreApplication.instance().aboutToQuit.connect(self.save_state)
        self.search_button.clicked.connect(self.do_search)
        view.model().new_bookdisplay_data.connect(self.book_was_changed)
@ -146,6 +149,9 @@ class Quickview(QDialog, Ui_Quickview):
        key = self.view.model().column_map[self.current_column]
        book_id = self.view.model().id(bv_row)
        if self.current_book_id == book_id and self.current_key == key:
            return
        # Only show items for categories
        if not self.db.field_metadata[key]['is_category']:
            if self.current_key is None:
@ -164,6 +170,8 @@ class Quickview(QDialog, Ui_Quickview):
        if vals:
            self.no_valid_items = False
            if self.db.field_metadata[key]['datatype'] == 'rating':
                vals = unicode(vals/2)
            if not isinstance(vals, list):
                vals = [vals]
            vals.sort(key=sort_key)
@ -198,8 +206,7 @@ class Quickview(QDialog, Ui_Quickview):
            sv = selected_item
        sv = sv.replace('"', r'\"')
        self.last_search = self.current_key+':"=' + sv + '"'
-        books = self.db.search_getting_ids(self.last_search,
+        books = self.db.search(self.last_search, return_matches=True)
                                           self.db.data.search_restriction)
        self.books_table.setRowCount(len(books))
        self.books_label.setText(_('Books with selected item "{0}": {1}').
--- a/src/calibre/gui2/dialogs/template_dialog.py
+++ b/src/calibre/gui2/dialogs/template_dialog.py
@ -3,17 +3,21 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __license__   = 'GPL v3'
-import json
+import json, os, traceback
 from PyQt4.Qt import (Qt, QDialog, QDialogButtonBox, QSyntaxHighlighter, QFont,
-                      QRegExp, QApplication, QTextCharFormat, QColor, QCursor)
+                      QRegExp, QApplication, QTextCharFormat, QColor, QCursor,
                      QIcon, QSize)
-from calibre.gui2 import error_dialog
+from calibre import sanitize_file_name_unicode
 from calibre.constants import config_dir
 from calibre.gui2.dialogs.template_dialog_ui import Ui_TemplateDialog
 from calibre.utils.formatter_functions import formatter_functions
 from calibre.utils.icu import sort_key
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.metadata.book.formatter import SafeFormat
-from calibre.library.coloring import (displayable_columns)
+from calibre.library.coloring import (displayable_columns, color_row_key)
 from calibre.gui2 import error_dialog, choose_files, pixmap_to_data
 class ParenPosition:
@ -198,25 +202,56 @@ class TemplateHighlighter(QSyntaxHighlighter):
 class TemplateDialog(QDialog, Ui_TemplateDialog):
-    def __init__(self, parent, text, mi=None, fm=None, color_field=None):
+    def __init__(self, parent, text, mi=None, fm=None, color_field=None,
                 icon_field_key=None, icon_rule_kind=None):
        QDialog.__init__(self, parent)
        Ui_TemplateDialog.__init__(self)
        self.setupUi(self)
        self.coloring = color_field is not None
        self.iconing = icon_field_key is not None
        cols = []
        if fm is not None:
            for key in sorted(displayable_columns(fm),
                              key=lambda(k): sort_key(fm[k]['name']) if k != color_row_key else 0):
                if key == color_row_key and not self.coloring:
                    continue
                from calibre.gui2.preferences.coloring import all_columns_string
                name = all_columns_string if key == color_row_key else fm[key]['name']
                if name:
                    cols.append((name, key))
        self.color_layout.setVisible(False)
        self.icon_layout.setVisible(False)
        if self.coloring:
-            cols = sorted([k for k in displayable_columns(fm)])
+            self.color_layout.setVisible(True)
-            self.colored_field.addItems(cols)
+            for n1, k1 in cols:
-            self.colored_field.setCurrentIndex(self.colored_field.findText(color_field))
+                self.colored_field.addItem(n1, k1)
            self.colored_field.setCurrentIndex(self.colored_field.findData(color_field))
            colors = QColor.colorNames()
            colors.sort()
            self.color_name.addItems(colors)
-        else:
+        elif self.iconing:
-            self.colored_field.setVisible(False)
+            self.icon_layout.setVisible(True)
-            self.colored_field_label.setVisible(False)
+            for n1, k1 in cols:
-            self.color_chooser_label.setVisible(False)
+                self.icon_field.addItem(n1, k1)
-            self.color_name.setVisible(False)
+            self.icon_file_names = []
-            self.color_copy_button.setVisible(False)
+            d = os.path.join(config_dir, 'cc_icons')
            if os.path.exists(d):
                for icon_file in os.listdir(d):
                    icon_file = icu_lower(icon_file)
                    if os.path.exists(os.path.join(d, icon_file)):
                        if icon_file.endswith('.png'):
                            self.icon_file_names.append(icon_file)
            self.icon_file_names.sort(key=sort_key)
            self.update_filename_box()
            self.icon_with_text.setChecked(True)
            if icon_rule_kind == 'icon_only':
                self.icon_without_text.setChecked(True)
            self.icon_field.setCurrentIndex(self.icon_field.findData(icon_field_key))
        if mi:
            self.mi = mi
        else:
@ -248,6 +283,8 @@ class TemplateDialog(QDialog, Ui_TemplateDialog):
        self.buttonBox.button(QDialogButtonBox.Ok).setText(_('&OK'))
        self.buttonBox.button(QDialogButtonBox.Cancel).setText(_('&Cancel'))
        self.color_copy_button.clicked.connect(self.color_to_clipboard)
        self.filename_button.clicked.connect(self.filename_button_clicked)
        self.icon_copy_button.clicked.connect(self.icon_to_clipboard)
        try:
            with open(P('template-functions.json'), 'rb') as f:
@ -276,11 +313,55 @@ class TemplateDialog(QDialog, Ui_TemplateDialog):
                '<a href="http://manual.calibre-ebook.com/template_ref.html">'
                '%s</a>'%tt)
    def filename_button_clicked(self):
        '''
        Let the user choose an image file and add it to the icon choices.

        The chosen file's base name, with a ``.png`` extension, becomes the
        icon name. If that name is not yet in self.icon_file_names it is
        added, the combo box is rebuilt, and the image is written as a PNG
        (from a pixmap requested at 128x128) into the ``cc_icons`` folder of
        the configuration directory, unless a file of that name already
        exists there. Finally the icon is selected in the icon_files combo
        box.

        This is connected to a button's clicked signal, so errors are
        printed as a traceback instead of being raised.
        '''
        try:
            path = choose_files(self, 'choose_category_icon',
                        _('Select Icon'), filters=[
                        ('Images', ['png', 'gif', 'jpg', 'jpeg'])],
                    all_files=False, select_only_single_file=True)
            if not path:
                # Nothing was chosen
                return
            icon_path = path[0]
            icon_name = sanitize_file_name_unicode(
                         os.path.splitext(
                               os.path.basename(icon_path))[0]+'.png')
            if icon_name not in self.icon_file_names:
                self.icon_file_names.append(icon_name)
                self.update_filename_box()
                try:
                    p = QIcon(icon_path).pixmap(QSize(128, 128))
                    d = os.path.join(config_dir, 'cc_icons')
                    dest = os.path.join(d, icon_name)
                    # Never overwrite an icon file that is already present
                    if not os.path.exists(dest):
                        if not os.path.exists(d):
                            os.makedirs(d)
                        with open(dest, 'wb') as f:
                            f.write(pixmap_to_data(p, format='PNG'))
                except Exception:
                    # Best effort: a failed save must not abort the selection
                    # below, but KeyboardInterrupt/SystemExit still propagate
                    traceback.print_exc()
            self.icon_files.setCurrentIndex(self.icon_files.findText(icon_name))
            self.icon_files.adjustSize()
        except Exception:
            traceback.print_exc()
    def update_filename_box(self):
        '''
        Rebuild the icon_files combo box from self.icon_file_names: an empty
        first entry, followed by the file names sorted with sort_key, each
        given an icon loaded from the ``cc_icons`` folder of the
        configuration directory.
        '''
        combo = self.icon_files
        combo.clear()
        self.icon_file_names.sort(key=sort_key)
        combo.addItem('')
        combo.addItems(self.icon_file_names)
        icon_dir = os.path.join(config_dir, 'cc_icons')
        # Row 0 is the empty entry, so the file names start at row 1
        for row, fname in enumerate(self.icon_file_names, 1):
            combo.setItemIcon(row, QIcon(os.path.join(icon_dir, fname)))
    def color_to_clipboard(self):
        '''
        Put the text currently shown in the color_name combo box on the
        application clipboard.
        '''
        chosen = unicode(self.color_name.currentText())
        QApplication.instance().clipboard().setText(chosen)
    def icon_to_clipboard(self):
        '''
        Put the text currently shown in the icon_files combo box on the
        application clipboard.
        '''
        chosen = unicode(self.icon_files.currentText())
        QApplication.instance().clipboard().setText(chosen)
    def textbox_changed(self):
        cur_text = unicode(self.textbox.toPlainText())
        if self.last_text != cur_text:
@ -324,5 +405,14 @@ class TemplateDialog(QDialog, Ui_TemplateDialog):
                    _('The template box cannot be empty'), show=True)
                return
-        self.rule = (unicode(self.colored_field.currentText()), txt)
+            self.rule = (unicode(self.colored_field.itemData(
                                self.colored_field.currentIndex()).toString()), txt)
        elif self.iconing:
            rt = 'icon' if self.icon_with_text.isChecked() else 'icon_only'
            self.rule = (rt,
                         unicode(self.icon_field.itemData(
                                self.icon_field.currentIndex()).toString()),
                         txt)
        else:
            self.rule = ('', txt)
        QDialog.accept(self)
--- a/src/calibre/gui2/dialogs/template_dialog.ui
+++ b/src/calibre/gui2/dialogs/template_dialog.ui
@ -21,6 +21,7 @@
  </property>
  <layout class="QVBoxLayout" name="verticalLayout">
   <item>
    <widget class="QWidget" name="color_layout">
     <layout class="QGridLayout">
      <item row="0" column="0">
       <widget class="QLabel" name="colored_field_label">
@ -62,6 +63,97 @@
       </widget>
      </item>
     </layout>
    </widget>
   </item>
   <item>
    <widget class="QWidget" name="icon_layout">
     <layout class="QGridLayout">
      <item row="0" column="0" colspan="2">
       <widget class="QGroupBox">
        <property name="title">
         <string>Kind</string>
        </property>
        <layout class="QHBoxLayout">
         <item>
          <widget class="QRadioButton" name="icon_without_text">
           <property name="text">
            <string>icon with no text</string>
           </property>
          </widget>
         </item>
         <item>
          <widget class="QRadioButton" name="icon_with_text">
           <property name="text">
            <string>icon with text</string>
           </property>
          </widget>
         </item>
        </layout>
        <property name="sizePolicy">
         <sizepolicy hsizetype="Expanding" vsizetype="Fixed">
          <horstretch>100</horstretch>
          <verstretch>0</verstretch>
         </sizepolicy>
        </property>
       </widget>
      </item>
      <item row="1" column="0">
       <widget class="QLabel" name="icon_chooser_label">
        <property name="text">
         <string>Apply the icon to column:</string>
        </property>
        <property name="buddy">
         <cstring>icon_field</cstring>
        </property>
       </widget>
      </item>
      <item row="1" column="1">
       <widget class="QComboBox" name="icon_field">
       </widget>
      </item>
      <item row="2" column="0">
       <widget class="QLabel" name="image_chooser_label">
        <property name="text">
         <string>Copy an icon file name to the clipboard:</string>
        </property>
        <!-- The buddy is the icon file combo box in this panel, not the
             color_name combo box of the color panel -->
        <property name="buddy">
         <cstring>icon_files</cstring>
        </property>
       </widget>
      </item>
      <item row="2" column="1">
       <widget class="QWidget">
        <layout class="QHBoxLayout">
         <item>
          <widget class="QComboBox" name="icon_files">
          </widget>
         </item>
         <item>
          <widget class="QToolButton" name="icon_copy_button">
           <property name="icon">
            <iconset resource="../../../../resources/images.qrc">
             <normaloff>:/images/edit-copy.png</normaloff>:/images/edit-copy.png</iconset>
           </property>
           <property name="toolTip">
            <string>Copy the selected icon file name to the clipboard</string>
           </property>
          </widget>
         </item>
         <item>
          <widget class="QPushButton" name="filename_button">
           <property name="text">
            <string>Add icon</string>
           </property>
           <property name="toolTip">
            <string>Add an icon file to the set of choices</string>
           </property>
          </widget>
         </item>
        </layout>
       </widget>
      </item>
     </layout>
    </widget>
   </item>
   <item>
    <widget class="QPlainTextEdit" name="textbox"/>
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@ -10,9 +10,9 @@ from functools import partial
 from future_builtins import map
 from collections import OrderedDict
-from PyQt4.Qt import (QTableView, Qt, QAbstractItemView, QMenu, pyqtSignal,
+from PyQt4.Qt import (QTableView, Qt, QAbstractItemView, QMenu, pyqtSignal, QFont,
-    QModelIndex, QIcon, QItemSelection, QMimeData, QDrag, QApplication,
+    QModelIndex, QIcon, QItemSelection, QMimeData, QDrag, QApplication, QStyle,
-    QPoint, QPixmap, QUrl, QImage, QPainter, QColor, QRect)
+    QPoint, QPixmap, QUrl, QImage, QPainter, QColor, QRect, QHeaderView, QStyleOptionHeader)
 from calibre.gui2.library.delegates import (RatingDelegate, PubDateDelegate,
    TextDelegate, DateDelegate, CompleteDelegate, CcTextDelegate,
@ -25,6 +25,54 @@ from calibre.gui2.library import DEFAULT_SORT
 from calibre.constants import filesystem_encoding
 from calibre import force_unicode
 class HeaderView(QHeaderView):  # {{{
    def __init__(self, *args):
        '''
        Create the header view. All arguments are passed through unchanged
        to QHeaderView.__init__().
        '''
        QHeaderView.__init__(self, *args)
        # Logical index of the section under the mouse; -1 means none
        # (maintained by event())
        self.hover = -1
        # Bold, italic copy of this widget's font
        self.current_font = QFont(self.font())
        self.current_font.setBold(True)
        self.current_font.setItalic(True)
    def event(self, e):
        '''
        Track which section the mouse is over in self.hover (-1 once the
        mouse has left), then hand the event to QHeaderView.event() and
        return its result.
        '''
        kind = e.type()
        if kind == e.HoverMove or kind == e.HoverEnter:
            self.hover = self.logicalIndexAt(e.pos())
        elif kind == e.Leave or kind == e.HoverLeave:
            self.hover = -1
        return QHeaderView.event(self, e)
    def paintSection(self, painter, rect, logical_index):
        '''
        Paint the header section ``logical_index`` into ``rect`` using
        ``painter``.

        A QStyleOptionHeader is filled in by hand and drawn with the
        widget's style. The section that matches the current index of the
        selection model (its column for a horizontal header, its row for a
        vertical one) is drawn with self.current_font.
        '''
        opt = QStyleOptionHeader()
        self.initStyleOption(opt)
        opt.rect = rect
        opt.section = logical_index
        opt.orientation = self.orientation()
        opt.textAlignment = Qt.AlignHCenter | Qt.AlignVCenter
        # The header text comes from the model of the parent widget
        model = self.parent().model()
        opt.text = model.headerData(logical_index, opt.orientation, Qt.DisplayRole).toString()
        # Only the section that is being sorted on gets a sort indicator
        if self.isSortIndicatorShown() and self.sortIndicatorSection() == logical_index:
            opt.sortIndicator = QStyleOptionHeader.SortDown if self.sortIndicatorOrder() == Qt.AscendingOrder else QStyleOptionHeader.SortUp
        # Elide text that is wider than the section (less a margin of 4)
        opt.text = opt.fontMetrics.elidedText(opt.text, Qt.ElideRight, rect.width() - 4)
        if self.isEnabled():
            opt.state |= QStyle.State_Enabled
            if self.window().isActiveWindow():
                opt.state |= QStyle.State_Active
                # self.hover is maintained by event()
                if self.hover == logical_index:
                    opt.state |= QStyle.State_MouseOver
        sm = self.selectionModel()
        if opt.orientation == Qt.Vertical:
            # Row headers of selected rows are drawn sunken
            if sm.isRowSelected(logical_index, QModelIndex()):
                opt.state |= QStyle.State_Sunken
        # save()/restore() keep the font change local to this section
        painter.save()
        if (
                (opt.orientation == Qt.Horizontal and sm.currentIndex().column() == logical_index) or
                (opt.orientation == Qt.Vertical and sm.currentIndex().row() == logical_index)):
            painter.setFont(self.current_font)
        self.style().drawControl(QStyle.CE_Header, opt, painter, self)
        painter.restore()
 # }}}
 class PreserveViewState(object):  # {{{
    '''
@ -72,7 +120,8 @@ class PreserveViewState(object): # {{{
            return {x:getattr(self, x) for x in ('selected_ids', 'current_id',
                'vscroll', 'hscroll')}
        def fset(self, state):
-            for k, v in state.iteritems(): setattr(self, k, v)
+            for k, v in state.iteritems():
                setattr(self, k, v)
            self.__exit__()
        return property(fget=fget, fset=fset)
@ -90,6 +139,7 @@ class BooksView(QTableView): # {{{
    def __init__(self, parent, modelcls=BooksModel, use_edit_metadata_dialog=True):
        QTableView.__init__(self, parent)
        self.setProperty('highlight_current_item', 150)
        self.row_sizing_done = False
        if not tweaks['horizontal_scrolling_per_column']:
@ -152,12 +202,16 @@ class BooksView(QTableView): # {{{
        # {{{ Column Header setup
        self.can_add_columns = True
        self.was_restored = False
-        self.column_header = self.horizontalHeader()
+        self.column_header = HeaderView(Qt.Horizontal, self)
        self.setHorizontalHeader(self.column_header)
        self.column_header.setMovable(True)
        self.column_header.setClickable(True)
        self.column_header.sectionMoved.connect(self.save_state)
        self.column_header.setContextMenuPolicy(Qt.CustomContextMenu)
        self.column_header.customContextMenuRequested.connect(self.show_column_header_context_menu)
        self.column_header.sectionResized.connect(self.column_resized, Qt.QueuedConnection)
        self.row_header = HeaderView(Qt.Vertical, self)
        self.setVerticalHeader(self.row_header)
        # }}}
        self._model.database_changed.connect(self.database_changed)
@ -197,6 +251,16 @@ class BooksView(QTableView): # {{{
        elif action.startswith('align_'):
            alignment = action.partition('_')[-1]
            self._model.change_alignment(column, alignment)
        elif action == 'quickview':
            from calibre.customize.ui import find_plugin
            qv = find_plugin('Show Quickview')
            if qv:
                rows = self.selectionModel().selectedRows()
                if len(rows) > 0:
                    current_row = rows[0].row()
                    current_col = self.column_map.index(column)
                    index = self.model().index(current_row, current_col)
                    qv.actual_plugin_.change_quickview_column(index)
        self.save_state()
@ -225,7 +289,7 @@ class BooksView(QTableView): # {{{
                ac.setCheckable(True)
                ac.setChecked(True)
            if col not in ('ondevice', 'inlibrary') and \
-                    (not self.model().is_custom_column(col) or \
+                    (not self.model().is_custom_column(col) or
                    self.model().custom_columns[col]['datatype'] not in ('bool',
                        )):
                m = self.column_header_context_menu.addMenu(
@ -240,7 +304,14 @@ class BooksView(QTableView): # {{{
                            a.setCheckable(True)
                            a.setChecked(True)
-
+            if self._model.db.field_metadata[col]['is_category']:
                act = self.column_header_context_menu.addAction(_('Quickview column %s') %
                        name,
                    partial(self.column_header_context_handler, action='quickview',
                        column=col))
                rows = self.selectionModel().selectedRows()
                if len(rows) > 1:
                    act.setEnabled(False)
            hidden_cols = [self.column_map[i] for i in
                    range(self.column_header.count()) if
@ -260,7 +331,6 @@ class BooksView(QTableView): # {{{
                        partial(self.column_header_context_handler,
                        action='show', column=col))
            self.column_header_context_menu.addSeparator()
            self.column_header_context_menu.addAction(
                    _('Shrink column if it is too wide to fit'),
@ -497,7 +567,6 @@ class BooksView(QTableView): # {{{
                        db.prefs[name] = ans
        return ans
    def restore_state(self):
        old_state = self.get_old_state()
        if old_state is None:
@ -820,7 +889,8 @@ class BooksView(QTableView): # {{{
        ids = frozenset(ids)
        m = self.model()
        for row in xrange(m.rowCount(QModelIndex())):
-            if len(row_map) >= len(ids): break
+            if len(row_map) >= len(ids):
                break
            c = m.id(row)
            if c in ids:
                row_map[c] = row
@ -880,7 +950,8 @@ class BooksView(QTableView): # {{{
                pass
            return None
        def fset(self, val):
-            if val is None: return
+            if val is None:
                return
            m = self.model()
            for row in xrange(m.rowCount(QModelIndex())):
                if m.id(row) == val:
@ -902,7 +973,8 @@ class BooksView(QTableView): # {{{
        column = ci.column()
        for i in xrange(ci.row()+1, self.row_count()):
-            if i in selected_rows: continue
+            if i in selected_rows:
                continue
            try:
                return self.model().id(self.model().index(i, column))
            except:
@ -910,7 +982,8 @@ class BooksView(QTableView): # {{{
        # No unselected rows after the current row, look before
        for i in xrange(ci.row()-1, -1, -1):
-            if i in selected_rows: continue
+            if i in selected_rows:
                continue
            try:
                return self.model().id(self.model().index(i, column))
            except:
--- a/src/calibre/gui2/preferences/coloring.py
+++ b/src/calibre/gui2/preferences/coloring.py
@ -636,10 +636,20 @@ class RulesModel(QAbstractListModel): # {{{
    def rule_to_html(self, kind, col, rule):
        if not isinstance(rule, Rule):
            if kind == 'color':
                return _('''
                <p>Advanced Rule for column <b>%(col)s</b>:
                <pre>%(rule)s</pre>
                ''')%dict(col=col, rule=prepare_string_for_xml(rule))
            else:
                return _('''
                <p>Advanced Rule: set <b>%(typ)s</b> for column <b>%(col)s</b>:
                <pre>%(rule)s</pre>
                ''')%dict(col=col,
                          typ=icon_rule_kinds[0][0]
                            if kind == icon_rule_kinds[0][1] else icon_rule_kinds[1][0],
                          rule=prepare_string_for_xml(rule))
        conditions = [self.condition_to_html(c) for c in rule.conditions]
        trans_kind = 'not found'
@ -761,7 +771,7 @@ class EditRules(QWidget): # {{{
                ' what icon to use. Click the Add Rule button below'
                ' to get started.<p>You can <b>change an existing rule</b> by'
                ' double clicking it.'))
-            self.add_advanced_button.setVisible(False)
+#             self.add_advanced_button.setVisible(False)
    def add_rule(self):
        d = RuleEditor(self.model.fm, self.pref_name)
@ -774,6 +784,7 @@ class EditRules(QWidget): # {{{
                self.changed.emit()
    def add_advanced(self):
        if self.pref_name == 'column_color_rules':
            td = TemplateDialog(self, '', mi=self.mi, fm=self.fm, color_field='')
            if td.exec_() == td.Accepted:
                col, r = td.rule
@ -781,6 +792,15 @@ class EditRules(QWidget): # {{{
                    idx = self.model.add_rule('color', col, r)
                    self.rules_view.scrollTo(idx)
                    self.changed.emit()
        else:
            td = TemplateDialog(self, '', mi=self.mi, fm=self.fm, icon_field_key='')
            if td.exec_() == td.Accepted:
                print(td.rule)
                typ, col, r = td.rule
                if typ and r and col:
                    idx = self.model.add_rule(typ, col, r)
                    self.rules_view.scrollTo(idx)
                    self.changed.emit()
    def edit_rule(self, index):
        try:
@ -790,8 +810,12 @@ class EditRules(QWidget): # {{{
        if isinstance(rule, Rule):
            d = RuleEditor(self.model.fm, self.pref_name)
            d.apply_rule(kind, col, rule)
-        else:
+        elif self.pref_name == 'column_color_rules':
            d = TemplateDialog(self, rule, mi=self.mi, fm=self.fm, color_field=col)
        else:
            d = TemplateDialog(self, rule, mi=self.mi, fm=self.fm, icon_field_key=col,
                               icon_rule_kind=kind)
        if d.exec_() == d.Accepted:
            if len(d.rule) == 2: # Convert template dialog rules to a triple
                d.rule = ('color', d.rule[0], d.rule[1])
--- a/src/calibre/gui2/preferences/tweaks.py
+++ b/src/calibre/gui2/preferences/tweaks.py
@ -172,7 +172,10 @@ class Tweaks(QAbstractListModel, SearchQueryParser): # {{{
            doc.append(line[1:].strip())
        doc = '\n'.join(doc)
        while True:
            try:
                line = lines[pos]
            except IndexError:
                break
            if not line.strip():
                break
            spidx1 = line.find(' ')
--- a/src/calibre/gui2/store/stores/koobe_plugin.py
+++ b/src/calibre/gui2/store/stores/koobe_plugin.py
@ -8,7 +8,6 @@ __copyright__ = '2013, Tomasz Długosz <tomek3d@gmail.com>'
 __docformat__ = 'restructuredtext en'
 import urllib
 from base64 import b64encode
 from contextlib import closing
 from lxml import html
--- a/src/calibre/gui2/store/stores/woblink_plugin.py
+++ b/src/calibre/gui2/store/stores/woblink_plugin.py
@ -9,7 +9,6 @@ __docformat__ = 'restructuredtext en'
 import re
 import urllib
 from base64 import b64encode
 from contextlib import closing
 from lxml import html
--- a/src/calibre/gui2/wizard/init.py
+++ b/src/calibre/gui2/wizard/init.py
@ -113,7 +113,7 @@ class KindleDX(Kindle):
    id = 'kindledx'
 class KindleFire(KindleDX):
-    name = 'Kindle Fire'
+    name = 'Kindle Fire and Fire HD'
    id = 'kindle_fire'
    output_profile = 'kindle_fire'
    supports_color = True
@ -431,7 +431,8 @@ class KindlePage(QWizardPage, KindleUI):
            default = ac[2]
            if x.strip().endswith('@kindle.com'):
                accs.append((x, default))
-                if default: has_default = True
+                if default:
                    has_default = True
        if has_default:
            accs = [x for x in accs if x[1]]
        if accs:
@ -450,7 +451,8 @@ class KindlePage(QWizardPage, KindleUI):
        if (len(parts) >= 2 and parts[0] and self.send_email_widget.set_email_settings(True)):
            conf = smtp_prefs()
            accounts = conf.parse().accounts
-            if not accounts: accounts = {}
+            if not accounts:
                accounts = {}
            for y in accounts.values():
                y[2] = False
            accounts[x] = ['AZW, MOBI, TPZ, PRC, AZW1', True, True]
@ -484,9 +486,9 @@ class StanzaPage(QWizardPage, StanzaUI):
            c = server_config()
            c.set('port', p)
    def set_port(self, *args):
-        if not self.content_server.isChecked(): return
+        if not self.content_server.isChecked():
            return
        import socket
        s = socket.socket()
        with closing(s):
@ -518,8 +520,7 @@ class DevicePage(QWizardPage, DeviceUI):
        self.manufacturer_view.setModel(self.man_model)
        previous = dynamic.get('welcome_wizard_device', False)
        if previous:
-            previous = [x for x in get_devices() if \
+            previous = [x for x in get_devices() if x.id == previous]
                    x.id == previous]
            if not previous:
                previous = [Device]
            previous = previous[0]
@ -841,7 +842,6 @@ class FinishPage(QWizardPage, FinishUI):
        pass
 class Wizard(QWizard):
    BUTTON_TEXTS = {
@ -859,7 +859,6 @@ class Wizard(QWizard):
            _('&Finish')
            _('Commit')
    def __init__(self, parent):
        QWizard.__init__(self, parent)
        self.setWindowTitle(__appname__+' '+_('welcome wizard'))
--- a/src/calibre/library/save_to_disk.py
+++ b/src/calibre/library/save_to_disk.py
@ -47,6 +47,7 @@ FORMAT_ARG_DESCS = dict(
        pubdate=_('The published date'),
        last_modified=_('The date when the metadata for this book record'
            ' was last modified'),
        languages=_('The language(s) of this book'),
        id=_('The calibre internal id')
        )
@ -283,7 +284,6 @@ def save_book_to_disk(id_, db, root, opts, length):
                pass
 def do_save_book_to_disk(id_, mi, cover, plugboards,
        format_map, root, opts, length):
    from calibre.ebooks.metadata.meta import set_metadata
--- a/src/calibre/library/server/content.py
+++ b/src/calibre/library/server/content.py
@ -61,7 +61,6 @@ class ContentServer(object):
                 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}
        return lm.replace('month', month[updated.month])
    def sort(self, items, field, order):
        field = self.db.data.sanitize_sort_field_name(field)
        if field not in self.db.field_metadata.sortable_field_keys():
@ -77,7 +76,7 @@ class ContentServer(object):
        try:
            id = int(id)
        except ValueError:
-            id = id.rpartition('_')[-1].partition('.')[0]
+            id = id.rpartition('.')[0].rpartition('_')[-1]
            match = re.search(r'\d+', id)
            if not match:
                raise cherrypy.HTTPError(404, 'id:%s not an integer'%id)
--- a/src/calibre/translations/af.po
+++ b/src/calibre/translations/af.po
--- a/src/calibre/translations/ar.po
+++ b/src/calibre/translations/ar.po
--- a/src/calibre/translations/ast.po
+++ b/src/calibre/translations/ast.po
--- a/src/calibre/translations/az.po
+++ b/src/calibre/translations/az.po
--- a/src/calibre/translations/ber.po
+++ b/src/calibre/translations/ber.po
--- a/src/calibre/translations/bg.po
+++ b/src/calibre/translations/bg.po
--- a/Show More
+++ b/Show More
		`@ -0,0 +1,2 @@`
							`__license__ = 'GPL v3'`
							`__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'`