sync with Kovid's branch

2025-08-30 23:00:21 -04:00 · 2013-05-16 19:32:12 +02:00 · 2013-05-16 19:32:12 +02:00 · 7b96fc5530
commit 7b96fc5530
parent ce03634581 6ad8c353ee
186 changed files with 65974 additions and 52383 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -20,6 +20,60 @@
 #   new recipes:
 #     - title: 

+- version: 0.9.30
+  date: 2013-05-10
+
+  new features:
+    - title: "Kobo driver: Add support for showing 'Archived' books on the device. Also up the supported firmware version to 2.5.3."
+      tickets: [1177677]
+
+    - title: "Driver for Blackberry 9790"
+      tickets: [1176607]
+
+    - title: "Add a tweak to turn off the highlighting of the book count when using a virtual library (Preferences->Tweaks)"
+
+    - title: "Add a button to clear the viewer search history in the viewer Preferences, under Miscellaneous"
+
+    - title: "Add keyboard shortcuts to clear the virtual Library and the additional restriction (Ctrl+Esc and Alt+Esc). Also use Shift+Esc to bring keyboard focus back to the book list. Can be changed under Preferences->Keyboard"
+
+    - title: "Docx metadata: Read the language of the file, if present"
+ 
+  bug fixes:
+    - title: "Kobo driver: Fix unable to read SD card on OS X/Linux"
+      tickets: [1174815]
+
+    - title: "Content server: Fix unable to download ORIGINAL_* formats"
+      tickets: [1177158]
+
+    - title: "Fix regression that broke searching for terms containing a quote mark"
+      tickets: [1177114]
+
+    - title: "Fix regression that broke conversion of txt files when no input encoding is specified"
+      tickets: [1176622]
+
+    - title: "When changing to a virtual library, refresh the Book Details panel."
+      tickets: [1176296]
+
+    - title: "Fix regression that caused searching for user categories to break."
+      tickets: [1176187]
+
+    - title: "Fix error when downloading only covers and reviewing downloaded metadata."
+      tickets: [1176253]
+
+    - title: "MOBI metadata: Strip XML unsafe unicode codepoints when reading metadata from MOBI files."
+      tickets: [1175965]
+
+    - title: "Txt Input: Use the gbk encoding for txt files with detected encoding of gb2312."
+      tickets: [1175974]
+
+    - title: "When pressing Ctrl+Home/End preserve the horizontal scroll position in the book list"
+
+  improved recipes:
+    - NSFW
+    - Go Comics
+    - Various Polish news sources
+    - The Sun
+
 - version: 0.9.29
  date: 2013-05-03

--- a/recipes/benchmark_pl.recipe
+++ b/recipes/benchmark_pl.recipe
@ -12,12 +12,15 @@ class BenchmarkPl(BasicNewsRecipe):
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
+    extra_css = 'ul {list-style-type: none;}'
    no_stylesheets = True
-    remove_attributes = ['style']
+    #remove_attributes = ['style']
    preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
-    keep_only_tags = [dict(name='div', attrs={'class':['m_zwykly', 'gallery']}), dict(id='article')]
+
+    keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict(name='div', attrs={'class':['m_zwykly', 'gallery']}), dict(id='article')]
    remove_tags_after = dict(id='article')
    remove_tags = [dict(name='div', attrs={'class':['comments', 'body', 'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs = {'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})]
+
    INDEX = 'http://www.benchmark.pl'
    feeds          = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
                          (u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')]
@ -42,46 +45,16 @@ class BenchmarkPl(BasicNewsRecipe):
        for r in appendtag.findAll(attrs={'class':'changePage'}):
            r.extract()

-
-    def image_article(self, soup, appendtag):
-        nexturl = soup.find('div', attrs={'class':'preview'})
-        if nexturl:
-            nexturl = nexturl.find('a', attrs={'class':'move_next'})
-            image = appendtag.find('div', attrs={'class':'preview'}).div['style'][16:]
-            image = self.INDEX + image[:image.find("')")]
-            appendtag.find(attrs={'class':'preview'}).name='img'
-            appendtag.find(attrs={'class':'preview'})['src']=image
-            appendtag.find('a', attrs={'class':'move_next'}).extract()
-        while nexturl:
-            nexturl = self.INDEX + nexturl['href']
-            soup2 = self.index_to_soup(nexturl)
-            nexturl = soup2.find('a', attrs={'class':'move_next'})
-            image = soup2.find('div', attrs={'class':'preview'}).div['style'][16:]
-            image = self.INDEX + image[:image.find("')")]
-            soup2.find(attrs={'class':'preview'}).name='img'
-            soup2.find(attrs={'class':'preview'})['src']=image
-            pagetext = soup2.find('div', attrs={'class':'gallery'})
-            pagetext.find('div', attrs={'class':'title'}).extract()
-            pagetext.find('div', attrs={'class':'thumb'}).extract()
-            pagetext.find('div', attrs={'class':'panelOcenaObserwowane'}).extract()       
-            if nexturl:
-                pagetext.find('a', attrs={'class':'move_next'}).extract()
-            pagetext.find('a', attrs={'class':'move_back'}).extract()
-            comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
-            for comment in comments:
-                comment.extract()
-            pos = len(appendtag.contents)
-            appendtag.insert(pos, pagetext)
-            
-
    def preprocess_html(self, soup):
-        if soup.find('div', attrs={'class':'preview'}):
-            self.image_article(soup, soup.body)
-        else:
        self.append_page(soup, soup.body)
        for a in soup('a'):
            if a.has_key('href') and not a['href'].startswith('http'):
                a['href'] = self.INDEX + a['href']
        for r in soup.findAll(attrs={'class':['comments', 'body']}):
            r.extract()
+        tag1 = soup.find(attrs={'class':'inlineGallery'})
+        if tag1:
+            for tag in tag1.findAll('li'):
+                tag['style'] = 'float: left; margin-right: 10px;'
+            tag1.findNext('p')['style'] = 'clear: both;'
        return soup
--- a/recipes/comics_com.recipe
+++ b/recipes/comics_com.recipe
@ -1,224 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class Comics(BasicNewsRecipe):
-    title               = 'Comics.com'
-    __author__          = 'Starson17'
-    description         = 'Comics from comics.com. You should customize this recipe to fetch only the comics you are interested in'
-    language            = 'en'
-    use_embedded_content= False
-    no_stylesheets      = True
-    oldest_article      = 24
-    remove_javascript   = True
-    cover_url           = 'http://www.bsb.lib.tx.us/images/comics.com.gif'
-    recursions          = 0
-    max_articles_per_feed = 10
-    num_comics_to_get = 7
-    simultaneous_downloads = 1
-    # delay = 3
-
-    keep_only_tags     = [dict(name='a', attrs={'class':'STR_StripImage'}),
-                          dict(name='div', attrs={'class':'STR_Date'})
-                          ]
-
-    def parse_index(self):
-        feeds = []
-        for title, url in [
-                            ("9 Chickweed Lane", "http://comics.com/9_chickweed_lane"),
-                            ("Agnes", "http://comics.com/agnes"),
-                            ("Alley Oop", "http://comics.com/alley_oop"),
-                            ("Andy Capp", "http://comics.com/andy_capp"),
-                            ("Arlo & Janis", "http://comics.com/arlo&janis"),
-                            ("B.C.", "http://comics.com/bc"),
-                            ("Ballard Street", "http://comics.com/ballard_street"),
-                            # ("Ben", "http://comics.com/ben"),
-                            # ("Betty", "http://comics.com/betty"),
-                            # ("Big Nate", "http://comics.com/big_nate"),
-                            # ("Brevity", "http://comics.com/brevity"),
-                            # ("Candorville", "http://comics.com/candorville"),
-                            # ("Cheap Thrills", "http://comics.com/cheap_thrills"),
-                            # ("Committed", "http://comics.com/committed"),
-                            # ("Cow & Boy", "http://comics.com/cow&boy"),
-                            # ("Daddy's Home", "http://comics.com/daddys_home"),
-                            # ("Dog eat Doug", "http://comics.com/dog_eat_doug"),
-                            # ("Drabble", "http://comics.com/drabble"),
-                            # ("F Minus", "http://comics.com/f_minus"),
-                            # ("Family Tree", "http://comics.com/family_tree"),
-                            # ("Farcus", "http://comics.com/farcus"),
-                            # ("Fat Cats Classics", "http://comics.com/fat_cats_classics"),
-                            # ("Ferd'nand", "http://comics.com/ferdnand"),
-                            # ("Flight Deck", "http://comics.com/flight_deck"),
-                            # ("Flo & Friends", "http://comics.com/flo&friends"),
-                            # ("Fort Knox", "http://comics.com/fort_knox"),
-                            # ("Frank & Ernest", "http://comics.com/frank&ernest"),
-                            # ("Frazz", "http://comics.com/frazz"),
-                            # ("Free Range", "http://comics.com/free_range"),
-                            # ("Geech Classics", "http://comics.com/geech_classics"),
-                            # ("Get Fuzzy", "http://comics.com/get_fuzzy"),
-                            # ("Girls & Sports", "http://comics.com/girls&sports"),
-                            # ("Graffiti", "http://comics.com/graffiti"),
-                            # ("Grand Avenue", "http://comics.com/grand_avenue"),
-                            # ("Heathcliff", "http://comics.com/heathcliff"),
-                            # "Heathcliff, a street-smart and mischievous cat with many adventures."
-                            # ("Herb and Jamaal", "http://comics.com/herb_and_jamaal"),
-                            # ("Herman", "http://comics.com/herman"),
-                            # ("Home and Away", "http://comics.com/home_and_away"),
-                            # ("It's All About You", "http://comics.com/its_all_about_you"),
-                            # ("Jane's World", "http://comics.com/janes_world"),
-                            # ("Jump Start", "http://comics.com/jump_start"),
-                            # ("Kit 'N' Carlyle", "http://comics.com/kit_n_carlyle"),
-                            # ("Li'l Abner Classics", "http://comics.com/lil_abner_classics"),
-                            # ("Liberty Meadows", "http://comics.com/liberty_meadows"),
-                            # ("Little Dog Lost", "http://comics.com/little_dog_lost"),
-                            # ("Lola", "http://comics.com/lola"),
-                            # ("Luann", "http://comics.com/luann"),
-                            # ("Marmaduke", "http://comics.com/marmaduke"),
-                            # ("Meg! Classics", "http://comics.com/meg_classics"),
-                            # ("Minimum Security", "http://comics.com/minimum_security"),
-                            # ("Moderately Confused", "http://comics.com/moderately_confused"),
-                            # ("Momma", "http://comics.com/momma"),
-                            # ("Monty", "http://comics.com/monty"),
-                            # ("Motley Classics", "http://comics.com/motley_classics"),
-                            # ("Nancy", "http://comics.com/nancy"),
-                            # ("Natural Selection", "http://comics.com/natural_selection"),
-                            # ("Nest Heads", "http://comics.com/nest_heads"),
-                            # ("Off The Mark", "http://comics.com/off_the_mark"),
-                            # ("On a Claire Day", "http://comics.com/on_a_claire_day"),
-                            # ("One Big Happy Classics", "http://comics.com/one_big_happy_classics"),
-                            # ("Over the Hedge", "http://comics.com/over_the_hedge"),
-                            # ("PC and Pixel", "http://comics.com/pc_and_pixel"),
-                            # ("Peanuts", "http://comics.com/peanuts"),
-                            # ("Pearls Before Swine", "http://comics.com/pearls_before_swine"),
-                            # ("Pickles", "http://comics.com/pickles"),
-                            # ("Prickly City", "http://comics.com/prickly_city"),
-                            # ("Raising Duncan Classics", "http://comics.com/raising_duncan_classics"),
-                            # ("Reality Check", "http://comics.com/reality_check"),
-                            # ("Red & Rover", "http://comics.com/red&rover"),
-                            # ("Rip Haywire", "http://comics.com/rip_haywire"),
-                            # ("Ripley's Believe It or Not!", "http://comics.com/ripleys_believe_it_or_not"),
-                            # ("Rose Is Rose", "http://comics.com/rose_is_rose"),
-                            # ("Rubes", "http://comics.com/rubes"),
-                            # ("Rudy Park", "http://comics.com/rudy_park"),
-                            # ("Scary Gary", "http://comics.com/scary_gary"),
-                            # ("Shirley and Son Classics", "http://comics.com/shirley_and_son_classics"),
-                            # ("Soup To Nutz", "http://comics.com/soup_to_nutz"),
-                            # ("Speed Bump", "http://comics.com/speed_bump"),
-                            # ("Spot The Frog", "http://comics.com/spot_the_frog"),
-                            # ("State of the Union", "http://comics.com/state_of_the_union"),
-                            # ("Strange Brew", "http://comics.com/strange_brew"),
-                            # ("Tarzan Classics", "http://comics.com/tarzan_classics"),
-                            # ("That's Life", "http://comics.com/thats_life"),
-                            # ("The Barn", "http://comics.com/the_barn"),
-                            # ("The Born Loser", "http://comics.com/the_born_loser"),
-                            # ("The Buckets", "http://comics.com/the_buckets"),
-                            # ("The Dinette Set", "http://comics.com/the_dinette_set"),
-                            # ("The Grizzwells", "http://comics.com/the_grizzwells"),
-                            # ("The Humble Stumble", "http://comics.com/the_humble_stumble"),
-                            # ("The Knight Life", "http://comics.com/the_knight_life"),
-                            # ("The Meaning of Lila", "http://comics.com/the_meaning_of_lila"),
-                            # ("The Other Coast", "http://comics.com/the_other_coast"),
-                            # ("The Sunshine Club", "http://comics.com/the_sunshine_club"),
-                            # ("Unstrange Phenomena", "http://comics.com/unstrange_phenomena"),
-                            # ("Watch Your Head", "http://comics.com/watch_your_head"),
-                            # ("Wizard of Id", "http://comics.com/wizard_of_id"),
-                            # ("Working Daze", "http://comics.com/working_daze"),
-                            # ("Working It Out", "http://comics.com/working_it_out"),
-                            # ("Zack Hill", "http://comics.com/zack_hill"),
-                            # ("(Th)ink", "http://comics.com/think"),
-                            # "Tackling the political and social issues impacting communities of color."
-                            # ("Adam Zyglis", "http://comics.com/adam_zyglis"),
-                            # "Known for his excellent caricatures, as well as independent and incisive imagery. "
-                            # ("Andy Singer", "http://comics.com/andy_singer"),
-                            # ("Bill Day", "http://comics.com/bill_day"),
-                            # "Powerful images on sensitive issues."
-                            # ("Bill Schorr", "http://comics.com/bill_schorr"),
-                            # ("Bob Englehart", "http://comics.com/bob_englehart"),
-                            # ("Brian Fairrington", "http://comics.com/brian_fairrington"),
-                            # ("Bruce Beattie", "http://comics.com/bruce_beattie"),
-                            # ("Cam Cardow", "http://comics.com/cam_cardow"),
-                            # ("Chip Bok", "http://comics.com/chip_bok"),
-                            # ("Chris Britt", "http://comics.com/chris_britt"),
-                            # ("Chuck Asay", "http://comics.com/chuck_asay"),
-                            # ("Clay Bennett", "http://comics.com/clay_bennett"),
-                            # ("Daryl Cagle", "http://comics.com/daryl_cagle"),
-                            # ("David Fitzsimmons", "http://comics.com/david_fitzsimmons"),
-                            # "David Fitzsimmons is a new editorial cartoons on comics.com.  He is also a staff writer and editorial cartoonist for the Arizona Daily Star. "
-                            # ("Drew Litton", "http://comics.com/drew_litton"),
-                            # "Drew Litton is an artist who is probably best known for his sports cartoons. He received the National Cartoonist Society Sports Cartoon Award for 1993. "
-                            # ("Ed Stein", "http://comics.com/ed_stein"),
-                            # "Winner of the Fischetti Award in 2006 and the Scripps Howard National Journalism Award, 1999, Ed Stein has been the editorial cartoonist for the Rocky Mountain News since 1978. "
-                            # ("Eric Allie", "http://comics.com/eric_allie"),
-                            # "Eric Allie is an editorial cartoonist with the Pioneer Press and CNS News. "
-                            # ("Gary Markstein", "http://comics.com/gary_markstein"),
-                            # ("Gary McCoy", "http://comics.com/gary_mccoy"),
-                            # "Gary McCoy is known for his editorial cartoons, humor and inane ramblings. He is a 2 time nominee for  Best  Magazine Cartoonist of the Year by the National Cartoonists Society. He resides in Belleville, IL. "
-                            # ("Gary Varvel", "http://comics.com/gary_varvel"),
-                            # ("Henry Payne", "http://comics.com/henry_payne"),
-                            # ("JD Crowe", "http://comics.com/jd_crowe"),
-                            # ("Jeff Parker", "http://comics.com/jeff_parker"),
-                            # ("Jeff Stahler", "http://comics.com/jeff_stahler"),
-                            # ("Jerry Holbert", "http://comics.com/jerry_holbert"),
-                            # ("John Cole", "http://comics.com/john_cole"),
-                            # ("John Darkow", "http://comics.com/john_darkow"),
-                            # "John Darkow is a contributing editorial cartoonist for the Humor Times as well as editoiral cartoonist for  the Columbia Daily Tribune, Missouri"
-                            # ("John Sherffius", "http://comics.com/john_sherffius"),
-                            # ("Larry Wright", "http://comics.com/larry_wright"),
-                            # ("Lisa Benson", "http://comics.com/lisa_benson"),
-                            # ("Marshall Ramsey", "http://comics.com/marshall_ramsey"),
-                            # ("Matt Bors", "http://comics.com/matt_bors"),
-                            # ("Michael Ramirez", "http://comics.com/michael_ramirez"),
-                            # ("Mike Keefe", "http://comics.com/mike_keefe"),
-                            # ("Mike Luckovich", "http://comics.com/mike_luckovich"),
-                            # ("MIke Thompson", "http://comics.com/mike_thompson"),
-                            # ("Monte Wolverton", "http://comics.com/monte_wolverton"),
-                            # "Unique mix of perspectives"
-                            # ("Mr. Fish", "http://comics.com/mr_fish"),
-                            # "Side effects may include swelling"
-                            # ("Nate Beeler", "http://comics.com/nate_beeler"),
-                            # "Middle America meets the Beltway."
-                            # ("Nick Anderson", "http://comics.com/nick_anderson"),
-                            # ("Pat Bagley", "http://comics.com/pat_bagley"),
-                            # "Unfair and Totally Unbalanced."
-                            # ("Paul Szep", "http://comics.com/paul_szep"),
-                            # ("RJ Matson", "http://comics.com/rj_matson"),
-                            # "Power cartoons from NYC and Capitol Hill"
-                            # ("Rob Rogers", "http://comics.com/rob_rogers"),
-                            # "Humorous slant on current events"
-                            # ("Robert Ariail", "http://comics.com/robert_ariail"),
-                            # "Clever and unpredictable"
-                            # ("Scott Stantis", "http://comics.com/scott_stantis"),
-                            # ("Signe Wilkinson", "http://comics.com/signe_wilkinson"),
-                            # ("Steve Benson", "http://comics.com/steve_benson"),
-                            # ("Steve Breen", "http://comics.com/steve_breen"),
-                            # ("Steve Kelley", "http://comics.com/steve_kelley"),
-                            # ("Steve Sack", "http://comics.com/steve_sack"),
-                            ]:
-            articles = self.make_links(url)
-            if articles:
-                feeds.append((title, articles))
-        return feeds
-
-    def make_links(self, url):
-        soup = self.index_to_soup(url)
-        # print 'soup: ', soup
-        title = ''
-        current_articles = []
-        pages = range(1, self.num_comics_to_get+1)
-        for page in pages:
-            page_url = url + '/?Page=' + str(page)
-            soup = self.index_to_soup(page_url)
-            if soup:
-                strip_tag = soup.find('a', attrs={'class': 'STR_StripImage'})
-                if strip_tag:
-                  print 'strip_tag: ', strip_tag
-                  title = strip_tag['title']
-                  print 'title: ', title
-            current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
-        current_articles.reverse()
-        return current_articles
-
-    extra_css = '''
-                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
-                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
-                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
-                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
-		'''
--- a/recipes/computerworld_pl.recipe
+++ b/recipes/computerworld_pl.recipe
@ -16,7 +16,7 @@ class Computerworld_pl(BasicNewsRecipe):
    preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''), (re.compile(ur'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''),]
    keep_only_tags = [dict(id=['szpaltaL', 's2011'])]
    remove_tags_after = dict(name='div', attrs={'class':'tresc'})
-    remove_tags = [dict(attrs={'class':['nnav', 'rMobi', 'tagi', 'rec']}),]
+    remove_tags = [dict(attrs={'class':['nnav', 'rMobi', 'tagi', 'rec']}), dict(name='a', attrs={'target':'_blank'})]
    feeds          = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]

    def skip_ad_pages(self, soup):
--- a/recipes/conowego_pl.recipe
+++ b/recipes/conowego_pl.recipe
@ -15,6 +15,7 @@ class CoNowegoPl(BasicNewsRecipe):
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
+    ignore_duplicate_articles = {'title', 'url'}
    keep_only_tags = [dict(name='div', attrs={'class':'news_list single_view'})]
    remove_tags = [dict(name='div', attrs={'class':['ni_bottom', 'ni_rank', 'ni_date']})]
    feeds          = [(u'Aktualno\u015bci', u'http://www.conowego.pl/rss/aktualnosci-5/?type=100'), (u'Gaming', u'http://www.conowego.pl/rss/gaming-6/?type=100'), (u'Porady', u'http://www.conowego.pl/rss/porady-3/?type=100'), (u'Testy', u'http://www.conowego.pl/rss/testy-2/?type=100')]
--- a/recipes/di.recipe
+++ b/recipes/di.recipe
@ -1,4 +1,5 @@
 #!/usr/bin/env  python
+# vim:fileencoding=UTF-8

 __license__     = 'GPL v3'
 __author__ = 'Mori'
@ -14,7 +15,7 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
    __author__ = 'Mori'
    language = 'pl'

-    title = u'Dziennik Internautow'
+    title = u'Dziennik Internautów'
    publisher = u'Dziennik Internaut\u00f3w Sp. z o.o.'
    description = u'Internet w \u017cyciu i biznesie. Porady, wywiady, interwencje, bezpiecze\u0144stwo w Sieci, technologia.'

--- a/recipes/dot_net.recipe
+++ b/recipes/dot_net.recipe
@ -1,32 +1,37 @@
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from calibre.web.feeds.news import BasicNewsRecipe
 import re

-class NetMagazineRecipe (BasicNewsRecipe):
-   __author__ = u'Marc Busqué <marc@lamarciana.com>'
-   __url__ = 'http://www.lamarciana.com'
+class dotnetMagazine (BasicNewsRecipe):
+    __author__ = u'Bonni Salles'
    __version__ = '1.0'
    __license__   = 'GPL v3'
-   __copyright__ = u'2012, Marc Busqué <marc@lamarciana.com>'
-   title = u'.net magazine'
-   description = u'net is the world’s best-selling magazine for web designers and developers, featuring tutorials from leading agencies, interviews with the web’s biggest names, and agenda-setting features on the hottest issues affecting the internet today.'
-   language = 'en'
-   tags = 'web development, software'
+    __copyright__ = u'2013, Bonni Salles'
+    title                 = '.net magazine'
    oldest_article        = 7
-   remove_empty_feeds = True
    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = False
+    language              = 'en'
+    remove_empty_feeds    = True
+    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
    cover_url = u'http://media.netmagazine.futurecdn.net/sites/all/themes/netmag/logo.png'
-   keep_only_tags = [
-         dict(name='article', attrs={'class': re.compile('^node.*$', re.IGNORECASE)})
-         ]
+
+    remove_tags_after = dict(name='footer', id=lambda x:not x)
+    remove_tags_before = dict(name='header', id=lambda x:not x)
+
    remove_tags = [
-         dict(name='span', attrs={'class': 'comment-count'}),
-         dict(name='div', attrs={'class': 'item-list share-links'}),
-         dict(name='footer'),
+         dict(name='div', attrs={'class': 'item-list'}),
+         dict(name='h4', attrs={'class': 'std-hdr'}),
+         dict(name='div', attrs={'class': 'item-list share-links'}), #removes share links
+         dict(name=['script', 'noscript']),
+         dict(name='div', attrs={'id': 'comments-form'}), #comment these out if you want the comments to show
+         dict(name='div', attrs={'id': re.compile('advertorial_block_($|| )')}),
+         dict(name='div', attrs={'id': 'right-col'}),
+         dict(name='div', attrs={'id': 'comments'}), #comment these out if you want the comments to show
+         dict(name='div', attrs={'class': 'item-list related-content'}),
+
         ]
-   remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height', 'style']
-   extra_css = 'img {max-width: 100%; display: block; margin: auto;} .captioned-image div {text-align: center; font-style: italic;}'

    feeds = [
-         (u'.net', u'http://feeds.feedburner.com/net/topstories'),
+               (u'net', u'http://feeds.feedburner.com/net/topstories')
            ]
--- a/recipes/dziennik_lodzki.recipe
+++ b/recipes/dziennik_lodzki.recipe
@ -16,7 +16,7 @@ class DziennikLodzki(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
    remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
-    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
+    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(name='a', attrs={'href':'http://www.dzienniklodzki.pl/piano'})]

    feeds          = [(u'Na sygnale', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_nasygnale.xml?201302'), (u'\u0141\xf3d\u017a', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_lodz.xml?201302'), (u'Opinie', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_opinie.xml?201302'), (u'Pieni\u0105dze', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533763/index.rss?201302'), (u'Kultura', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533762/index.rss?201302'), (u'Sport', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533761/index.rss?201302'), (u'Akcje', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_akcje.xml?201302'), (u'M\xf3j Reporter', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_mojreporter.xml?201302'), (u'Studni\xf3wki', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_studniowki.xml?201302'), (u'Kraj', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_kraj.xml?201302'), (u'Zdrowie', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_zdrowie.xml?201302')]

--- a/recipes/dziennik_zachodni.recipe
+++ b/recipes/dziennik_zachodni.recipe
@ -16,7 +16,7 @@ class DziennikZachodni(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
    remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
-    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(attrs={'href':'http://www.dziennikzachodni.pl/piano'})]
+    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(attrs={'href':'http://www.dziennikzachodni.pl/piano'}), dict(name='aside')]

    feeds          = [(u'Wszystkie', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533764/index.rss?201302'), (u'Wiadomo\u015bci', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533765/index.rss?201302'), (u'Regiony', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_regiony.xml?201302'), (u'Opinie', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_regiony.xml?201302'), (u'Blogi', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_blogi.xml?201302'), (u'Serwisy', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_serwisy.xml?201302'), (u'Sport', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533766/index.rss?201302'), (u'M\xf3j Reporter', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_mojreporter.xml?201302'), (u'Na narty', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_nanarty.xml?201302'), (u'Drogi', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_drogi.xml?201302'), (u'Pieni\u0105dze', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533768/index.rss?201302')]

--- a/recipes/echo_dnia.recipe
+++ b/recipes/echo_dnia.recipe
@ -16,6 +16,7 @@ class EchoDnia(BasicNewsRecipe):
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
+    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}

    preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''), 
--- a/recipes/ekundelek_pl.recipe
+++ b/recipes/ekundelek_pl.recipe
@ -12,7 +12,7 @@ class swiatczytnikow(BasicNewsRecipe):
    __author__ = u'Artur Stachecki'
    oldest_article = 7
    max_articles_per_feed = 100
-
+    remove_empty_feeds = True
    remove_tags = [dict(name = 'div', attrs = {'class' : 'feedflare'})]

    feeds = [(u'Wpisy', u'http://feeds.feedburner.com/Ekundelekpl?format=xml')]
--- a/recipes/emuzica_pl.recipe
+++ b/recipes/emuzica_pl.recipe
@ -11,6 +11,7 @@ class eMuzyka(BasicNewsRecipe):
    cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
    no_stylesheets = True
    oldest_article = 7
+    remove_empty_feeds = True
    max_articles_per_feed = 100
    remove_attributes = ['style']
    keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
--- a/recipes/gazeta_wyborcza.recipe
+++ b/recipes/gazeta_wyborcza.recipe
@ -9,6 +9,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
    description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.'
    category = 'newspaper'
    publication_type = 'newspaper'
+    #encoding = 'iso-8859-2'
    masthead_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
    INDEX = 'http://wyborcza.pl'
    remove_empty_feeds = True
@ -16,6 +17,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
+    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags_before = dict(id='k0')
    remove_tags_after = dict(id='banP4')
@ -24,7 +26,19 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
             (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
             (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
             (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
-             (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'), (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'), (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'), (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'), (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'), (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'), (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'), (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'), (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'), (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'), (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'), (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'), (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
+             (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
+             (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
+             (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
+             (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
+             (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
+             (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
+             (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
+             (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
+             (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
+             (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
+             (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
+             (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'),
+             (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
             ]

    def skip_ad_pages(self, soup):
--- a/recipes/gildia_pl.recipe
+++ b/recipes/gildia_pl.recipe
@ -31,6 +31,14 @@ class Gildia(BasicNewsRecipe):
            for link in content.findAll(name='a'):
                if 'fragment' in link['href']:
                    return self.index_to_soup(link['href'], raw=True)
+        if 'relacj' in soup.title.string.lower():
+            for link in content.findAll(name='a'):
+                if 'relacj' in link['href']:
+                    return self.index_to_soup(link['href'], raw=True)
+        if 'wywiad' in soup.title.string.lower():
+            for link in content.findAll(name='a'):
+                if 'wywiad' in link['href']:
+                    return self.index_to_soup(link['href'], raw=True)


    def preprocess_html(self, soup):
--- a/recipes/glos_wielkopolski.recipe
+++ b/recipes/glos_wielkopolski.recipe
@ -16,7 +16,7 @@ class GlosWielkopolski(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
    remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
-    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
+    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(name='a', attrs={'href':'http://www.gloswielkopolski.pl/newsletter/'})]  # each entry is a tag spec; attrs must be a dict mapping attribute name to value

    feeds          = [(u'Wszystkie', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533779/index.rss?201302'), (u'Wiadomo\u015bci', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533780/index.rss?201302'), (u'Sport', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533781/index.rss?201302'), (u'Kultura', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533782/index.rss?201302'), (u'Porady', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_porady.xml?201302'), (u'Blogi', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_blogi.xml?201302'), (u'Nasze akcje', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_naszeakcje.xml?201302'), (u'Opinie', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_opinie.xml?201302'), (u'Magazyn', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_magazyn.xml?201302')]

--- a/recipes/go_comics.recipe
+++ b/recipes/go_comics.recipe
@ -1,229 +1,443 @@
+__license__   = 'GPL v3'
+__copyright__ = 'Copyright 2010 Starson17'
+'''
+www.gocomics.com
+'''
 from calibre.web.feeds.news import BasicNewsRecipe
+import re

-
-class Comics(BasicNewsRecipe):
-    title               = 'Comics.com'
+class GoComics(BasicNewsRecipe):
+    title               = 'Go Comics'
    __author__          = 'Starson17'
-    description         = 'Comics from comics.com. You should customize this recipe to fetch only the comics you are interested in'
+    __version__         = '1.06'
+    __date__            = '07 June 2011'
+    description         = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
+    category            = 'news, comics'
    language            = 'en'
    use_embedded_content= False
    no_stylesheets      = True
-    oldest_article      = 24
    remove_javascript   = True
-    cover_url           = 'http://www.bsb.lib.tx.us/images/comics.com.gif'
-    recursions          = 0
-    max_articles_per_feed = 10
-    num_comics_to_get = 7
-    simultaneous_downloads = 1
-    # delay = 3
+    remove_attributes = ['style']

-    keep_only_tags     = [dict(name='h1'),
-                          dict(name='p', attrs={'class':'feature_item'})
+    ####### USER PREFERENCES - COMICS, IMAGE SIZE AND NUMBER OF COMICS TO RETRIEVE ########
+    # num_comics_to_get - I've tried up to 99 on Calvin&Hobbes
+    num_comics_to_get = 1
+    # comic_size 300 is small, 600 is medium, 900 is large, 1500 is extra-large
+    comic_size = 900
+    # CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS
+    # Please do not overload their servers by selecting all comics and 1000 strips from each!
+
+    conversion_options = {'linearize_tables'  : True
+                        , 'comment'           : description
+                        , 'tags'              : category
+                        , 'language'          : language
+                        }
+
+    keep_only_tags     = [dict(name='div', attrs={'class':['feature','banner']}),
                          ]

+    remove_tags = [dict(name='a', attrs={'class':['beginning','prev','cal','next','newest']}),
+                   dict(name='div', attrs={'class':['tag-wrapper']}),
+                   dict(name='a', attrs={'href':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
+                   dict(name='img', attrs={'src':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
+                   dict(name='ul', attrs={'class':['share-nav','feature-nav']}),
+                   ]
+
+    def get_browser(self):  # override: base-class browser with a gocomics.com Referer header set
+        br = BasicNewsRecipe.get_browser(self)
+        br.addheaders = [('Referer','http://www.gocomics.com/')]  # assignment replaces the whole header list with this single entry; presumably the site checks the referer - TODO confirm
+        return br
+
    def parse_index(self):
        feeds = []
        for title, url in [
-                            ("9 Chickweed Lane", "http://gocomics.com/9_chickweed_lane"),
-                            ("Agnes", "http://gocomics.com/agnes"),
-                            ("Alley Oop", "http://gocomics.com/alley_oop"),
-                            ("Andy Capp", "http://gocomics.com/andy_capp"),
-                            ("Arlo & Janis", "http://gocomics.com/arlo&janis"),
-                            ("B.C.", "http://gocomics.com/bc"),
-                            ("Ballard Street", "http://gocomics.com/ballard_street"),
-                            # ("Ben", "http://comics.com/ben"),
-                            # ("Betty", "http://comics.com/betty"),
-                            # ("Big Nate", "http://comics.com/big_nate"),
-                            # ("Brevity", "http://comics.com/brevity"),
-                            # ("Candorville", "http://comics.com/candorville"),
-                            # ("Cheap Thrills", "http://comics.com/cheap_thrills"),
-                            # ("Committed", "http://comics.com/committed"),
-                            # ("Cow & Boy", "http://comics.com/cow&boy"),
-                            # ("Daddy's Home", "http://comics.com/daddys_home"),
-                            # ("Dog eat Doug", "http://comics.com/dog_eat_doug"),
-                            # ("Drabble", "http://comics.com/drabble"),
-                            # ("F Minus", "http://comics.com/f_minus"),
-                            # ("Family Tree", "http://comics.com/family_tree"),
-                            # ("Farcus", "http://comics.com/farcus"),
-                            # ("Fat Cats Classics", "http://comics.com/fat_cats_classics"),
-                            # ("Ferd'nand", "http://comics.com/ferdnand"),
-                            # ("Flight Deck", "http://comics.com/flight_deck"),
-                            # ("Flo & Friends", "http://comics.com/flo&friends"),
-                            # ("Fort Knox", "http://comics.com/fort_knox"),
-                            # ("Frank & Ernest", "http://comics.com/frank&ernest"),
-                            # ("Frazz", "http://comics.com/frazz"),
-                            # ("Free Range", "http://comics.com/free_range"),
-                            # ("Geech Classics", "http://comics.com/geech_classics"),
-                            # ("Get Fuzzy", "http://comics.com/get_fuzzy"),
-                            # ("Girls & Sports", "http://comics.com/girls&sports"),
-                            # ("Graffiti", "http://comics.com/graffiti"),
-                            # ("Grand Avenue", "http://comics.com/grand_avenue"),
-                            # ("Heathcliff", "http://comics.com/heathcliff"),
-                            # "Heathcliff, a street-smart and mischievous cat with many adventures."
-                            # ("Herb and Jamaal", "http://comics.com/herb_and_jamaal"),
-                            # ("Herman", "http://comics.com/herman"),
-                            # ("Home and Away", "http://comics.com/home_and_away"),
-                            # ("It's All About You", "http://comics.com/its_all_about_you"),
-                            # ("Jane's World", "http://comics.com/janes_world"),
-                            # ("Jump Start", "http://comics.com/jump_start"),
-                            # ("Kit 'N' Carlyle", "http://comics.com/kit_n_carlyle"),
-                            # ("Li'l Abner Classics", "http://comics.com/lil_abner_classics"),
-                            # ("Liberty Meadows", "http://comics.com/liberty_meadows"),
-                            # ("Little Dog Lost", "http://comics.com/little_dog_lost"),
-                            # ("Lola", "http://comics.com/lola"),
-                            # ("Luann", "http://comics.com/luann"),
-                            # ("Marmaduke", "http://comics.com/marmaduke"),
-                            # ("Meg! Classics", "http://comics.com/meg_classics"),
-                            # ("Minimum Security", "http://comics.com/minimum_security"),
-                            # ("Moderately Confused", "http://comics.com/moderately_confused"),
-                            # ("Momma", "http://comics.com/momma"),
-                            # ("Monty", "http://comics.com/monty"),
-                            # ("Motley Classics", "http://comics.com/motley_classics"),
-                            # ("Nancy", "http://comics.com/nancy"),
-                            # ("Natural Selection", "http://comics.com/natural_selection"),
-                            # ("Nest Heads", "http://comics.com/nest_heads"),
-                            # ("Off The Mark", "http://comics.com/off_the_mark"),
-                            # ("On a Claire Day", "http://comics.com/on_a_claire_day"),
-                            # ("One Big Happy Classics", "http://comics.com/one_big_happy_classics"),
-                            # ("Over the Hedge", "http://comics.com/over_the_hedge"),
-                            # ("PC and Pixel", "http://comics.com/pc_and_pixel"),
-                            # ("Peanuts", "http://comics.com/peanuts"),
-                            # ("Pearls Before Swine", "http://comics.com/pearls_before_swine"),
-                            # ("Pickles", "http://comics.com/pickles"),
-                            # ("Prickly City", "http://comics.com/prickly_city"),
-                            # ("Raising Duncan Classics", "http://comics.com/raising_duncan_classics"),
-                            # ("Reality Check", "http://comics.com/reality_check"),
-                            # ("Red & Rover", "http://comics.com/red&rover"),
-                            # ("Rip Haywire", "http://comics.com/rip_haywire"),
-                            # ("Ripley's Believe It or Not!", "http://comics.com/ripleys_believe_it_or_not"),
-                            # ("Rose Is Rose", "http://comics.com/rose_is_rose"),
-                            # ("Rubes", "http://comics.com/rubes"),
-                            # ("Rudy Park", "http://comics.com/rudy_park"),
-                            # ("Scary Gary", "http://comics.com/scary_gary"),
-                            # ("Shirley and Son Classics", "http://comics.com/shirley_and_son_classics"),
-                            # ("Soup To Nutz", "http://comics.com/soup_to_nutz"),
-                            # ("Speed Bump", "http://comics.com/speed_bump"),
-                            # ("Spot The Frog", "http://comics.com/spot_the_frog"),
-                            # ("State of the Union", "http://comics.com/state_of_the_union"),
-                            # ("Strange Brew", "http://comics.com/strange_brew"),
-                            # ("Tarzan Classics", "http://comics.com/tarzan_classics"),
-                            # ("That's Life", "http://comics.com/thats_life"),
-                            # ("The Barn", "http://comics.com/the_barn"),
-                            # ("The Born Loser", "http://comics.com/the_born_loser"),
-                            # ("The Buckets", "http://comics.com/the_buckets"),
-                            # ("The Dinette Set", "http://comics.com/the_dinette_set"),
-                            # ("The Grizzwells", "http://comics.com/the_grizzwells"),
-                            # ("The Humble Stumble", "http://comics.com/the_humble_stumble"),
-                            # ("The Knight Life", "http://comics.com/the_knight_life"),
-                            # ("The Meaning of Lila", "http://comics.com/the_meaning_of_lila"),
-                            # ("The Other Coast", "http://comics.com/the_other_coast"),
-                            # ("The Sunshine Club", "http://comics.com/the_sunshine_club"),
-                            # ("Unstrange Phenomena", "http://comics.com/unstrange_phenomena"),
-                            # ("Watch Your Head", "http://comics.com/watch_your_head"),
-                            # ("Wizard of Id", "http://comics.com/wizard_of_id"),
-                            # ("Working Daze", "http://comics.com/working_daze"),
-                            # ("Working It Out", "http://comics.com/working_it_out"),
-                            # ("Zack Hill", "http://comics.com/zack_hill"),
-                            # ("(Th)ink", "http://comics.com/think"),
-                            # "Tackling the political and social issues impacting communities of color."
-                            # ("Adam Zyglis", "http://comics.com/adam_zyglis"),
-                            # "Known for his excellent caricatures, as well as independent and incisive imagery. "
-                            # ("Andy Singer", "http://comics.com/andy_singer"),
-                            # ("Bill Day", "http://comics.com/bill_day"),
-                            # "Powerful images on sensitive issues."
-                            # ("Bill Schorr", "http://comics.com/bill_schorr"),
-                            # ("Bob Englehart", "http://comics.com/bob_englehart"),
-                            # ("Brian Fairrington", "http://comics.com/brian_fairrington"),
-                            # ("Bruce Beattie", "http://comics.com/bruce_beattie"),
-                            # ("Cam Cardow", "http://comics.com/cam_cardow"),
-                            # ("Chip Bok", "http://comics.com/chip_bok"),
-                            # ("Chris Britt", "http://comics.com/chris_britt"),
-                            # ("Chuck Asay", "http://comics.com/chuck_asay"),
-                            # ("Clay Bennett", "http://comics.com/clay_bennett"),
-                            # ("Daryl Cagle", "http://comics.com/daryl_cagle"),
-                            # ("David Fitzsimmons", "http://comics.com/david_fitzsimmons"),
-                            # "David Fitzsimmons is a new editorial cartoons on comics.com.  He is also a staff writer and editorial cartoonist for the Arizona Daily Star. "
-                            # ("Drew Litton", "http://comics.com/drew_litton"),
-                            # "Drew Litton is an artist who is probably best known for his sports cartoons. He received the National Cartoonist Society Sports Cartoon Award for 1993. "
-                            # ("Ed Stein", "http://comics.com/ed_stein"),
-                            # "Winner of the Fischetti Award in 2006 and the Scripps Howard National Journalism Award, 1999, Ed Stein has been the editorial cartoonist for the Rocky Mountain News since 1978. "
-                            # ("Eric Allie", "http://comics.com/eric_allie"),
-                            # "Eric Allie is an editorial cartoonist with the Pioneer Press and CNS News. "
-                            # ("Gary Markstein", "http://comics.com/gary_markstein"),
-                            # ("Gary McCoy", "http://comics.com/gary_mccoy"),
-                            # "Gary McCoy is known for his editorial cartoons, humor and inane ramblings. He is a 2 time nominee for  Best  Magazine Cartoonist of the Year by the National Cartoonists Society. He resides in Belleville, IL. "
-                            # ("Gary Varvel", "http://comics.com/gary_varvel"),
-                            # ("Henry Payne", "http://comics.com/henry_payne"),
-                            # ("JD Crowe", "http://comics.com/jd_crowe"),
-                            # ("Jeff Parker", "http://comics.com/jeff_parker"),
-                            # ("Jeff Stahler", "http://comics.com/jeff_stahler"),
-                            # ("Jerry Holbert", "http://comics.com/jerry_holbert"),
-                            # ("John Cole", "http://comics.com/john_cole"),
-                            # ("John Darkow", "http://comics.com/john_darkow"),
-                            # "John Darkow is a contributing editorial cartoonist for the Humor Times as well as editoiral cartoonist for  the Columbia Daily Tribune, Missouri"
-                            # ("John Sherffius", "http://comics.com/john_sherffius"),
-                            # ("Larry Wright", "http://comics.com/larry_wright"),
-                            # ("Lisa Benson", "http://comics.com/lisa_benson"),
-                            # ("Marshall Ramsey", "http://comics.com/marshall_ramsey"),
-                            # ("Matt Bors", "http://comics.com/matt_bors"),
-                            # ("Michael Ramirez", "http://comics.com/michael_ramirez"),
-                            # ("Mike Keefe", "http://comics.com/mike_keefe"),
-                            # ("Mike Luckovich", "http://comics.com/mike_luckovich"),
-                            # ("MIke Thompson", "http://comics.com/mike_thompson"),
-                            # ("Monte Wolverton", "http://comics.com/monte_wolverton"),
-                            # "Unique mix of perspectives"
-                            # ("Mr. Fish", "http://comics.com/mr_fish"),
-                            # "Side effects may include swelling"
-                            # ("Nate Beeler", "http://comics.com/nate_beeler"),
-                            # "Middle America meets the Beltway."
-                            # ("Nick Anderson", "http://comics.com/nick_anderson"),
-                            # ("Pat Bagley", "http://comics.com/pat_bagley"),
-                            # "Unfair and Totally Unbalanced."
-                            # ("Paul Szep", "http://comics.com/paul_szep"),
-                            # ("RJ Matson", "http://comics.com/rj_matson"),
-                            # "Power cartoons from NYC and Capitol Hill"
-                            # ("Rob Rogers", "http://comics.com/rob_rogers"),
-                            # "Humorous slant on current events"
-                            # ("Robert Ariail", "http://comics.com/robert_ariail"),
-                            # "Clever and unpredictable"
-                            # ("Scott Stantis", "http://comics.com/scott_stantis"),
-                            # ("Signe Wilkinson", "http://comics.com/signe_wilkinson"),
-                            # ("Steve Benson", "http://comics.com/steve_benson"),
-                            # ("Steve Breen", "http://comics.com/steve_breen"),
-                            # ("Steve Kelley", "http://comics.com/steve_kelley"),
-                            # ("Steve Sack", "http://comics.com/steve_sack"),
+                       #(u"2 Cows and a Chicken", u"http://www.gocomics.com/2cowsandachicken"),
+                       #(u"9 Chickweed Lane", u"http://www.gocomics.com/9chickweedlane"),
+                       #(u"Adam At Home", u"http://www.gocomics.com/adamathome"),
+                       #(u"Agnes", u"http://www.gocomics.com/agnes"),
+                       #(u"Alley Oop", u"http://www.gocomics.com/alleyoop"),
+                       #(u"Andy Capp", u"http://www.gocomics.com/andycapp"),
+                       (u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"),
+                       #(u"Annie", u"http://www.gocomics.com/annie"),
+                       #(u"Arlo & Janis", u"http://www.gocomics.com/arloandjanis"),
+                       #(u"Ask Shagg", u"http://www.gocomics.com/askshagg"),
+                       (u"B.C.", u"http://www.gocomics.com/bc"),
+                       #(u"Back in the Day", u"http://www.gocomics.com/backintheday"),
+                       #(u"Bad Reporter", u"http://www.gocomics.com/badreporter"),
+                       (u"Baldo", u"http://www.gocomics.com/baldo"),
+                       #(u"Ballard Street", u"http://www.gocomics.com/ballardstreet"),
+                       #(u"Barkeater Lake", u"http://www.gocomics.com/barkeaterlake"),
+                       #(u"Basic Instructions", u"http://www.gocomics.com/basicinstructions"),
+                       #(u"Ben", u"http://www.gocomics.com/ben"),
+                       #(u"Betty", u"http://www.gocomics.com/betty"),
+                       #(u"Bewley", u"http://www.gocomics.com/bewley"),
+                       #(u"Big Nate", u"http://www.gocomics.com/bignate"),
+                       #(u"Big Top", u"http://www.gocomics.com/bigtop"),
+                       #(u"Biographic", u"http://www.gocomics.com/biographic"),
+                       #(u"Birdbrains", u"http://www.gocomics.com/birdbrains"),
+                       #(u"Bleeker: The Rechargeable Dog", u"http://www.gocomics.com/bleeker"),
+                       #(u"Bliss", u"http://www.gocomics.com/bliss"),
+                       #(u"Bloom County", u"http://www.gocomics.com/bloomcounty"),
+                       #(u"Bo Nanas", u"http://www.gocomics.com/bonanas"),
+                       #(u"Bob the Squirrel", u"http://www.gocomics.com/bobthesquirrel"),
+                       #(u"Boomerangs", u"http://www.gocomics.com/boomerangs"),
+                       #(u"Bottomliners", u"http://www.gocomics.com/bottomliners"),
+                       (u"Bound and Gagged", u"http://www.gocomics.com/boundandgagged"),
+                       #(u"Brainwaves", u"http://www.gocomics.com/brainwaves"),
+                       #(u"Brenda Starr", u"http://www.gocomics.com/brendastarr"),
+                       #(u"Brevity", u"http://www.gocomics.com/brevity"),
+                       #(u"Brewster Rockit", u"http://www.gocomics.com/brewsterrockit"),
+                       (u"Broom Hilda", u"http://www.gocomics.com/broomhilda"),
+                       (u"Calvin and Hobbes", u"http://www.gocomics.com/calvinandhobbes"),
+                       #(u"Candorville", u"http://www.gocomics.com/candorville"),
+                       #(u"Cathy", u"http://www.gocomics.com/cathy"),
+                       #(u"C'est la Vie", u"http://www.gocomics.com/cestlavie"),
+                       #(u"Cheap Thrills", u"http://www.gocomics.com/cheapthrills"),
+                       #(u"Chuckle Bros", u"http://www.gocomics.com/chucklebros"),
+                       #(u"Citizen Dog", u"http://www.gocomics.com/citizendog"),
+                       #(u"Cleats", u"http://www.gocomics.com/cleats"),
+                       #(u"Close to Home", u"http://www.gocomics.com/closetohome"),
+                       #(u"Committed", u"http://www.gocomics.com/committed"),
+                       #(u"Compu-toon", u"http://www.gocomics.com/compu-toon"),
+                       #(u"Cornered", u"http://www.gocomics.com/cornered"),
+                       #(u"Cow & Boy", u"http://www.gocomics.com/cow&boy"),
+                       #(u"Cul de Sac", u"http://www.gocomics.com/culdesac"),
+                       #(u"Daddy's Home", u"http://www.gocomics.com/daddyshome"),
+                       #(u"Deep Cover", u"http://www.gocomics.com/deepcover"),
+                       #(u"Dick Tracy", u"http://www.gocomics.com/dicktracy"),
+                       #(u"Dog Eat Doug", u"http://www.gocomics.com/dogeatdoug"),
+                       #(u"Domestic Abuse", u"http://www.gocomics.com/domesticabuse"),
+                       #(u"Doodles", u"http://www.gocomics.com/doodles"),
+                       #(u"Doonesbury", u"http://www.gocomics.com/doonesbury"),
+                       #(u"Drabble", u"http://www.gocomics.com/drabble"),
+                       #(u"Eek!", u"http://www.gocomics.com/eek"),
+                       #(u"F Minus", u"http://www.gocomics.com/fminus"),
+                       #(u"Family Tree", u"http://www.gocomics.com/familytree"),
+                       #(u"Farcus", u"http://www.gocomics.com/farcus"),
+                       #(u"Fat Cats Classics", u"http://www.gocomics.com/fatcatsclassics"),
+                       #(u"Ferd'nand", u"http://www.gocomics.com/ferdnand"),
+                       #(u"Flight Deck", u"http://www.gocomics.com/flightdeck"),
+                       #(u"Flo and Friends", u"http://www.gocomics.com/floandfriends"),
+                       (u"For Better or For Worse", u"http://www.gocomics.com/forbetterorforworse"),
+                       #(u"For Heaven's Sake", u"http://www.gocomics.com/forheavenssake"),
+                       #(u"Fort Knox", u"http://www.gocomics.com/fortknox"),
+                       #(u"FoxTrot Classics", u"http://www.gocomics.com/foxtrotclassics"),
+                       #(u"FoxTrot", u"http://www.gocomics.com/foxtrot"),
+                       (u"Frank & Ernest", u"http://www.gocomics.com/frankandernest"),
+                       #(u"Frazz", u"http://www.gocomics.com/frazz"),
+                       #(u"Fred Basset", u"http://www.gocomics.com/fredbasset"),
+                       #(u"Free Range", u"http://www.gocomics.com/freerange"),
+                       #(u"Frog Applause", u"http://www.gocomics.com/frogapplause"),
+                       #(u"Garfield Minus Garfield", u"http://www.gocomics.com/garfieldminusgarfield"),
+                       (u"Garfield", u"http://www.gocomics.com/garfield"),
+                       #(u"Gasoline Alley", u"http://www.gocomics.com/gasolinealley"),
+                       #(u"Geech Classics", u"http://www.gocomics.com/geechclassics"),
+                       (u"Get Fuzzy", u"http://www.gocomics.com/getfuzzy"),
+                       #(u"Gil Thorp", u"http://www.gocomics.com/gilthorp"),
+                       #(u"Ginger Meggs", u"http://www.gocomics.com/gingermeggs"),
+                       #(u"Girls & Sports", u"http://www.gocomics.com/girlsandsports"),
+                       #(u"Graffiti", u"http://www.gocomics.com/graffiti"),
+                       #(u"Grand Avenue", u"http://www.gocomics.com/grandavenue"),
+                       #(u"Haiku Ewe", u"http://www.gocomics.com/haikuewe"),
+                       #(u"Heart of the City", u"http://www.gocomics.com/heartofthecity"),
+                       #(u"Herb and Jamaal", u"http://www.gocomics.com/herbandjamaal"),
+                       #(u"Home and Away", u"http://www.gocomics.com/homeandaway"),
+                       #(u"Housebroken", u"http://www.gocomics.com/housebroken"),
+                       #(u"Hubert and Abby", u"http://www.gocomics.com/hubertandabby"),
+                       #(u"Imagine This", u"http://www.gocomics.com/imaginethis"),
+                       #(u"In the Bleachers", u"http://www.gocomics.com/inthebleachers"),
+                       #(u"In the Sticks", u"http://www.gocomics.com/inthesticks"),
+                       #(u"Ink Pen", u"http://www.gocomics.com/inkpen"),
+                       #(u"It's All About You", u"http://www.gocomics.com/itsallaboutyou"),
+                       #(u"Jane's World", u"http://www.gocomics.com/janesworld"),
+                       #(u"Joe Vanilla", u"http://www.gocomics.com/joevanilla"),
+                       #(u"Jump Start", u"http://www.gocomics.com/jumpstart"),
+                       #(u"Kit 'N' Carlyle", u"http://www.gocomics.com/kitandcarlyle"),
+                       #(u"La Cucaracha", u"http://www.gocomics.com/lacucaracha"),
+                       #(u"Last Kiss", u"http://www.gocomics.com/lastkiss"),
+                       #(u"Legend of Bill", u"http://www.gocomics.com/legendofbill"),
+                       #(u"Liberty Meadows", u"http://www.gocomics.com/libertymeadows"),
+                       #(u"Li'l Abner Classics", u"http://www.gocomics.com/lilabnerclassics"),
+                       #(u"Lio", u"http://www.gocomics.com/lio"),
+                       #(u"Little Dog Lost", u"http://www.gocomics.com/littledoglost"),
+                       #(u"Little Otto", u"http://www.gocomics.com/littleotto"),
+                       #(u"Lola", u"http://www.gocomics.com/lola"),
+                       #(u"Love Is...", u"http://www.gocomics.com/loveis"),
+                       (u"Luann", u"http://www.gocomics.com/luann"),
+                       #(u"Maintaining", u"http://www.gocomics.com/maintaining"),
+                       #(u"Meg! Classics", u"http://www.gocomics.com/megclassics"),
+                       #(u"Middle-Aged White Guy", u"http://www.gocomics.com/middleagedwhiteguy"),
+                       #(u"Minimum Security", u"http://www.gocomics.com/minimumsecurity"),
+                       #(u"Moderately Confused", u"http://www.gocomics.com/moderatelyconfused"),
+                       (u"Momma", u"http://www.gocomics.com/momma"),
+                       #(u"Monty", u"http://www.gocomics.com/monty"),
+                       #(u"Motley Classics", u"http://www.gocomics.com/motleyclassics"),
+                       #(u"Mutt & Jeff", u"http://www.gocomics.com/muttandjeff"),
+                       #(u"Mythtickle", u"http://www.gocomics.com/mythtickle"),
+                       #(u"Nancy", u"http://www.gocomics.com/nancy"),
+                       #(u"Natural Selection", u"http://www.gocomics.com/naturalselection"),
+                       #(u"Nest Heads", u"http://www.gocomics.com/nestheads"),
+                       #(u"NEUROTICA", u"http://www.gocomics.com/neurotica"),
+                       #(u"New Adventures of Queen Victoria", u"http://www.gocomics.com/thenewadventuresofqueenvictoria"),
+                       (u"Non Sequitur", u"http://www.gocomics.com/nonsequitur"),
+                       #(u"Off The Mark", u"http://www.gocomics.com/offthemark"),
+                       #(u"On A Claire Day", u"http://www.gocomics.com/onaclaireday"),
+                       #(u"One Big Happy Classics", u"http://www.gocomics.com/onebighappyclassics"),
+                       #(u"One Big Happy", u"http://www.gocomics.com/onebighappy"),
+                       #(u"Out of the Gene Pool Re-Runs", u"http://www.gocomics.com/outofthegenepool"),
+                       #(u"Over the Hedge", u"http://www.gocomics.com/overthehedge"),
+                       #(u"Overboard", u"http://www.gocomics.com/overboard"),
+                       #(u"PC and Pixel", u"http://www.gocomics.com/pcandpixel"),
+                       (u"Peanuts", u"http://www.gocomics.com/peanuts"),
+                       (u"Pearls Before Swine", u"http://www.gocomics.com/pearlsbeforeswine"),
+                       #(u"Pibgorn Sketches", u"http://www.gocomics.com/pibgornsketches"),
+                       #(u"Pibgorn", u"http://www.gocomics.com/pibgorn"),
+                       #(u"Pickles", u"http://www.gocomics.com/pickles"),
+                       #(u"Pinkerton", u"http://www.gocomics.com/pinkerton"),
+                       #(u"Pluggers", u"http://www.gocomics.com/pluggers"),
+                       (u"Pooch Cafe", u"http://www.gocomics.com/poochcafe"),
+                       #(u"PreTeena", u"http://www.gocomics.com/preteena"),
+                       #(u"Prickly City", u"http://www.gocomics.com/pricklycity"),
+                       #(u"Rabbits Against Magic", u"http://www.gocomics.com/rabbitsagainstmagic"),
+                       #(u"Raising Duncan Classics", u"http://www.gocomics.com/raisingduncanclassics"),
+                       #(u"Real Life Adventures", u"http://www.gocomics.com/reallifeadventures"),
+                       #(u"Reality Check", u"http://www.gocomics.com/realitycheck"),
+                       #(u"Red and Rover", u"http://www.gocomics.com/redandrover"),
+                       #(u"Red Meat", u"http://www.gocomics.com/redmeat"),
+                       #(u"Reynolds Unwrapped", u"http://www.gocomics.com/reynoldsunwrapped"),
+                       #(u"Rip Haywire", u"http://www.gocomics.com/riphaywire"),
+                       #(u"Ronaldinho Gaucho", u"http://www.gocomics.com/ronaldinhogaucho"),
+                       (u"Rose Is Rose", u"http://www.gocomics.com/roseisrose"),
+                       #(u"Rudy Park", u"http://www.gocomics.com/rudypark"),
+                       #(u"Scary Gary", u"http://www.gocomics.com/scarygary"),
+                       #(u"Shirley and Son Classics", u"http://www.gocomics.com/shirleyandsonclassics"),
+                       (u"Shoe", u"http://www.gocomics.com/shoe"),
+                       #(u"Shoecabbage", u"http://www.gocomics.com/shoecabbage"),
+                       #(u"Skin Horse", u"http://www.gocomics.com/skinhorse"),
+                       #(u"Slowpoke", u"http://www.gocomics.com/slowpoke"),
+                       #(u"Soup To Nutz", u"http://www.gocomics.com/souptonutz"),
+                       #(u"Spot The Frog", u"http://www.gocomics.com/spotthefrog"),
+                       #(u"State of the Union", u"http://www.gocomics.com/stateoftheunion"),
+                       #(u"Stone Soup", u"http://www.gocomics.com/stonesoup"),
+                       #(u"Sylvia", u"http://www.gocomics.com/sylvia"),
+                       #(u"Tank McNamara", u"http://www.gocomics.com/tankmcnamara"),
+                       #(u"Tarzan Classics", u"http://www.gocomics.com/tarzanclassics"),
+                       #(u"That's Life", u"http://www.gocomics.com/thatslife"),
+                       #(u"The Academia Waltz", u"http://www.gocomics.com/academiawaltz"),
+                       #(u"The Barn", u"http://www.gocomics.com/thebarn"),
+                       #(u"The Boiling Point", u"http://www.gocomics.com/theboilingpoint"),
+                       #(u"The Boondocks", u"http://www.gocomics.com/boondocks"),
+                       (u"The Born Loser", u"http://www.gocomics.com/thebornloser"),
+                       #(u"The Buckets", u"http://www.gocomics.com/thebuckets"),
+                       #(u"The City", u"http://www.gocomics.com/thecity"),
+                       #(u"The Dinette Set", u"http://www.gocomics.com/dinetteset"),
+                       #(u"The Doozies", u"http://www.gocomics.com/thedoozies"),
+                       #(u"The Duplex", u"http://www.gocomics.com/duplex"),
+                       #(u"The Elderberries", u"http://www.gocomics.com/theelderberries"),
+                       #(u"The Flying McCoys", u"http://www.gocomics.com/theflyingmccoys"),
+                       #(u"The Fusco Brothers", u"http://www.gocomics.com/thefuscobrothers"),
+                       #(u"The Grizzwells", u"http://www.gocomics.com/thegrizzwells"),
+                       #(u"The Humble Stumble", u"http://www.gocomics.com/thehumblestumble"),
+                       #(u"The Knight Life", u"http://www.gocomics.com/theknightlife"),
+                       #(u"The Meaning of Lila", u"http://www.gocomics.com/meaningoflila"),
+                       (u"The Middletons", u"http://www.gocomics.com/themiddletons"),
+                       #(u"The Norm", u"http://www.gocomics.com/thenorm"),
+                       #(u"The Other Coast", u"http://www.gocomics.com/theothercoast"),
+                       #(u"The Quigmans", u"http://www.gocomics.com/thequigmans"),
+                       #(u"The Sunshine Club", u"http://www.gocomics.com/thesunshineclub"),
+                       #(u"Tiny Sepuk", u"http://www.gocomics.com/tinysepuk"),
+                       #(u"TOBY", u"http://www.gocomics.com/toby"),
+                       #(u"Tom the Dancing Bug", u"http://www.gocomics.com/tomthedancingbug"),
+                       #(u"Too Much Coffee Man", u"http://www.gocomics.com/toomuchcoffeeman"),
+                       #(u"Unstrange Phenomena", u"http://www.gocomics.com/unstrangephenomena"),
+                       #(u"W.T. Duck", u"http://www.gocomics.com/wtduck"),
+                       #(u"Watch Your Head", u"http://www.gocomics.com/watchyourhead"),
+                       #(u"Wee Pals", u"http://www.gocomics.com/weepals"),
+                       #(u"Winnie the Pooh", u"http://www.gocomics.com/winniethepooh"),
+                       (u"Wizard of Id", u"http://www.gocomics.com/wizardofid"),
+                       #(u"Working Daze", u"http://www.gocomics.com/workingdaze"),
+                       #(u"Working It Out", u"http://www.gocomics.com/workingitout"),
+                       #(u"Yenny", u"http://www.gocomics.com/yenny"),
+                       #(u"Zack Hill", u"http://www.gocomics.com/zackhill"),
+                       #(u"Ziggy", u"http://www.gocomics.com/ziggy"),
+                       (u"9 to 5", u"http://www.gocomics.com/9to5"),
+                       (u"Heathcliff", u"http://www.gocomics.com/heathcliff"),
+                       (u"Herman", u"http://www.gocomics.com/herman"),
+                       (u"Loose Parts", u"http://www.gocomics.com/looseparts"),
+                       (u"Marmaduke", u"http://www.gocomics.com/marmaduke"),
+                       (u"Ripley's Believe It or Not!", u"http://www.gocomics.com/ripleysbelieveitornot"),
+                       (u"Rubes", u"http://www.gocomics.com/rubes"),
+                       (u"Speed Bump", u"http://www.gocomics.com/speedbump"),
+                       (u"Strange Brew", u"http://www.gocomics.com/strangebrew"),
+                       (u"The Argyle Sweater", u"http://www.gocomics.com/theargylesweater"),
+                       #
+                       ######## EDITORIAL CARTOONS #####################
+                       #(u"Adam Zyglis", u"http://www.gocomics.com/adamzyglis"),
+                       #(u"Andy Singer", u"http://www.gocomics.com/andysinger"),
+                       #(u"Ben Sargent",u"http://www.gocomics.com/bensargent"),
+                       #(u"Bill Day", u"http://www.gocomics.com/billday"),
+                       #(u"Bill Schorr", u"http://www.gocomics.com/billschorr"),
+                       #(u"Bob Englehart", u"http://www.gocomics.com/bobenglehart"),
+                       #(u"Bob Gorrell",u"http://www.gocomics.com/bobgorrell"),
+                       #(u"Brian Fairrington", u"http://www.gocomics.com/brianfairrington"),
+                       #(u"Bruce Beattie", u"http://www.gocomics.com/brucebeattie"),
+                       #(u"Cam Cardow", u"http://www.gocomics.com/camcardow"),
+                       #(u"Chan Lowe",u"http://www.gocomics.com/chanlowe"),
+                       #(u"Chip Bok",u"http://www.gocomics.com/chipbok"),
+                       #(u"Chris Britt",u"http://www.gocomics.com/chrisbritt"),
+                       #(u"Chuck Asay",u"http://www.gocomics.com/chuckasay"),
+                       #(u"Clay Bennett",u"http://www.gocomics.com/claybennett"),
+                       #(u"Clay Jones",u"http://www.gocomics.com/clayjones"),
+                       #(u"Dan Wasserman",u"http://www.gocomics.com/danwasserman"),
+                       #(u"Dana Summers",u"http://www.gocomics.com/danasummers"),
+                       #(u"Daryl Cagle", u"http://www.gocomics.com/darylcagle"),
+                       #(u"David Fitzsimmons", u"http://www.gocomics.com/davidfitzsimmons"),
+                       #(u"Dick Locher",u"http://www.gocomics.com/dicklocher"),
+                       #(u"Don Wright",u"http://www.gocomics.com/donwright"),
+                       #(u"Donna Barstow",u"http://www.gocomics.com/donnabarstow"),
+                       #(u"Drew Litton", u"http://www.gocomics.com/drewlitton"),
+                       #(u"Drew Sheneman",u"http://www.gocomics.com/drewsheneman"),
+                       #(u"Ed Stein", u"http://www.gocomics.com/edstein"),
+                       #(u"Eric Allie", u"http://www.gocomics.com/ericallie"),
+                       #(u"Gary Markstein", u"http://www.gocomics.com/garymarkstein"),
+                       #(u"Gary McCoy", u"http://www.gocomics.com/garymccoy"),
+                       #(u"Gary Varvel", u"http://www.gocomics.com/garyvarvel"),
+                       #(u"Glenn McCoy",u"http://www.gocomics.com/glennmccoy"),
+                       #(u"Henry Payne", u"http://www.gocomics.com/henrypayne"),
+                       #(u"Jack Ohman",u"http://www.gocomics.com/jackohman"),
+                       #(u"JD Crowe", u"http://www.gocomics.com/jdcrowe"),
+                       #(u"Jeff Danziger",u"http://www.gocomics.com/jeffdanziger"),
+                       #(u"Jeff Parker", u"http://www.gocomics.com/jeffparker"),
+                       #(u"Jeff Stahler", u"http://www.gocomics.com/jeffstahler"),
+                       #(u"Jerry Holbert", u"http://www.gocomics.com/jerryholbert"),
+                       #(u"Jim Morin",u"http://www.gocomics.com/jimmorin"),
+                       #(u"Joel Pett",u"http://www.gocomics.com/joelpett"),
+                       #(u"John Cole", u"http://www.gocomics.com/johncole"),
+                       #(u"John Darkow", u"http://www.gocomics.com/johndarkow"),
+                       #(u"John Deering",u"http://www.gocomics.com/johndeering"),
+                       #(u"John Sherffius", u"http://www.gocomics.com/johnsherffius"),
+                       #(u"Ken Catalino",u"http://www.gocomics.com/kencatalino"),
+                       #(u"Kerry Waghorn",u"http://www.gocomics.com/facesinthenews"),
+                       #(u"Kevin Kallaugher",u"http://www.gocomics.com/kevinkallaugher"),
+                       #(u"Lalo Alcaraz",u"http://www.gocomics.com/laloalcaraz"),
+                       #(u"Larry Wright", u"http://www.gocomics.com/larrywright"),
+                       #(u"Lisa Benson", u"http://www.gocomics.com/lisabenson"),
+                       #(u"Marshall Ramsey", u"http://www.gocomics.com/marshallramsey"),
+                       #(u"Matt Bors", u"http://www.gocomics.com/mattbors"),
+                       #(u"Matt Davies",u"http://www.gocomics.com/mattdavies"),
+                       #(u"Michael Ramirez", u"http://www.gocomics.com/michaelramirez"),
+                       #(u"Mike Keefe", u"http://www.gocomics.com/mikekeefe"),
+                       #(u"Mike Luckovich", u"http://www.gocomics.com/mikeluckovich"),
+                       #(u"MIke Thompson", u"http://www.gocomics.com/mikethompson"),
+                       #(u"Monte Wolverton", u"http://www.gocomics.com/montewolverton"),
+                       #(u"Mr. Fish", u"http://www.gocomics.com/mrfish"),
+                       #(u"Nate Beeler", u"http://www.gocomics.com/natebeeler"),
+                       #(u"Nick Anderson", u"http://www.gocomics.com/nickanderson"),
+                       #(u"Pat Bagley", u"http://www.gocomics.com/patbagley"),
+                       #(u"Pat Oliphant",u"http://www.gocomics.com/patoliphant"),
+                       #(u"Paul Conrad",u"http://www.gocomics.com/paulconrad"),
+                       #(u"Paul Szep", u"http://www.gocomics.com/paulszep"),
+                       #(u"RJ Matson", u"http://www.gocomics.com/rjmatson"),
+                       #(u"Rob Rogers", u"http://www.gocomics.com/robrogers"),
+                       #(u"Robert Ariail", u"http://www.gocomics.com/robertariail"),
+                       #(u"Scott Stantis", u"http://www.gocomics.com/scottstantis"),
+                       #(u"Signe Wilkinson", u"http://www.gocomics.com/signewilkinson"),
+                       #(u"Small World",u"http://www.gocomics.com/smallworld"),
+                       #(u"Steve Benson", u"http://www.gocomics.com/stevebenson"),
+                       #(u"Steve Breen", u"http://www.gocomics.com/stevebreen"),
+                       #(u"Steve Kelley", u"http://www.gocomics.com/stevekelley"),
+                       #(u"Steve Sack", u"http://www.gocomics.com/stevesack"),
+                       #(u"Stuart Carlson",u"http://www.gocomics.com/stuartcarlson"),
+                       #(u"Ted Rall",u"http://www.gocomics.com/tedrall"),
+                       #(u"(Th)ink", u"http://www.gocomics.com/think"),
+                       #(u"Tom Toles",u"http://www.gocomics.com/tomtoles"),
+                       #(u"Tony Auth",u"http://www.gocomics.com/tonyauth"),
+                       #(u"Views of the World",u"http://www.gocomics.com/viewsoftheworld"),
+                       #(u"ViewsAfrica",u"http://www.gocomics.com/viewsafrica"),
+                       #(u"ViewsAmerica",u"http://www.gocomics.com/viewsamerica"),
+                       #(u"ViewsAsia",u"http://www.gocomics.com/viewsasia"),
+                       #(u"ViewsBusiness",u"http://www.gocomics.com/viewsbusiness"),
+                       #(u"ViewsEurope",u"http://www.gocomics.com/viewseurope"),
+                       #(u"ViewsLatinAmerica",u"http://www.gocomics.com/viewslatinamerica"),
+                       #(u"ViewsMidEast",u"http://www.gocomics.com/viewsmideast"),
+                       #(u"Walt Handelsman",u"http://www.gocomics.com/walthandelsman"),
+                       #(u"Wayne Stayskal",u"http://www.gocomics.com/waynestayskal"),
+                       #(u"Wit of the World",u"http://www.gocomics.com/witoftheworld"),
                             ]:
+            print 'Working on: ', title
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
-        soup = self.index_to_soup(url)
-        # print 'soup: ', soup
-        title = ''
+        # Collect up to num_comics_to_get strips for one comic: fetch `url`,
+        # read the strip title, date and permalink from the page, then move
+        # on to the page behind its "prev" link.
+        # Placeholder value; replaced once a page has been parsed.
+        title = 'Temp'
        current_articles = []
-        from datetime import datetime, timedelta
-        now = datetime.now()
-        dates = [(now-timedelta(days=d)).strftime('%Y/%m/%d') for d in range(self.num_comics_to_get)]
-
-        for page in dates:
-            page_url = url + '/' + str(page)
-            print(page_url)
-            soup = self.index_to_soup(page_url)
-            if soup:
-                strip_tag = self.tag_to_string(soup.find('a'))
-                if strip_tag:
-                  print 'strip_tag: ', strip_tag
-                  title = strip_tag
-                  print 'title: ', title
+        # `page` is only a counter; the page actually fetched is tracked in `url`.
+        pages = range(1, self.num_comics_to_get+1)
+        for page in pages:
+            page_soup = self.index_to_soup(url)
+            if page_soup:
+                # Strip name: text of the link inside <div class="top"><h1>.
+                try:
+                    strip_title = page_soup.find(name='div', attrs={'class':'top'}).h1.a.string
+                except:
+                    strip_title = 'Error - no Title found'
+                # Strip date: text of the first <li> in <ul class="feature-nav">.
+                try:
+                    date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
+                    # NOTE(review): this fallback repeats the exact lookup above,
+                    # so it cannot produce a different value - confirm what was intended.
+                    if not date_title:
+                        date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
+                except:
+                    date_title = 'Error - no Date found'
+                title = strip_title + ' - ' + date_title
+                # Each href lookup is attempted twice against the same parsed
+                # page; None marks a lookup that failed both times.
+                for i in range(2):
+                    try:
+                        strip_url_date = page_soup.find(name='div', attrs={'class':'top'}).h1.a['href']
+                        break  # success - this is normal exit
+                    except:
+                        strip_url_date = None
+                        continue  # try to get strip_url_date again
+                for i in range(2):
+                    try:
+                        prev_strip_url_date = page_soup.find('a', attrs={'class': 'prev'})['href']
+                        break  # success - this is normal exit
+                    except:
+                        prev_strip_url_date = None
+                        continue  # try to get prev_strip_url_date again
+                # Both hrefs are site-relative. If either is missing the page
+                # is skipped; `url` is then left unchanged, so the next
+                # iteration fetches the same page again.
+                if strip_url_date:
+                    page_url = 'http://www.gocomics.com' + strip_url_date
+                else:
+                    continue
+                if prev_strip_url_date:
+                    prev_page_url = 'http://www.gocomics.com' + prev_strip_url_date
+                else:
+                    continue
            current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
+            # NOTE(review): the append above and this assignment are at loop
+            # level, outside `if page_soup:`. If index_to_soup() returns a
+            # falsy value they run with values left over from an earlier
+            # iteration (or with unbound names on the first one) - confirm.
+            url = prev_page_url
+        # Pages were visited by following "prev" links, so the collected
+        # order is reversed before returning.
        current_articles.reverse()
        return current_articles

+    def preprocess_html(self, soup):
+        '''
+        Tidy one downloaded strip page before conversion.
+
+        The strip date is cut out of the <title> text (the part after the
+        first comma, split on spaces into at most five pieces with the last
+        piece dropped) and put in front of the artist credit in <h1><span>.
+        The image inside <p class="feature_item"> gets the href of its
+        enclosing link as src and self.comic_size as width.
+
+        Any element that is missing from the page is skipped instead of
+        raising. Returns the result of self.adeify_images(soup).
+        '''
+        # Default keeps the credit unchanged when no date can be extracted
+        # (previously comic_date was unbound if the page had no <title>).
+        comic_date = ''
+        if soup.title is not None and soup.title.string:
+            title_string = soup.title.string.strip()
+            title_parts = title_string.split(',', 1)
+            if len(title_parts) > 1:
+                _cd = title_parts[1]
+                comic_date = ' '.join(_cd.split(' ', 4)[0:-1])
+        h1 = soup.h1
+        if h1 is not None and h1.span is not None and h1.span.string:
+            artist = h1.span.string
+            h1.span.string.replaceWith(comic_date + artist)
+        feature_item = soup.find('p',attrs={'class':'feature_item'})
+        a_tag = feature_item.a if feature_item is not None else None
+        img_tag = a_tag.img if a_tag is not None else None
+        if img_tag is not None:
+            # The link target is used as the image source in place of the
+            # src the page shipped with.
+            img_tag["src"] = a_tag["href"]
+            img_tag["width"] = self.comic_size
+            img_tag["height"] = None
+        return self.adeify_images(soup)
+
    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+                    img {max-width:100%; min-width:100%;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
+
--- a/recipes/kdefamily_pl.recipe
+++ b/recipes/kdefamily_pl.recipe
@ -12,5 +12,6 @@ class KDEFamilyPl(BasicNewsRecipe):
    max_articles_per_feed = 100
    preprocess_regexps = [(re.compile(r"Podobne wpisy.*", re.IGNORECASE|re.DOTALL), lambda m: '')]
    no_stylesheets = True
+    remove_empty_feeds = True
    use_embedded_content = True
    feeds = [(u'Wszystko', u'http://kdefamily.pl/feed/')]
--- a/recipes/legeartis.recipe
+++ b/recipes/legeartis.recipe
@ -21,7 +21,7 @@ class LegeArtisRecipe(BasicNewsRecipe):

    no_stylesheets = True
    remove_javascript = True
-
+    remove_empty_feeds = True
    extra_css = '''
            img{clear: both;}
    '''
--- a/recipes/lomza.recipe
+++ b/recipes/lomza.recipe
@ -8,6 +8,7 @@ class Lomza(BasicNewsRecipe):
    language       = 'pl'
    oldest_article = 15
    no_stylesheets = True
+    extra_css = '#foto {float: right; max-width: 200px; margin-left: 10px;} #fotogaleria > div {float:left;} .br {clear: both;}'
    max_articles_per_feed = 100
    remove_tags=[dict(name='div', attrs={'class':['bxbanner', 'drukuj', 'wyslijznajomemu']})]
    keep_only_tags=[dict(name='div', attrs={'class':'wiadomosc'})]
--- a/recipes/nrc_next.recipe
+++ b/recipes/nrc_next.recipe
@ -0,0 +1,75 @@
+#!/usr/bin/env  python2
+# -*- coding: utf-8 -*-
+# Based on veezh's original recipe, Kovid Goyal's New York Times recipe and Snaabs nrc Handelsblad recipe
+
+__license__   = 'GPL v3'
+__copyright__ = '2013, Niels Giesen'
+
+'''
+www.nrc.nl
+'''
+import os, zipfile
+import time
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ptempfile import PersistentTemporaryFile
+
+
+class NRCNext(BasicNewsRecipe):
+    '''
+    Recipe that downloads today's ready-made EPUB edition from
+    digitaleeditie.nrc.nl and unpacks it into the output directory instead
+    of building a book from individual articles.
+    '''
+
+    title = u'nrc•next'
+    description = u'De ePaper-versie van nrc•next'
+    language = 'nl'
+    lang = 'nl-NL'
+    needs_subscription = True
+
+    __author__ = 'Niels Giesen'
+
+    conversion_options = {
+        'no_default_epub_cover' : True
+    }
+
+    def get_browser(self):
+        '''
+        Return a browser; when both username and password are set, the first
+        form on the login page is filled in and submitted first.
+        '''
+        br = BasicNewsRecipe.get_browser(self)
+        if self.username is not None and self.password is not None:
+            br.open('http://login.nrc.nl/login')
+            br.select_form(nr=0)
+            br['username'] = self.username
+            br['password'] = self.password
+            br.submit()
+        return br
+
+    def build_index(self):
+        '''
+        Download today's edition, extract it into self.output_dir and return
+        the path of the metadata.opf expected inside it.
+
+        Raises ValueError when the edition cannot be opened. A file that is
+        not a valid zip archive is not treated as an error: extraction is
+        skipped and the index path is still returned.
+        '''
+
+        today = time.strftime("%Y%m%d")
+
+        domain = "http://digitaleeditie.nrc.nl"
+
+        url = domain + "/digitaleeditie/helekrant/epub/nn_" + today + ".epub"
+        #print url
+
+        try:
+            br = self.get_browser()
+            f = br.open(url)
+        except Exception:
+            # Exception rather than a bare except, so KeyboardInterrupt and
+            # SystemExit are no longer turned into a ValueError.
+            self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
+            raise ValueError('Krant van vandaag nog niet beschikbaar')
+
+        tmp = PersistentTemporaryFile(suffix='.epub')
+        self.report_progress(0,_('downloading epub'))
+        try:
+            tmp.write(f.read())
+        finally:
+            # Close everything even if the download fails half-way. Closing
+            # tmp here also guarantees the data is on disk before the file is
+            # reopened by name below.
+            tmp.close()
+            f.close()
+            br.close()
+        if zipfile.is_zipfile(tmp.name):
+            try:
+                zfile = zipfile.ZipFile(tmp.name, 'r')
+                try:
+                    zfile.extractall(self.output_dir)
+                finally:
+                    zfile.close()
+                self.report_progress(0,_('extracting epub'))
+            except zipfile.BadZipfile:
+                self.report_progress(0,_('BadZip error, continuing'))
+
+        index = os.path.join(self.output_dir, 'metadata.opf')
+
+        self.report_progress(1,_('epub downloaded and extracted'))
+
+        return index
--- a/recipes/nsfw_corp.recipe
+++ b/recipes/nsfw_corp.recipe
@ -1,11 +1,9 @@
-
 __license__   = 'GPL v3'
-__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2012-2013, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.nsfwcorp.com
 '''

-import urllib
 from calibre.web.feeds.news import BasicNewsRecipe

 class NotSafeForWork(BasicNewsRecipe):
@ -20,8 +18,8 @@ class NotSafeForWork(BasicNewsRecipe):
    needs_subscription     = True
    auto_cleanup           = False
    INDEX                  = 'https://www.nsfwcorp.com'
-    LOGIN                  = INDEX + '/login/target/'
-    SETTINGS               = INDEX + '/settings/'
+    LOGIN                  = INDEX + '/account/login/?next=%2F'
+    SETTINGS               = INDEX + '/account/settings/'
    use_embedded_content   = True
    language               = 'en'
    publication_type       = 'magazine'
@ -48,19 +46,20 @@ class NotSafeForWork(BasicNewsRecipe):

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
-        br.open(self.LOGIN)
+        # Load the site index first; when both credentials are set, open the
+        # login page and submit its first form with the e-mail and password.
+        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
-            data = urllib.urlencode({ 'email':self.username
-                                     ,'password':self.password
-                                   })
-            br.open(self.LOGIN, data)
+            br.open(self.LOGIN)
+            br.select_form(nr=0)
+            br['email'   ] = self.username
+            br['password'] = self.password
+            br.submit()
        return br

    def get_feeds(self):
        self.feeds = []
        soup = self.index_to_soup(self.SETTINGS)
        for item in soup.findAll('input', attrs={'type':'text'}):
-            if item.has_key('value') and item['value'].startswith('http://www.nsfwcorp.com/feed/'):
+            # The feed URL is read from the settings page: the first text
+            # input whose value starts with the feed prefix is used, and the
+            # method returns as soon as it is found.
+            if item.has_key('value') and item['value'].startswith('https://www.nsfwcorp.com/feed/'):
               self.feeds.append(item['value'])
               return self.feeds
        return self.feeds
--- a/recipes/pc_lab.recipe
+++ b/recipes/pc_lab.recipe
@ -1,6 +1,6 @@
 #!/usr/bin/env  python
 from calibre.web.feeds.recipes import BasicNewsRecipe
-
+from calibre.ebooks.BeautifulSoup import Comment
 class PCLab(BasicNewsRecipe):
    cover_url             = 'http://pclab.pl/img/logo.png'
    title                 = u"PC Lab"
@ -52,6 +52,9 @@ class PCLab(BasicNewsRecipe):
            pager = soup2.find('div', attrs={'class':'next'})
            pagetext = soup2.find('div', attrs={'class':'substance'})
            pagetext = pagetext.find('div', attrs={'class':'data'})
+            comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
+            for comment in comments:
+                comment.extract()

            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
--- a/recipes/swiatkindle.recipe
+++ b/recipes/swiatkindle.recipe
@ -10,7 +10,7 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class swiatczytnikow(BasicNewsRecipe):
-    title          = u'Swiat Czytnikow'
+    title          = u'Świat Czytników'
    description    = u'Czytniki e-książek w Polsce. Jak wybrać, kupić i korzystać z Amazon Kindle i innych'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'
--- a/recipes/weblogs_sl.recipe
+++ b/recipes/weblogs_sl.recipe
@ -3,7 +3,7 @@ __license__     = 'GPL v3'
 __copyright__   = '4 February 2011, desUBIKado'
 __author__      = 'desUBIKado'
 __version__     = 'v0.09'
-__date__        = '02, December 2012'
+__date__        = '14, May 2013'
 '''
 http://www.weblogssl.com/
 '''
@ -56,15 +56,16 @@ class weblogssl(BasicNewsRecipe):
                          ,(u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom')
                          ,(u'Fandemia', u'http://feeds.weblogssl.com/fandemia')
                          ,(u'Tendencias', u'http://feeds.weblogssl.com/trendencias')
-                          ,(u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas')
+                          ,(u'Tendencias Belleza', u'http://feeds.weblogssl.com/trendenciasbelleza')
+                          ,(u'Tendencias Hombre', u'http://feeds.weblogssl.com/trendenciashombre')
+                          ,(u'Tendencias Shopping', u'http://feeds.weblogssl.com/trendenciasshopping')
                          ,(u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar')
                          ,(u'Compradicci\xf3n', u'http://feeds.weblogssl.com/compradiccion')
                          ,(u'Decoesfera', u'http://feeds.weblogssl.com/decoesfera')
                          ,(u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia')
                          ,(u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica')
                          ,(u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg')
-                          ,(u'Tendencias Belleza', u'http://feeds.weblogssl.com/trendenciasbelleza')
-                          ,(u'Tendencias Hombre', u'http://feeds.weblogssl.com/trendenciashombre')
+                          ,(u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas')
                          ,(u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas')
                          ,(u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion')
                          ,(u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1')
@ -119,23 +120,6 @@ class weblogssl(BasicNewsRecipe):

        return soup

-    # Para obtener la url original del articulo a partir de la de "feedsportal"
-    # El siguiente código es gracias al usuario "bosplans" de www.mobileread.com
-    # http://www.mobileread.com/forums/showthread.php?t=130297

    def get_article_url(self, article):
-       link = article.get('link', None)
-       if link is None:
-           return article
-       # if link.split('/')[-4]=="xataka2":
-       #     return article.get('feedburner_origlink', article.get('link', article.get('guid')))
-       if link.split('/')[-4]=="xataka2":
           return article.get('guid', None)
-       if link.split('/')[-1]=="story01.htm":
-           link=link.split('/')[-2]
-           a=['0B','0C','0D','0E','0F','0G','0N'  ,'0L0S','0A']
-           b=['.' ,'/' ,'?' ,'-' ,'=' ,'&' ,'.com','www.','0']
-           for i in range(0,len(a)):
-              link=link.replace(a[i],b[i])
-           link="http://"+link
-       return link
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@ -32,7 +32,7 @@ defaults.
 # Set the use_series_auto_increment_tweak_when_importing tweak to True to
 # use the above values when importing/adding books. If this tweak is set to
 # False (the default) then the series number will be set to 1 if it is not
-# explicitly set to during the import. If set to True, then the
+# explicitly set during the import. If set to True, then the
 # series index will be set according to the series_index_auto_increment setting.
 # Note that the use_series_auto_increment_tweak_when_importing tweak is used
 # only when a value is not provided during import. If the importing regular
@ -536,3 +536,4 @@ many_libraries = 10
 # yellow when using a Virtual Library. By setting this to False, you can turn
 # that off.
 highlight_virtual_library_book_count = True
+
--- a/setup/installer/linux/freeze2.py
+++ b/setup/installer/linux/freeze2.py
@ -38,7 +38,7 @@ binary_includes = [
                '/lib/libz.so.1',
                '/usr/lib/libtiff.so.5',
                '/lib/libbz2.so.1',
-                '/usr/lib/libpoppler.so.28',
+                '/usr/lib/libpoppler.so.37',
                '/usr/lib/libxml2.so.2',
                '/usr/lib/libopenjpeg.so.2',
                '/usr/lib/libxslt.so.1',
--- a/setup/installer/osx/app/main.py
+++ b/setup/installer/osx/app/main.py
@ -378,7 +378,7 @@ class Py2App(object):
    @flush
    def add_poppler(self):
        info('\nAdding poppler')
-        for x in ('libpoppler.28.dylib',):
+        for x in ('libpoppler.37.dylib',):
            self.install_dylib(os.path.join(SW, 'lib', x))
        for x in ('pdftohtml', 'pdftoppm', 'pdfinfo'):
            self.install_dylib(os.path.join(SW, 'bin', x), False)
--- a/setup/installer/windows/notes.rst
+++ b/setup/installer/windows/notes.rst
@ -116,7 +116,9 @@ tarball. Edit setup.py and set zip_safe=False. Then run::

 Run the following command to install python dependencies::

-    easy_install --always-unzip -U mechanize pyreadline python-dateutil dnspython cssutils clientform pycrypto cssselect
+    easy_install --always-unzip -U mechanize python-dateutil dnspython cssutils clientform pycrypto cssselect
+
+Install pyreadline from https://pypi.python.org/pypi/pyreadline/2.0

 Install pywin32 and edit win32com\__init__.py setting _frozen = True and
 __gen_path__ to a temp dir (otherwise it tries to set it to a dir in the
--- a/setup/iso_639/ca.po
+++ b/setup/iso_639/ca.po
@ -12,14 +12,14 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2013-04-21 08:00+0000\n"
+"PO-Revision-Date: 2013-05-06 09:36+0000\n"
 "Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
 "Language-Team: Catalan <linux@softcatala.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2013-04-22 05:23+0000\n"
-"X-Generator: Launchpad (build 16567)\n"
+"X-Launchpad-Export-Date: 2013-05-07 05:28+0000\n"
+"X-Generator: Launchpad (build 16598)\n"
 "Language: ca\n"

 #. name for aaa
@ -2024,7 +2024,7 @@ msgstr "Àzeri meridional"

 #. name for aze
 msgid "Azerbaijani"
-msgstr "Serbi"
+msgstr ""

 #. name for azg
 msgid "Amuzgo; San Pedro Amuzgos"
@ -7288,7 +7288,7 @@ msgstr "Epie"

 #. name for epo
 msgid "Esperanto"
-msgstr "Alemany"
+msgstr "Esperanto"

 #. name for era
 msgid "Eravallan"
@ -21816,7 +21816,7 @@ msgstr "Ramoaaina"

 #. name for raj
 msgid "Rajasthani"
-msgstr "Marwari"
+msgstr ""

 #. name for rak
 msgid "Tulu-Bohuai"
--- a/setup/iso_639/cs.po
+++ b/setup/iso_639/cs.po
@ -13762,7 +13762,7 @@ msgstr ""

 #. name for lav
 msgid "Latvian"
-msgstr "litevština"
+msgstr ""

 #. name for law
 msgid "Lauje"
--- a/setup/iso_639/da.po
+++ b/setup/iso_639/da.po
@ -1429,7 +1429,7 @@ msgstr ""

 #. name for arg
 msgid "Aragonese"
-msgstr "Færøsk"
+msgstr ""

 #. name for arh
 msgid "Arhuaco"
--- a/setup/iso_639/de.po
+++ b/setup/iso_639/de.po
@ -18,14 +18,14 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2013-04-11 13:29+0000\n"
+"PO-Revision-Date: 2013-05-06 09:41+0000\n"
 "Last-Translator: Simon Schütte <simonschuette@arcor.de>\n"
 "Language-Team: Ubuntu German Translators\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2013-04-12 05:20+0000\n"
-"X-Generator: Launchpad (build 16564)\n"
+"X-Launchpad-Export-Date: 2013-05-07 05:29+0000\n"
+"X-Generator: Launchpad (build 16598)\n"
 "Language: de\n"

 #. name for aaa
@ -319,7 +319,7 @@ msgstr "Adangme"

 #. name for adb
 msgid "Adabe"
-msgstr "Adangme"
+msgstr "Adabe"

 #. name for add
 msgid "Dzodinka"
@ -367,7 +367,7 @@ msgstr "Adap"

 #. name for adq
 msgid "Adangbe"
-msgstr "Adangme"
+msgstr "Adangbe"

 #. name for adr
 msgid "Adonara"
--- a/setup/iso_639/eu.po
+++ b/setup/iso_639/eu.po
@ -2022,7 +2022,7 @@ msgstr ""

 #. name for aze
 msgid "Azerbaijani"
-msgstr "Turkiera"
+msgstr ""

 #. name for azg
 msgid "Amuzgo; San Pedro Amuzgos"
@ -13126,7 +13126,7 @@ msgstr ""

 #. name for kur
 msgid "Kurdish"
-msgstr "Turkiera"
+msgstr ""

 #. name for kus
 msgid "Kusaal"
@ -16190,7 +16190,7 @@ msgstr ""

 #. name for mlt
 msgid "Maltese"
-msgstr "Koreera"
+msgstr ""

 #. name for mlu
 msgid "To'abaita"
--- a/setup/iso_639/gl.po
+++ b/setup/iso_639/gl.po
@ -13764,7 +13764,7 @@ msgstr "Laba"

 #. name for lav
 msgid "Latvian"
-msgstr "Lituano"
+msgstr ""

 #. name for law
 msgid "Lauje"
@ -22212,7 +22212,7 @@ msgstr "Roglai do norte"

 #. name for roh
 msgid "Romansh"
-msgstr "Romanés"
+msgstr ""

 #. name for rol
 msgid "Romblomanon"
--- a/setup/iso_639/hu.po
+++ b/setup/iso_639/hu.po
@ -20538,7 +20538,7 @@ msgstr ""

 #. name for peo
 msgid "Persian; Old (ca. 600-400 B.C.)"
-msgstr "perzsa"
+msgstr ""

 #. name for pep
 msgid "Kunja"
--- a/setup/iso_639/is.po
+++ b/setup/iso_639/is.po
@ -15049,7 +15049,7 @@ msgstr "Magahi"

 #. name for mah
 msgid "Marshallese"
-msgstr "Maltneska"
+msgstr ""

 #. name for mai
 msgid "Maithili"
--- a/setup/iso_639/ko.po
+++ b/setup/iso_639/ko.po
@ -3742,7 +3742,7 @@ msgstr ""

 #. name for bre
 msgid "Breton"
-msgstr "프랑스어"
+msgstr ""

 #. name for brf
 msgid "Bera"
--- a/setup/iso_639/mr.po
+++ b/setup/iso_639/mr.po
@ -6804,7 +6804,7 @@ msgstr "डोगोन; तेबुल उरे"

 #. name for dua
 msgid "Duala"
-msgstr "ड्युला"
+msgstr ""

 #. name for dub
 msgid "Dubli"
--- a/setup/iso_639/nb.po
+++ b/setup/iso_639/nb.po
@ -27790,7 +27790,7 @@ msgstr ""

 #. name for wln
 msgid "Walloon"
-msgstr "Vietnamesisk"
+msgstr ""

 #. name for wlo
 msgid "Wolio"
--- a/setup/iso_639/oc.po
+++ b/setup/iso_639/oc.po
@ -9862,7 +9862,7 @@ msgstr "Hya"

 #. name for hye
 msgid "Armenian"
-msgstr "Albanés"
+msgstr ""

 #. name for iai
 msgid "Iaai"
@ -13762,7 +13762,7 @@ msgstr "Laba"

 #. name for lav
 msgid "Latvian"
-msgstr "Lituanian"
+msgstr ""

 #. name for law
 msgid "Lauje"
--- a/setup/iso_639/ru.po
+++ b/setup/iso_639/ru.po
@ -2089,7 +2089,7 @@ msgstr "Башкирский"

 #. name for bal
 msgid "Baluchi"
-msgstr "Балийский"
+msgstr ""

 #. name for bam
 msgid "Bambara"
--- a/setup/iso_639/sk.po
+++ b/setup/iso_639/sk.po
@ -13763,7 +13763,7 @@ msgstr ""

 #. name for lav
 msgid "Latvian"
-msgstr "Lotyšský"
+msgstr ""

 #. name for law
 msgid "Lauje"
--- a/setup/iso_639/sv.po
+++ b/setup/iso_639/sv.po
--- a/setup/iso_639/zh_CN.po
+++ b/setup/iso_639/zh_CN.po
@ -1016,7 +1016,7 @@ msgstr ""

 #. name for amh
 msgid "Amharic"
-msgstr "阿拉伯语"
+msgstr ""

 #. name for ami
 msgid "Amis"
--- a/setup/translations.py
+++ b/setup/translations.py
@ -63,7 +63,6 @@ class POT(Command): # {{{

        return '\n'.join(ans)

-
    def run(self, opts):
        pot_header = textwrap.dedent('''\
        # Translation template file..
@ -117,7 +116,6 @@ class POT(Command): # {{{
                f.write(src)
            self.info('Translations template:', os.path.abspath(pot))

-
        return pot
 # }}}

@ -134,6 +132,7 @@ class Translations(POT): # {{{
        return locale, os.path.join(self.DEST, locale, 'messages.mo')

    def run(self, opts):
+        self.iso639_errors = []
        for f in self.po_files():
            locale, dest = self.mo_file(f)
            base = os.path.dirname(dest)
@ -146,18 +145,46 @@ class Translations(POT): # {{{
                    '%s.po'%iscpo)

            if os.path.exists(iso639):
+                self.check_iso639(iso639)
                dest = self.j(self.d(dest), 'iso639.mo')
                if self.newer(dest, iso639):
-                    self.info('\tCopying ISO 639 translations')
+                    self.info('\tCopying ISO 639 translations for %s' % iscpo)
                    subprocess.check_call(['msgfmt', '-o', dest, iso639])
            elif locale not in ('en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc',
                    'ltg', 'nds', 'te', 'yi', 'fo', 'sq', 'ast', 'ml', 'ku',
                    'fr_CA', 'him', 'jv', 'ka', 'fur', 'ber'):
                self.warn('No ISO 639 translations for locale:', locale)

+        if self.iso639_errors:
+            for err in self.iso639_errors:
+                print (err)
+            raise SystemExit(1)
+
        self.write_stats()
        self.freeze_locales()

+    def check_iso639(self, path):
+        '''
+        Look for translated names that are reused for more than one msgid
+        in the ISO 639 .po file at ``path``. A clash is recorded in
+        ``self.iso639_errors`` when both msgids map to different, truthy
+        values in the result of ``langnames_to_langcodes``.
+        '''
+        from calibre.utils.localization import langnames_to_langcodes
+        with open(path, 'rb') as po_file:
+            raw = po_file.read()
+        seen = {}  # translation -> msgid of the entry that last used it
+        msgid = None
+        for keyword, text in re.findall(r'^(msgid|msgstr)\s+"(.*?)"', raw, re.M):
+            if keyword == 'msgid':
+                msgid = text
+                continue
+            if not text:
+                # Empty translation, nothing to compare
+                continue
+            previous = seen.get(text, None)
+            if previous is not None:
+                codes = langnames_to_langcodes([previous, msgid])
+                if codes[msgid] and codes[previous] and codes[msgid] != codes[previous]:
+                    self.iso639_errors.append('In file %s the name %s is used as translation for both %s and %s' % (
+                        os.path.basename(path), text, msgid, previous))
+            seen[text] = msgid
+
    def freeze_locales(self):
        zf = self.DEST + '.zip'
        from calibre import CurrentDir
@ -191,7 +218,6 @@ class Translations(POT): # {{{
            locale = self.mo_file(f)[0]
            stats[locale] = min(1.0, float(trans)/total)

-
        import cPickle
        cPickle.dump(stats, open(dest, 'wb'), -1)

--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -4,7 +4,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = u'calibre'
-numeric_version = (0, 9, 29)
+numeric_version = (0, 9, 30)
 __version__   = u'.'.join(map(unicode, numeric_version))
 __author__    = u"Kovid Goyal <kovid@kovidgoyal.net>"

@ -66,10 +66,8 @@ else:
            filesystem_encoding = 'utf-8'
            # On linux, unicode arguments to os file functions are coerced to an ascii
            # bytestring if sys.getfilesystemencoding() == 'ascii', which is
-            # just plain dumb. So issue a warning.
-            print ('WARNING: You do not have the LANG environment variable set correctly. '
-                    'This will cause problems with non-ascii filenames. '
-                    'Set it to something like en_US.UTF-8.\n')
+            # just plain dumb. This is fixed by the icu.py module which, when
+            # imported, changes ascii to utf-8
    except:
        filesystem_encoding = 'utf-8'

--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -1548,12 +1548,13 @@ class StoreNextoStore(StoreBase):

 class StoreNookUKStore(StoreBase):
    name = 'Nook UK'
-    author = 'John Schember'
-    description = u'Barnes & Noble S.Ã  r.l, a subsidiary of Barnes & Noble, Inc., a leading retailer of content, digital media and educational products, is proud to bring the award-winning NOOKÂ® reading experience and a leading digital bookstore to the UK.'  # noqa
+    author = 'Charles Haley'
+    description = u'Barnes & Noble S.A.R.L, a subsidiary of Barnes & Noble, Inc., a leading retailer of content, digital media and educational products, is proud to bring the award-winning NOOK reading experience and a leading digital bookstore to the UK.'  # noqa
    actual_plugin = 'calibre.gui2.store.stores.nook_uk_plugin:NookUKStore'

    headquarters = 'UK'
    formats = ['NOOK']
+    affiliate = True

 class StoreOpenBooksStore(StoreBase):
    name = 'Open Books'
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -240,7 +240,8 @@ class ANDROID(USBMS):
            'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID',
            'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E',
            'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC', 'F', 'MT65XX_MS',
-            'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1', 'GT-S5660M_CARD', 'XT894']
+            'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1', 'GT-S5660M_CARD', 'XT894', '_USB',
+    ]
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -251,7 +252,9 @@ class ANDROID(USBMS):
            'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0', 'XT875',
            'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727',
            'USB_FLASH_DRIVER', 'ANDROID', 'MID7042', '7035', 'VIEWPAD_7E',
-            'NOVO7', 'ADVANCED', 'TABLET_PC', 'F', 'E400_SD_CARD', 'ST80208-1', 'XT894']
+            'NOVO7', 'ADVANCED', 'TABLET_PC', 'F', 'E400_SD_CARD', 'ST80208-1', 'XT894',
+            '_USB',
+    ]

    OSX_MAIN_MEM = 'Android Device Main Memory'

--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
--- a/src/calibre/devices/blackberry/driver.py
+++ b/src/calibre/devices/blackberry/driver.py
@ -19,10 +19,10 @@ class BLACKBERRY(USBMS):

    VENDOR_ID   = [0x0fca]
    PRODUCT_ID  = [0x8004, 0x0004]
-    BCD         = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211, 0x0220]
+    BCD         = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211, 0x0220, 0x232]

    VENDOR_NAME = 'RIM'
-    WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['BLACKBERRY_SD', 'BLACKBERRY']

    MAIN_MEMORY_VOLUME_LABEL  = 'Blackberry SD Card'

--- a/src/calibre/devices/idevice/init.py
+++ b/src/calibre/devices/idevice/init.py
@ -0,0 +1,2 @@
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
--- a/src/calibre/devices/idevice/libimobiledevice.py
+++ b/src/calibre/devices/idevice/libimobiledevice.py
--- a/src/calibre/devices/idevice/parse_xml.py
+++ b/src/calibre/devices/idevice/parse_xml.py
@ -0,0 +1,300 @@
+#!/usr/bin/env python
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+"""
+https://github.com/ishikawa/python-plist-parser/blob/master/plist_parser.py
+
+`Property Lists`_ are a data representation used in Apple's Mac OS X as
+a convenient way to store standard object types, such as string, number,
+boolean, and container object.
+
+This file contains a class ``XmlPropertyListParser`` for parsing
+a property list file and getting back a native Python data structure.
+
+    :copyright: 2008 by Takanori Ishikawa <takanori.ishikawa@gmail.com>
+    :license: MIT (See LICENSE file for more details)
+
+.. _Property Lists: http://developer.apple.com/documentation/Cocoa/Conceptual/PropertyLists/
+"""
+
+
+class PropertyListParseError(Exception):
+    """Raised when parsing a property list fails."""
+    pass
+
+
+class XmlPropertyListParser(object):
+    """
+    The ``XmlPropertyListParser`` class provides methods that
+    convert `Property Lists`_ objects from xml format.
+    Property list objects include ``string``, ``unicode``,
+    ``list``, ``dict``, ``datetime``, and ``int`` or ``float``.
+
+    An instance can be handed to a SAX reader as its content handler;
+    the ``iterparse`` based code path dispatches to the same callback
+    tables (``START_CALLBACKS``, ``END_CALLBACKS``, ``PARSE_CALLBACKS``).
+
+        :copyright: 2008 by Takanori Ishikawa <takanori.ishikawa@gmail.com>
+        :license: MIT License
+
+    .. _Property Lists: http://developer.apple.com/documentation/Cocoa/Conceptual/PropertyLists/
+    """
+
+    def _assert(self, test, message):
+        """Raise ``PropertyListParseError(message)`` unless ``test`` is true."""
+        if not test:
+            raise PropertyListParseError(message)
+
+    # ------------------------------------------------
+    # SAX2: ContentHandler
+    # ------------------------------------------------
+    # The handlers below are intentionally no-ops: they exist only so an
+    # instance offers the full set of ContentHandler callbacks.
+    def setDocumentLocator(self, locator):
+        pass
+
+    def startPrefixMapping(self, prefix, uri):
+        pass
+
+    def endPrefixMapping(self, prefix):
+        pass
+
+    def startElementNS(self, name, qname, attrs):
+        pass
+
+    def endElementNS(self, name, qname):
+        pass
+
+    def ignorableWhitespace(self, whitespace):
+        pass
+
+    def processingInstruction(self, target, data):
+        pass
+
+    def skippedEntity(self, name):
+        pass
+
+    def startDocument(self):
+        """Reset the parser state before a document is processed."""
+        self.__stack = []
+        self.__plist = self.__key = self.__characters = None
+        # For reducing runtime type checking,
+        # the parser caches top level object type.
+        self.__in_dict = False
+
+    def endDocument(self):
+        """
+        Verify that a top level object was parsed and that every
+        container pushed on the stack has been closed again.
+
+        Raises ``PropertyListParseError`` otherwise.
+        """
+        self._assert(self.__plist is not None, "A top level element must be <plist>.")
+        # Truthiness test instead of ``len(...) is 0``: comparing an int by
+        # identity only works as a side effect of CPython's int caching.
+        self._assert(
+            not self.__stack,
+            "multiple objects at top level.")
+
+    def startElement(self, name, attributes):
+        """Dispatch an opening tag and start buffering text for leaf elements."""
+        if name in XmlPropertyListParser.START_CALLBACKS:
+            XmlPropertyListParser.START_CALLBACKS[name](self, name, attributes)
+        if name in XmlPropertyListParser.PARSE_CALLBACKS:
+            self.__characters = []
+
+    def endElement(self, name):
+        """Dispatch a closing tag, passing buffered text to leaf callbacks."""
+        if name in XmlPropertyListParser.END_CALLBACKS:
+            XmlPropertyListParser.END_CALLBACKS[name](self, name)
+        if name in XmlPropertyListParser.PARSE_CALLBACKS:
+            # Creates character string from buffered characters.
+            content = ''.join(self.__characters)
+            # For compatibility with ``xml.etree`` and ``plistlib``,
+            # convert text string to ascii, if possible
+            try:
+                content = content.encode('ascii')
+            except (UnicodeError, AttributeError):
+                pass
+            XmlPropertyListParser.PARSE_CALLBACKS[name](self, name, content)
+            self.__characters = None
+
+    def characters(self, content):
+        """Buffer text, but only while inside an element that wants it."""
+        if self.__characters is not None:
+            self.__characters.append(content)
+
+    # ------------------------------------------------
+    # XmlPropertyListParser private
+    # ------------------------------------------------
+    def _push_value(self, value):
+        """
+        Store ``value`` as the top level object, or add it to the container
+        on top of the stack (under the pending key when that is a dict).
+        """
+        if not self.__stack:
+            self._assert(self.__plist is None, "Multiple objects at top level")
+            self.__plist = value
+        else:
+            top = self.__stack[-1]
+            #assert isinstance(top, (dict, list))
+            if self.__in_dict:
+                k = self.__key
+                if k is None:
+                    raise PropertyListParseError("Missing key for dictionary.")
+                top[k] = value
+                self.__key = None
+            else:
+                top.append(value)
+
+    def _push_stack(self, value):
+        """Make ``value`` the current container."""
+        self.__stack.append(value)
+        self.__in_dict = isinstance(value, dict)
+
+    def _pop_stack(self):
+        """Close the current container and refresh the cached dict flag."""
+        self.__stack.pop()
+        # bool() keeps the flag a real boolean; a bare ``and`` would store
+        # the empty list itself once the outermost container is closed.
+        self.__in_dict = bool(self.__stack) and isinstance(self.__stack[-1], dict)
+
+    def _start_plist(self, name, attrs):
+        """Accept a single <plist> element whose version, if given, is 1.0."""
+        self._assert(not self.__stack and self.__plist is None, "<plist> more than once.")
+        self._assert(attrs.get('version', '1.0') == '1.0',
+                     "version 1.0 is only supported, but was '%s'." % attrs.get('version'))
+
+    def _start_array(self, name, attrs):
+        v = list()
+        self._push_value(v)
+        self._push_stack(v)
+
+    def _start_dict(self, name, attrs):
+        v = dict()
+        self._push_value(v)
+        self._push_stack(v)
+
+    def _end_array(self, name):
+        self._pop_stack()
+
+    def _end_dict(self, name):
+        """Close a dict; a key that never received a value is an error."""
+        if self.__key is not None:
+            raise PropertyListParseError("Missing value for key '%s'" % self.__key)
+        self._pop_stack()
+
+    def _start_true(self, name, attrs):
+        self._push_value(True)
+
+    def _start_false(self, name, attrs):
+        self._push_value(False)
+
+    def _parse_key(self, name, content):
+        """Remember ``content`` as the pending key; warn when outside a dict."""
+        if not self.__in_dict:
+            print("XmlPropertyListParser() WARNING: ignoring <key>%s</key> (<key> elements must be contained in <dict> element)" % content)
+            #raise PropertyListParseError("<key> element '%s' must be in <dict> element." % content)
+        else:
+            self.__key = content
+
+    def _parse_string(self, name, content):
+        self._push_value(content)
+
+    def _parse_data(self, name, content):
+        """Push the base64 decoded payload of a <data> element."""
+        import base64
+        self._push_value(base64.b64decode(content))
+
+    # http://www.apple.com/DTDs/PropertyList-1.0.dtd says:
+    #
+    # Contents should conform to a subset of ISO 8601
+    # (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'.
+    # Smaller units may be omitted with a loss of precision)
+    import re
+    DATETIME_PATTERN = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z$")
+
+    def _parse_date(self, name, content):
+        """
+        Push a ``datetime.datetime`` built from ``content``.
+
+        Raises ``PropertyListParseError`` when ``content`` does not match
+        ``DATETIME_PATTERN``.
+        """
+        import datetime
+
+        units = ('year', 'month', 'day', 'hour', 'minute', 'second', )
+        pattern = XmlPropertyListParser.DATETIME_PATTERN
+        match = pattern.match(content)
+        if not match:
+            raise PropertyListParseError("Failed to parse datetime '%s'" % content)
+
+        groups, components = match.groupdict(), []
+        for key in units:
+            value = groups[key]
+            if value is None:
+                break
+            components.append(int(value))
+        # datetime() needs at least year, month and day; default the
+        # omitted month/day to 1.
+        while len(components) < 3:
+            components.append(1)
+
+        d = datetime.datetime(*components)
+        self._push_value(d)
+
+    def _parse_real(self, name, content):
+        self._push_value(float(content))
+
+    def _parse_integer(self, name, content):
+        self._push_value(int(content))
+
+    # Called with (self, name, attrs) when an element opens.
+    START_CALLBACKS = {
+        'plist': _start_plist,
+        'array': _start_array,
+        'dict': _start_dict,
+        'true': _start_true,
+        'false': _start_false,
+    }
+
+    # Called with (self, name) when a container element closes.
+    END_CALLBACKS = {
+        'array': _end_array,
+        'dict': _end_dict,
+    }
+
+    # Called with (self, name, content) when a leaf element closes.
+    PARSE_CALLBACKS = {
+        'key': _parse_key,
+        'string': _parse_string,
+        'data': _parse_data,
+        'date': _parse_date,
+        'real': _parse_real,
+        'integer': _parse_integer,
+    }
+
+    # ------------------------------------------------
+    # XmlPropertyListParser
+    # ------------------------------------------------
+    def _to_stream(self, io_or_string):
+        """
+        Return a file-like object for ``io_or_string``.
+
+        Strings are wrapped in a ``StringIO``; objects with a callable
+        ``read`` attribute are returned unchanged. Anything else raises
+        ``TypeError``.
+        """
+        if isinstance(io_or_string, basestring):
+            # Creates a string stream for in-memory contents.
+            from cStringIO import StringIO
+            return StringIO(io_or_string)
+        elif hasattr(io_or_string, 'read') and callable(getattr(io_or_string, 'read')):
+            return io_or_string
+        else:
+            raise TypeError('Can\'t convert %s to file-like-object' % type(io_or_string))
+
+    def _parse_using_etree(self, xml_input):
+        """
+        Parse ``xml_input`` with ``cElementTree.iterparse`` and return the
+        resulting object. A ``SyntaxError`` raised while iterating is
+        re-raised as ``PropertyListParseError``.
+        """
+        from xml.etree.cElementTree import iterparse
+
+        parser = iterparse(self._to_stream(xml_input), events=(b'start', b'end'))
+        self.startDocument()
+        try:
+            for action, element in parser:
+                name = element.tag
+                if action == 'start':
+                    if name in XmlPropertyListParser.START_CALLBACKS:
+                        XmlPropertyListParser.START_CALLBACKS[name](self, element.tag, element.attrib)
+                elif action == 'end':
+                    if name in XmlPropertyListParser.END_CALLBACKS:
+                        XmlPropertyListParser.END_CALLBACKS[name](self, name)
+                    if name in XmlPropertyListParser.PARSE_CALLBACKS:
+                        XmlPropertyListParser.PARSE_CALLBACKS[name](self, name, element.text or "")
+                    element.clear()
+        except SyntaxError as e:
+            raise PropertyListParseError(e)
+
+        self.endDocument()
+        return self.__plist
+
+    def _parse_using_sax_parser(self, xml_input):
+        """
+        Parse ``xml_input`` with a SAX reader that uses this object as its
+        content handler and return the resulting object. A
+        ``SAXParseException`` is re-raised as ``PropertyListParseError``.
+        """
+        from xml.sax import make_parser, xmlreader, SAXParseException
+        source = xmlreader.InputSource()
+        source.setByteStream(self._to_stream(xml_input))
+        reader = make_parser()
+        reader.setContentHandler(self)
+        try:
+            reader.parse(source)
+        except SAXParseException as e:
+            raise PropertyListParseError(e)
+
+        return self.__plist
+
+    def parse(self, xml_input):
+        """
+        Parse the property list (`.plist`, `.xml`, for example) ``xml_input``,
+        which can be either a string or a file-like object.
+
+        >>> parser = XmlPropertyListParser()
+        >>> parser.parse(r'<plist version="1.0">'
+        ...              r'<dict><key>Python</key><string>.py</string></dict>'
+        ...              r'</plist>')
+        {'Python': '.py'}
+        """
+        try:
+            return self._parse_using_etree(xml_input)
+        except ImportError:
+            # No xml.etree.cElementTree found.
+            return self._parse_using_sax_parser(xml_input)
--- a/src/calibre/devices/interface.py
+++ b/src/calibre/devices/interface.py
@ -107,6 +107,12 @@ class DevicePlugin(Plugin):
    #: :meth:`set_user_blacklisted_devices`
    ASK_TO_ALLOW_CONNECT = False

+    #: Set this to a dictionary of the form {'title':title, 'msg':msg, 'det_msg':detailed_msg} to have calibre pop up
+    #: a message to the user after some callbacks are run (currently only upload_books).
+    #: Be careful not to spam the user with too many messages. This variable is checked after *every* callback,
+    #: so only set it when you really need to.
+    user_feedback_after_callback = None
+
    @classmethod
    def get_gui_name(cls):
        if hasattr(cls, 'gui_name'):
@ -165,8 +171,7 @@ class DevicePlugin(Plugin):
                                            'rev_')[-1].replace(':', 'a'), 16)
                            except:
                                bcd = None
-                           return True, (vendor_id, product_id, bcd, None,
-                                   None, None)
+                            return True, (vendor_id, product_id, bcd, None, None, None)
        return False, None

    def test_bcd(self, bcdDevice, bcd):
@ -638,7 +643,6 @@ class DevicePlugin(Plugin):
        '''
        device_prefs.set_overrides()

-
    # Dynamic control interface.
    # The following methods are probably called on the GUI thread. Any driver
    # that implements these methods must take pains to be thread safe, because
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@ -35,7 +35,7 @@ class KOBO(USBMS):
    gui_name = 'Kobo Reader'
    description = _('Communicate with the Kobo Reader')
    author = 'Timothy Legge and David Forrester'
-    version = (2, 0, 9)
+    version = (2, 0, 10)

    dbversion = 0
    fwversion = 0
@ -45,6 +45,7 @@ class KOBO(USBMS):
    supported_platforms = ['windows', 'osx', 'linux']

    booklist_class = CollectionsBookList
+    book_class = Book

    # Ordered list of supported formats
    FORMATS     = ['epub', 'pdf', 'txt', 'cbz', 'cbr']
@ -115,7 +116,6 @@ class KOBO(USBMS):

    def initialize(self):
        USBMS.initialize(self)
-        self.book_class = Book
        self.dbversion = 7

    def books(self, oncard=None, end_session=True):
@ -1213,7 +1213,7 @@ class KOBOTOUCH(KOBO):
    min_dbversion_archive           = 71
    min_dbversion_images_on_sdcard  = 77

-    max_supported_fwversion         = (2,5,1)
+    max_supported_fwversion         = (2,5,3)
    min_fwversion_images_on_sdcard  = (2,4,1)

    has_kepubs = True
@ -1237,11 +1237,9 @@ class KOBOTOUCH(KOBO):
            _('Keep cover aspect ratio') +
            ':::'+_('When uploading covers, do not change the aspect ratio when resizing for the device.'
                    ' This is for firmware versions 2.3.1 and later.'),
-            _('Show expired books') +
-            ':::'+_('A bug in an earlier version left non kepubs book records'
-                ' in the database.  With this option Calibre will show the '
-                'expired records and allow you to delete them with '
-                'the new delete logic.'),
+            _('Show archived books') +
+            ':::'+_('Archived books are listed on the device but need to be downloaded to read.'
+                    ' Use this option to show these books and match them with books in the calibre library.'),
            _('Show Previews') +
            ':::'+_('Kobo previews are included on the Touch and some other versions'
                ' by default they are no longer displayed as there is no good reason to '
@ -1289,7 +1287,7 @@ class KOBOTOUCH(KOBO):
    OPT_UPLOAD_COVERS               = 3
    OPT_UPLOAD_GRAYSCALE_COVERS     = 4
    OPT_KEEP_COVER_ASPECT_RATIO     = 5
-    OPT_SHOW_EXPIRED_BOOK_RECORDS   = 6
+    OPT_SHOW_ARCHIVED_BOOK_RECORDS  = 6
    OPT_SHOW_PREVIEWS               = 7
    OPT_SHOW_RECOMMENDATIONS        = 8
    OPT_UPDATE_SERIES_DETAILS       = 9
@ -1347,6 +1345,10 @@ class KOBOTOUCH(KOBO):
        self.set_device_name()
        return super(KOBOTOUCH, self).get_device_information(end_session)

+
+    def device_database_path(self):
+        '''Return the normalized path of ``.kobo/KoboReader.sqlite`` under ``self._main_prefix``.'''
+        database = self._main_prefix + '.kobo/KoboReader.sqlite'
+        return self.normalize_path(database)
+
    def books(self, oncard=None, end_session=True):
        debug_print("KoboTouch:books - oncard='%s'"%oncard)
        from calibre.ebooks.metadata.meta import path_to_ext
@ -1599,9 +1601,7 @@ class KOBOTOUCH(KOBO):

        self.debug_index = 0
        import sqlite3 as sqlite
-        with closing(sqlite.connect(
-            self.normalize_path(self._main_prefix +
-                '.kobo/KoboReader.sqlite'))) as connection:
+        with closing(sqlite.connect(self.device_database_path())) as connection:
            debug_print("KoboTouch:books - reading device database")

            # return bytestrings if the content cannot the decoded as unicode
@ -1618,7 +1618,21 @@ class KOBOTOUCH(KOBO):
            debug_print("KoboTouch:books - shelf list:", self.bookshelvelist)

            opts = self.settings()
-            if self.supports_series():
+            if self.supports_kobo_archive():
+                query= ("select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, " \
+                    "ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility, " \
+                    "IsDownloaded, Series, SeriesNumber, ___UserID " \
+                    " from content " \
+                    " where BookID is Null " \
+                    " and ((Accessibility = -1 and IsDownloaded in ('true', 1 )) or (Accessibility in (1,2) %(expiry)s) " \
+                    "    %(previews)s %(recomendations)s )" \
+                    " and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) and ContentType = 6)") % \
+                        dict(\
+                             expiry="" if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else "and IsDownloaded in ('true', 1)", \
+                             previews=" or (Accessibility in (6) and ___UserID <> '')" if opts.extra_customization[self.OPT_SHOW_PREVIEWS] else "", \
+                             recomendations=" or (Accessibility in (-1, 4, 6) and ___UserId = '')" if opts.extra_customization[self.OPT_SHOW_RECOMMENDATIONS] else "" \
+                             )
+            elif self.supports_series():
                query= ("select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, " \
                    "ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility, " \
                    "IsDownloaded, Series, SeriesNumber, ___UserID " \
@ -1627,7 +1641,7 @@ class KOBOTOUCH(KOBO):
                    " and ((Accessibility = -1 and IsDownloaded in ('true', 1)) or (Accessibility in (1,2)) %(previews)s %(recomendations)s )" \
                    " and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s") % \
                        dict(\
-                             expiry=" and ContentType = 6)" if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ")", \
+                             expiry=" and ContentType = 6)" if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else ")", \
                             previews=" or (Accessibility in (6) and ___UserID <> '')" if opts.extra_customization[self.OPT_SHOW_PREVIEWS] else "", \
                             recomendations=" or (Accessibility in (-1, 4, 6) and ___UserId = '')" if opts.extra_customization[self.OPT_SHOW_RECOMMENDATIONS] else "" \
                             )
@ -1638,7 +1652,7 @@ class KOBOTOUCH(KOBO):
                    ' from content ' \
                    ' where BookID is Null %(previews)s %(recomendations)s and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % \
                        dict(\
-                             expiry=' and ContentType = 6)' if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')', \
+                             expiry=' and ContentType = 6)' if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else ')', \
                             previews=' and Accessibility <> 6' if opts.extra_customization[self.OPT_SHOW_PREVIEWS] == False else '', \
                             recomendations=' and IsDownloaded in (\'true\', 1)' if opts.extra_customization[self.OPT_SHOW_RECOMMENDATIONS] == False else ''\
                             )
@ -1648,7 +1662,7 @@ class KOBOTOUCH(KOBO):
                    '"1" as IsDownloaded, null as Series, null as SeriesNumber, ___UserID' \
                    ' from content where ' \
                    'BookID is Null and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % dict(expiry=' and ContentType = 6)' \
-                    if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')')
+                    if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else ')')
            else:
                query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
                    'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility, ' \
@ -2586,7 +2600,7 @@ class KOBOTOUCH(KOBO):
    def modify_database_check(self, function):
        # Checks to see whether the database version is supported
        # and whether the user has chosen to support the firmware version
-#        debug_print("KoboTouch:modify_database_check - self.fwversion <= self.max_supported_fwversion=", self.fwversion > self.max_supported_fwversion)
+#        debug_print("KoboTouch:modify_database_check - self.fwversion > self.max_supported_fwversion=", self.fwversion > self.max_supported_fwversion)
        if self.dbversion > self.supported_dbversion or self.fwversion > self.max_supported_fwversion:
            # Unsupported database
            opts = self.settings()
--- a/src/calibre/devices/prst1/driver.py
+++ b/src/calibre/devices/prst1/driver.py
@ -50,10 +50,10 @@ class PRST1(USBMS):

    VENDOR_NAME        = 'SONY'
    WINDOWS_MAIN_MEM   = re.compile(
-            r'(PRS-T(1|2)&)'
+            r'(PRS-T(1|2|2N)&)'
            )
    WINDOWS_CARD_A_MEM = re.compile(
-            r'(PRS-T(1|2)__SD&)'
+            r'(PRS-T(1|2|2N)__SD&)'
            )
    MAIN_MEMORY_VOLUME_LABEL = 'SONY Reader Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'SONY Reader Storage Card'
@ -66,7 +66,7 @@ class PRST1(USBMS):

    EXTRA_CUSTOMIZATION_MESSAGE = [
        _('Comma separated list of metadata fields '
-            'to turn into collections on the device. Possibilities include: ')+\
+            'to turn into collections on the device. Possibilities include: ')+
                    'series, tags, authors',
        _('Upload separate cover thumbnails for books') +
             ':::'+_('Normally, the SONY readers get the cover image from the'
@ -194,17 +194,17 @@ class PRST1(USBMS):
                time_offsets = {}
                for i, row in enumerate(cursor):
                    try:
-                        comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000);
+                        comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000)
                    except (OSError, IOError, TypeError):
                        # In case the db has incorrect path info
                        continue
-                    device_date = int(row[1]);
+                    device_date = int(row[1])
                    offset = device_date - comp_date
                    time_offsets.setdefault(offset, 0)
                    time_offsets[offset] = time_offsets[offset] + 1

                try:
-                    device_offset = max(time_offsets,key = lambda a: time_offsets.get(a))
+                    device_offset = max(time_offsets, key=lambda a: time_offsets.get(a))
                    debug_print("Device Offset: %d ms"%device_offset)
                    self.device_offset = device_offset
                except ValueError:
@ -213,7 +213,7 @@ class PRST1(USBMS):
            for idx, book in enumerate(bl):
                query = 'SELECT _id, thumbnail FROM books WHERE file_path = ?'
                t = (book.lpath,)
-                cursor.execute (query, t)
+                cursor.execute(query, t)

                for i, row in enumerate(cursor):
                    book.device_collections = bl_collections.get(row[0], None)
@ -345,7 +345,7 @@ class PRST1(USBMS):
        # Insert the sequence Id if it doesn't
        query = ('INSERT INTO sqlite_sequence (name, seq) '
                'SELECT ?, ? '
-                'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)');
+                'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)')
        cursor.execute(query, (table, sequence_id, table,))

        cursor.close()
--- a/src/calibre/devices/smart_device_app/driver.py
+++ b/src/calibre/devices/smart_device_app/driver.py
@ -875,6 +875,9 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
            self.client_device_kind = result.get('deviceKind', '')
            self._debug('Client device kind', self.client_device_kind)

+            self.client_device_name = result.get('deviceName', self.client_device_kind)
+            self._debug('Client device name', self.client_device_name)
+
            self.max_book_packet_len = result.get('maxBookContentPacketLen',
                                                  self.BASE_PACKET_LEN)
            self._debug('max_book_packet_len', self.max_book_packet_len)
@ -946,6 +949,8 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
        return False

    def get_gui_name(self):
+        # Prefer the client-reported device name, then the device kind, and
+        # fall back to the plain gui_name when neither attribute is set.
+        if getattr(self, 'client_device_name', None):
+            return self.gui_name_template%(self.gui_name, self.client_device_name)
        if getattr(self, 'client_device_kind', None):
            return self.gui_name_template%(self.gui_name, self.client_device_kind)
        return self.gui_name
--- a/src/calibre/ebooks/conversion/plugins/txt_input.py
+++ b/src/calibre/ebooks/conversion/plugins/txt_input.py
@ -91,14 +91,15 @@ class TXTInput(InputFormatPlugin):
            log.debug('Using user specified input encoding of %s' % ienc)
        else:
            det_encoding = detect(txt)
+            det_encoding, confidence = det_encoding['encoding'], det_encoding['confidence']
            if det_encoding and det_encoding.lower().replace('_', '-').strip() in (
                    'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
                    'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
                # Microsoft Word exports to HTML with encoding incorrectly set to
                # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
                det_encoding = 'gbk'
-            ienc = det_encoding['encoding']
-            log.debug('Detected input encoding as %s with a confidence of %s%%' % (ienc, det_encoding['confidence'] * 100))
+            ienc = det_encoding
+            log.debug('Detected input encoding as %s with a confidence of %s%%' % (ienc, confidence * 100))
        if not ienc:
            ienc = 'utf-8'
            log.debug('No input encoding specified and could not auto detect using %s' % ienc)
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -77,7 +77,7 @@ class Plumber(object):

    def __init__(self, input, output, log, report_progress=DummyReporter(),
            dummy=False, merge_plugin_recs=True, abort_after_input_dump=False,
-            override_input_metadata=False):
+            override_input_metadata=False, for_regex_wizard=False):
        '''
        :param input: Path to input file.
        :param output: Path to output file/directory
@ -87,6 +87,7 @@ class Plumber(object):
        if isbytestring(output):
            output = output.decode(filesystem_encoding)
        self.original_input_arg = input
+        self.for_regex_wizard = for_regex_wizard
        self.input = os.path.abspath(input)
        self.output = os.path.abspath(output)
        self.log = log
@ -123,7 +124,7 @@ OptionRecommendation(name='input_profile',
                   'conversion system information on how to interpret '
                   'various information in the input document. For '
                   'example resolution dependent lengths (i.e. lengths in '
-                   'pixels). Choices are:')+\
+                   'pixels). Choices are:')+
                        ', '.join([x.short_name for x in input_profiles()])
        ),

@ -135,7 +136,7 @@ OptionRecommendation(name='output_profile',
                   'created document for the specified device. In some cases, '
                   'an output profile is required to produce documents that '
                   'will work on a device. For example EPUB on the SONY reader. '
-                   'Choices are:') + \
+                   'Choices are:') +
                           ', '.join([x.short_name for x in output_profiles()])
        ),

@ -490,7 +491,7 @@ OptionRecommendation(name='asciiize',
            'cases where there are multiple representations of a character '
            '(characters shared by Chinese and Japanese for instance) the '
            'representation based on the current calibre interface language will be '
-            'used.')%\
+            'used.')%
            u'\u041c\u0438\u0445\u0430\u0438\u043b '
            u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432'
 )
@ -711,7 +712,6 @@ OptionRecommendation(name='search_replace',
        self.input_fmt = input_fmt
        self.output_fmt = output_fmt

-
        self.all_format_options = set()
        self.input_options = set()
        self.output_options = set()
@ -775,7 +775,7 @@ OptionRecommendation(name='search_replace',
        if not html_files:
            raise ValueError(_('Could not find an ebook inside the archive'))
        html_files = [(f, os.stat(f).st_size) for f in html_files]
-        html_files.sort(cmp = lambda x, y: cmp(x[1], y[1]))
+        html_files.sort(cmp=lambda x, y: cmp(x[1], y[1]))
        html_files = [f[0] for f in html_files]
        for q in ('toc', 'index'):
            for f in html_files:
@ -783,8 +783,6 @@ OptionRecommendation(name='search_replace',
                    return f, os.path.splitext(f)[1].lower()[1:]
        return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]

-
-
    def get_option_by_name(self, name):
        for group in (self.input_options, self.pipeline_options,
                      self.output_options, self.all_format_options):
@ -956,7 +954,6 @@ OptionRecommendation(name='search_replace',

        self.log.info('Input debug saved to:', out_dir)

-
    def run(self):
        '''
        Run the conversion pipeline
@ -965,6 +962,8 @@ OptionRecommendation(name='search_replace',
        self.setup_options()
        if self.opts.verbose:
            self.log.filter_level = self.log.DEBUG
+        if self.for_regex_wizard and hasattr(self.opts, 'no_process'):
+            self.opts.no_process = True
        self.flush()
        import cssutils, logging
        cssutils.log.setLevel(logging.WARN)
@ -1003,6 +1002,8 @@ OptionRecommendation(name='search_replace',
        self.ui_reporter(0.01, _('Converting input to HTML...'))
        ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
        self.input_plugin.report_progress = ir
+        if self.for_regex_wizard:
+            self.input_plugin.for_viewer = True
        with self.input_plugin:
            self.oeb = self.input_plugin(stream, self.opts,
                                        self.input_fmt, self.log,
@ -1014,8 +1015,12 @@ OptionRecommendation(name='search_replace',
            if self.input_fmt in ('recipe', 'downloaded_recipe'):
                self.opts_to_mi(self.user_metadata)
            if not hasattr(self.oeb, 'manifest'):
-                self.oeb = create_oebbook(self.log, self.oeb, self.opts,
-                        encoding=self.input_plugin.output_encoding)
+                self.oeb = create_oebbook(
+                    self.log, self.oeb, self.opts,
+                    encoding=self.input_plugin.output_encoding,
+                    for_regex_wizard=self.for_regex_wizard)
+            if self.for_regex_wizard:
+                return
            self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
            self.opts.is_image_collection = self.input_plugin.is_image_collection
            pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
@ -1081,7 +1086,6 @@ OptionRecommendation(name='search_replace',
            self.dump_oeb(self.oeb, out_dir)
            self.log('Structured HTML written to:', out_dir)

-
        if self.opts.extra_css and os.path.exists(self.opts.extra_css):
            self.opts.extra_css = open(self.opts.extra_css, 'rb').read()

@ -1161,13 +1165,20 @@ OptionRecommendation(name='search_replace',
        self.log(self.output_fmt.upper(), 'output written to', self.output)
        self.flush()

+# This has to be global as create_oebbook can be called from other locations
+# (for example in the html input plugin)
+regex_wizard_callback = None
+def set_regex_wizard_callback(f):
+    # Install f as the module-level callback that create_oebbook() passes to
+    # HTMLPreProcessor. The preprocessor only calls it when it is not None.
+    global regex_wizard_callback
+    regex_wizard_callback = f
+
 def create_oebbook(log, path_or_stream, opts, reader=None,
-        encoding='utf-8', populate=True):
+        encoding='utf-8', populate=True, for_regex_wizard=False):
    '''
    Create an OEBBook.
    '''
    from calibre.ebooks.oeb.base import OEBBook
-    html_preprocessor = HTMLPreProcessor(log, opts)
+    html_preprocessor = HTMLPreProcessor(log, opts, regex_wizard_callback=regex_wizard_callback)
    if not encoding:
        encoding = None
    oeb = OEBBook(log, html_preprocessor,
@ -1182,3 +1193,4 @@ def create_oebbook(log, path_or_stream, opts, reader=None,

    reader()(oeb, path_or_stream)
    return oeb
+
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -14,7 +14,7 @@ SVG_NS       = 'http://www.w3.org/2000/svg'
 XLINK_NS     = 'http://www.w3.org/1999/xlink'

 convert_entities = functools.partial(entity_to_unicode,
-        result_exceptions = {
+        result_exceptions={
            u'<' : '&lt;',
            u'>' : '&gt;',
            u"'" : '&apos;',
@ -156,7 +156,7 @@ class DocAnalysis(object):
        #        max = l
        #print "max line found is "+str(max)
        # Build the line length histogram
-        hRaw = [ 0 for i in range(0,buckets) ]
+        hRaw = [0 for i in range(0,buckets)]
        for line in self.lines:
            l = len(line)
            if l > minLineLength and l < maxLineLength:
@ -167,7 +167,7 @@ class DocAnalysis(object):
        # Normalize the histogram into percents
        totalLines = len(self.lines)
        if totalLines > 0:
-            h = [ float(count)/totalLines for count in hRaw ]
+            h = [float(count)/totalLines for count in hRaw]
        else:
            h = []
        #print "\nhRaw histogram lengths are: "+str(hRaw)
@ -200,7 +200,7 @@ class Dehyphenator(object):
        # Add common suffixes to the regex below to increase the likelihood of a match -
        # don't add suffixes which are also complete words, such as 'able' or 'sex'
        # only remove if it's not already the point of hyphenation
-        self.suffix_string = "((ed)?ly|'?e?s||a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$"
+        self.suffix_string = "((ed)?ly|'?e?s||a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$"  # noqa
        self.suffixes = re.compile(r"^%s" % self.suffix_string, re.IGNORECASE)
        self.removesuffixes = re.compile(r"%s" % self.suffix_string, re.IGNORECASE)
        # remove prefixes if the prefix was not already the point of hyphenation
@ -265,19 +265,18 @@ class Dehyphenator(object):
        self.html = html
        self.format = format
        if format == 'html':
-            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)\s*(?=<)(?P<wraptags>(</span>)?\s*(</[iubp]>\s*){1,2}(?P<up2threeblanks><(p|div)[^>]*>\s*(<p[^>]*>\s*</p>\s*)?</(p|div)>\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(<span[^>]*>)?)\s*(?P<secondpart>[\w\d]+)' % length)
+            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)\s*(?=<)(?P<wraptags>(</span>)?\s*(</[iubp]>\s*){1,2}(?P<up2threeblanks><(p|div)[^>]*>\s*(<p[^>]*>\s*</p>\s*)?</(p|div)>\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(<span[^>]*>)?)\s*(?P<secondpart>[\w\d]+)' % length)  # noqa
        elif format == 'pdf':
            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)\s*(?P<wraptags><p>|</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)'% length)
        elif format == 'txt':
-            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\w\d]+)'% length)
+            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\w\d]+)'% length)  # noqa
        elif format == 'individual_words':
            intextmatch = re.compile(u'(?!<)(?P<firstpart>[^\W\-]+)(-|‐)\s*(?P<secondpart>\w+)(?![^<]*?>)')
        elif format == 'html_cleanup':
-            intextmatch = re.compile(u'(?P<firstpart>[^\W\-]+)(-|‐)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)')
+            intextmatch = re.compile(u'(?P<firstpart>[^\W\-]+)(-|‐)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)')  # noqa
        elif format == 'txt_cleanup':
            intextmatch = re.compile(u'(?P<firstpart>[^\W\-]+)(-|‐)(?P<wraptags>\s+)(?P<secondpart>[\w\d]+)')

-
        html = intextmatch.sub(self.dehyphenate, html)
        return html

@ -498,9 +497,11 @@ class HTMLPreProcessor(object):
                     (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
                      lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
                     ]
-    def __init__(self, log=None, extra_opts=None):
+    def __init__(self, log=None, extra_opts=None, regex_wizard_callback=None):
+        # regex_wizard_callback, when not None, is called as
+        # callback(self.current_href, html) once the PREPROCESS and start
+        # rules have been applied. current_href starts out as None.
        self.log = log
        self.extra_opts = extra_opts
+        self.regex_wizard_callback = regex_wizard_callback
+        self.current_href = None

    def is_baen(self, src):
        return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
@ -581,12 +582,15 @@ class HTMLPreProcessor(object):
                end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
                end_rules.append(
                    # Un wrap using punctuation
-                    (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
+                    (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),  # noqa
                )

        for rule in self.PREPROCESS + start_rules:
            html = rule[0].sub(rule[1], html)

+        if self.regex_wizard_callback is not None:
+            self.regex_wizard_callback(self.current_href, html)
+
        if get_preprocess_html:
            return html

--- a/src/calibre/ebooks/docx/block_styles.py
+++ b/src/calibre/ebooks/docx/block_styles.py
@ -0,0 +1,290 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+from collections import OrderedDict
+from calibre.ebooks.docx.names import XPath, get
+
+# Sentinel: the single `inherit` instance marks a property that is not set at
+# this level. It is always compared by identity (`is inherit`).
+class Inherit:
+    pass
+inherit = Inherit()
+
+def binary_property(parent, name):
+    '''
+    Read the toggle element w:<name> that is a direct child of parent.
+
+    Returns inherit when there is no such child. Otherwise returns True when
+    get(child, 'w:val', 'on') is one of 'on', '1' or 'true', and False for any
+    other value. Only the first matching child is examined.
+    '''
+    matches = XPath('./w:%s' % name)(parent)
+    if matches:
+        return get(matches[0], 'w:val', 'on') in {'on', '1', 'true'}
+    return inherit
+
+def simple_color(col, auto='black'):
+    '''
+    Return col prefixed with '#', or auto when col is empty, equal to 'auto'
+    or not exactly six characters long.
+    '''
+    usable = bool(col) and col != 'auto' and len(col) == 6
+    return '#'+col if usable else auto
+
+def simple_float(val, mult=1.0):
+    '''Return float(val) * mult, or None when that computation fails.'''
+    try:
+        scaled = float(val) * mult
+    except (ValueError, TypeError, AttributeError, KeyError):
+        scaled = None
+    return scaled
+
+
+# Maps the w:val of a border element to the value stored as the border style
+# (emitted by ParagraphStyle.css as border-<edge>-style). Callers fall back to
+# 'solid' for names that are not listed here.
+LINE_STYLES = {  # {{{
+    'basicBlackDashes': 'dashed',
+    'basicBlackDots': 'dotted',
+    'basicBlackSquares': 'dashed',
+    'basicThinLines': 'solid',
+    'dashDotStroked': 'groove',
+    'dashed': 'dashed',
+    'dashSmallGap': 'dashed',
+    'dotDash': 'dashed',
+    'dotDotDash': 'dashed',
+    'dotted': 'dotted',
+    'double': 'double',
+    'inset': 'inset',
+    'nil': 'none',
+    'none': 'none',
+    'outset': 'outset',
+    'single': 'solid',
+    'thick': 'solid',
+    'thickThinLargeGap': 'double',
+    'thickThinMediumGap': 'double',
+    'thickThinSmallGap' : 'double',
+    'thinThickLargeGap': 'double',
+    'thinThickMediumGap': 'double',
+    'thinThickSmallGap': 'double',
+    'thinThickThinLargeGap': 'double',
+    'thinThickThinMediumGap': 'double',
+    'thinThickThinSmallGap': 'double',
+    'threeDEmboss': 'ridge',
+    'threeDEngrave': 'groove',
+    'triple': 'double',
+}  # }}}
+
+# Read from XML {{{
+def read_border(parent, dest):
+    '''
+    Read the paragraph borders (w:pBdr children of parent) and store them on
+    dest.
+
+    For every edge (left, top, right, bottom) the attributes padding_<edge>,
+    border_<edge>_width, border_<edge>_style and border_<edge>_color are set
+    on dest. Each one stays as inherit unless the matching XML attribute is
+    present and, for the numeric ones, parseable as a float.
+    '''
+    edges = ('left', 'top', 'right', 'bottom')
+    tvals = {'padding_%s':inherit, 'border_%s_width':inherit,
+            'border_%s_style':inherit, 'border_%s_color':inherit}
+    vals = {}
+    for edge in edges:
+        vals.update({k % edge:v for k, v in tvals.iteritems()})
+
+    for border in XPath('./w:pBdr')(parent):
+        for edge in edges:
+            # The compiled expression has to be evaluated against the border
+            # element; iterating the XPath object itself never yields the
+            # per-edge elements.
+            for elem in XPath('./w:%s' % edge)(border):
+                color = get(elem, 'w:color')
+                if color is not None:
+                    vals['border_%s_color' % edge] = simple_color(color)
+                style = get(elem, 'w:val')
+                if style is not None:
+                    # Unknown style names are rendered as a plain solid line
+                    vals['border_%s_style' % edge] = LINE_STYLES.get(style, 'solid')
+                space = get(elem, 'w:space')
+                if space is not None:
+                    try:
+                        vals['padding_%s' % edge] = float(space)
+                    except (ValueError, TypeError):
+                        pass
+                sz = get(elem, 'w:sz')
+                if sz is not None:
+                    # we don't care about art borders (they are only used for page borders)
+                    try:
+                        # Clamp to [2, 96], then divide by 8
+                        vals['border_%s_width' % edge] = min(96, max(2, float(sz))) / 8
+                    except (ValueError, TypeError):
+                        pass
+
+    for key, val in vals.iteritems():
+        setattr(dest, key, val)
+
+def read_indent(parent, dest):
+    # Read the w:ind children of parent and set margin_left, margin_right and
+    # text_indent on dest. Each is either inherit or a string made of a number
+    # and a unit ('em' or 'pt').
+    padding_left = padding_right = text_indent = inherit
+    for indent in XPath('./w:ind')(parent):
+        # The *Chars attribute wins over the plain one: it is scaled by 0.01
+        # and emitted in em, the plain attribute is scaled by 0.05 and emitted
+        # in pt.
+        l, lc = get(indent, 'w:left'), get(indent, 'w:leftChars')
+        pl = simple_float(lc, 0.01) if lc is not None else simple_float(l, 0.05) if l is not None else None
+        if pl is not None:
+            padding_left = '%.3g%s' % (pl, 'em' if lc is not None else 'pt')
+
+        r, rc = get(indent, 'w:right'), get(indent, 'w:rightChars')
+        pr = simple_float(rc, 0.01) if rc is not None else simple_float(r, 0.05) if r is not None else None
+        if pr is not None:
+            padding_right = '%.3g%s' % (pr, 'em' if rc is not None else 'pt')
+
+        h, hc = get(indent, 'w:hanging'), get(indent, 'w:hangingChars')
+        fl, flc = get(indent, 'w:firstLine'), get(indent, 'w:firstLineChars')
+        # A hanging indent becomes a negative text indent
+        h = h if h is None else '-'+h
+        hc = hc if hc is None else '-'+hc
+        # Precedence: hangingChars, hanging, firstLineChars, firstLine
+        ti = (simple_float(hc, 0.01) if hc is not None else simple_float(h, 0.05) if h is not None else
+              simple_float(flc, 0.01) if flc is not None else simple_float(fl, 0.05) if fl is not None else None)
+        if ti is not None:
+            # 'em' exactly when one of the *Chars values was the one picked above
+            text_indent = '%.3g%s' % (ti, 'em' if hc is not None or (h is None and flc is not None) else 'pt')
+
+    # The locals are named padding_* but are stored as margins
+    setattr(dest, 'margin_left', padding_left)
+    setattr(dest, 'margin_right', padding_right)
+    setattr(dest, 'text_indent', text_indent)
+
+def read_justification(parent, dest):
+    '''
+    Set dest.text_align from the w:jc children of parent that have a w:val.
+
+    'left', 'center' and 'right' are used as is. 'both', 'distribute' and any
+    value containing 'thai' or 'kashida' give 'justify'. Other values leave
+    the result unchanged, and with no usable w:jc the result is inherit.
+    '''
+    align = inherit
+    for jc in XPath('./w:jc[@w:val]')(parent):
+        val = get(jc, 'w:val')
+        if val:
+            if val in {'left', 'center', 'right',}:
+                align = val
+            elif val in {'both', 'distribute'} or 'thai' in val or 'kashida' in val:
+                align = 'justify'
+    dest.text_align = align
+
+def read_spacing(parent, dest):
+    '''
+    Read the w:spacing children of parent and set margin_top, margin_bottom
+    and line_height on dest.
+
+    Each value is either inherit or a string. Margins come from the *Lines
+    attribute (scaled by 0.02, unit ex) when present, else from the plain
+    attribute (scaled by 0.05, unit pt), and are skipped when the matching
+    *Autospacing attribute is on. line_height is in pt for the exact/atLeast
+    line rules and a unitless multiplier (w:line / 240) otherwise.
+    '''
+    padding_top = padding_bottom = line_height = inherit
+    for s in XPath('./w:spacing')(parent):
+        a, al, aa = get(s, 'w:after'), get(s, 'w:afterLines'), get(s, 'w:afterAutospacing')
+        pb = None if aa in {'on', '1', 'true'} else simple_float(al, 0.02) if al is not None else simple_float(a, 0.05) if a is not None else None
+        if pb is not None:
+            padding_bottom = '%.3g%s' % (pb, 'ex' if al is not None else 'pt')
+
+        b, bl, bb = get(s, 'w:before'), get(s, 'w:beforeLines'), get(s, 'w:beforeAutospacing')
+        pt = None if bb in {'on', '1', 'true'} else simple_float(bl, 0.02) if bl is not None else simple_float(b, 0.05) if b is not None else None
+        if pt is not None:
+            padding_top = '%.3g%s' % (pt, 'ex' if bl is not None else 'pt')
+
+        l, lr = get(s, 'w:line'), get(s, 'w:lineRule', 'auto')
+        if l is not None:
+            # The OOXML value for a fixed line height is 'exact'. 'exactly' is
+            # still accepted so that anything matched before keeps matching.
+            fixed = lr in {'exact', 'exactly', 'atLeast'}
+            lh = simple_float(l, 0.05) if fixed else simple_float(l, 1/240.0)
+            # An unparseable w:line leaves line_height unchanged instead of
+            # failing in the number format below.
+            if lh is not None:
+                line_height = '%.3g%s' % (lh, 'pt' if fixed else '')
+
+    # The locals are named padding_* but are stored as margins
+    setattr(dest, 'margin_top', padding_top)
+    setattr(dest, 'margin_bottom', padding_bottom)
+    setattr(dest, 'line_height', line_height)
+
+def read_direction(parent, dest):
+    '''
+    Set dest.direction to 'rtl' when the w:val of any w:textFlow child of
+    parent contains 'rl' (compared in lower case), otherwise to inherit.
+    '''
+    direction = inherit
+    for flow in XPath('./w:textFlow[@w:val]')(parent):
+        val = get(flow, 'w:val')
+        if val and 'rl' in val.lower():
+            direction = 'rtl'
+    dest.direction = direction
+
+def read_shd(parent, dest):
+    '''
+    Set dest.background_color from the w:fill of the w:shd children of parent.
+
+    The last non-empty fill wins and is converted with simple_color, using
+    'transparent' as the automatic color. Without one the value is inherit.
+    '''
+    fill = inherit
+    for shd in XPath('./w:shd[@w:fill]')(parent):
+        val = get(shd, 'w:fill')
+        if not val:
+            continue
+        fill = simple_color(val, auto='transparent')
+    dest.background_color = fill
+
+def read_numbering(parent, dest):
+    '''
+    Set dest.numbering from the w:numPr children of parent.
+
+    The value is the tuple (num_id, level): num_id is the w:val of w:numId as
+    a string, level is the w:val of w:ilvl as an int, and either may be None.
+    When neither was found the value is inherit.
+    '''
+    level = num_id = None
+    for numpr in XPath('./w:numPr')(parent):
+        for ilvl in XPath('./w:ilvl[@w:val]')(numpr):
+            try:
+                level = int(get(ilvl, 'w:val'))
+            except (ValueError, TypeError):
+                # Keep whatever level was found before
+                pass
+        for num in XPath('./w:numId[@w:val]')(numpr):
+            num_id = get(num, 'w:val')
+    if num_id is None and level is None:
+        dest.numbering = inherit
+    else:
+        dest.numbering = (num_id, level)
+
+# }}}
+
+class ParagraphStyle(object):
+    '''
+    Paragraph level formatting read from a w:pPr element.
+
+    Every name in all_properties is an attribute of the instance; the value
+    inherit means the property is not set at this level. linked_style is the
+    w:val of a w:pStyle child of the w:pPr, or None.
+    '''
+
+    all_properties = (
+        'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
+        'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
+        'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
+        'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',
+
+        # Border margins padding
+        'border_left_width', 'border_left_style', 'border_left_color', 'padding_left',
+        'border_top_width', 'border_top_style', 'border_top_color', 'padding_top',
+        'border_right_width', 'border_right_style', 'border_right_color', 'padding_right',
+        'border_bottom_width', 'border_bottom_style', 'border_bottom_color', 'padding_bottom',
+        'margin_left', 'margin_top', 'margin_right', 'margin_bottom',
+
+        # Misc.
+        'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
+        'numbering', 'font_family', 'font_size',
+    )
+
+    def __init__(self, pPr=None):
+        '''
+        :param pPr: The w:pPr element to read, or None to create a style in
+                    which every property is inherit.
+        '''
+        self.linked_style = None
+        if pPr is None:
+            for p in self.all_properties:
+                setattr(self, p, inherit)
+        else:
+            for p in (
+                'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
+                'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
+                'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
+                'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',
+            ):
+                setattr(self, p, binary_property(pPr, p))
+
+            # The remaining properties are filled in by the module level
+            # read_* functions, which set attributes directly on self
+            for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering'):
+                f = globals()['read_%s' % x]
+                f(pPr, self)
+
+            for s in XPath('./w:pStyle[@w:val]')(pPr):
+                self.linked_style = get(s, 'w:val')
+
+            # Nothing in pPr is read for these two
+            self.font_family = self.font_size = inherit
+
+        self._css = None
+
+    def update(self, other):
+        '''
+        Overwrite the properties of this style with every property of other
+        that is not inherit. linked_style is overwritten when other has one.
+        '''
+        for prop in self.all_properties:
+            nval = getattr(other, prop)
+            if nval is not inherit:
+                setattr(self, prop, nval)
+        if other.linked_style is not None:
+            self.linked_style = other.linked_style
+
+    def resolve_based_on(self, parent):
+        '''
+        Fill every property that is still inherit with the value parent has
+        for it.
+        '''
+        for p in self.all_properties:
+            val = getattr(self, p)
+            if val is inherit:
+                setattr(self, p, getattr(parent, p))
+
+    @property
+    def css(self):
+        '''
+        The CSS for this style as an OrderedDict mapping property names to
+        values. It is built on first access and cached in self._css.
+        '''
+        if self._css is None:
+            self._css = c = OrderedDict()
+            if self.keepLines is True:
+                c['page-break-inside'] = 'avoid'
+            if self.pageBreakBefore is True:
+                c['page-break-before'] = 'always'
+            for edge in ('left', 'top', 'right', 'bottom'):
+                val = getattr(self, 'border_%s_width' % edge)
+                if val is not inherit:
+                    # Each edge gets its own width property
+                    c['border-%s-width' % edge] = '%.3gpt' % val
+                for x in ('style', 'color'):
+                    val = getattr(self, 'border_%s_%s' % (edge, x))
+                    if val is not inherit:
+                        c['border-%s-%s' % (edge, x)] = val
+                val = getattr(self, 'padding_%s' % edge)
+                if val is not inherit:
+                    c['padding-%s' % edge] = '%.3gpt' % val
+                val = getattr(self, 'margin_%s' % edge)
+                if val is not inherit:
+                    # Margins are already strings that include their unit
+                    c['margin-%s' % edge] = val
+
+            if self.line_height not in {inherit, '1'}:
+                c['line-height'] = self.line_height
+
+            for x in ('text_indent', 'text_align', 'background_color', 'font_family', 'font_size'):
+                val = getattr(self, x)
+                if val is not inherit:
+                    if x == 'font_size':
+                        val = '%.3gpt' % val
+                    c[x.replace('_', '-')] = val
+
+        return self._css
+
+        # TODO: keepNext must be done at markup level
+
+
--- a/src/calibre/ebooks/docx/char_styles.py
+++ b/src/calibre/ebooks/docx/char_styles.py
@ -0,0 +1,249 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+from collections import OrderedDict
+from calibre.ebooks.docx.block_styles import (  # noqa
+    inherit, simple_color, LINE_STYLES, simple_float, binary_property, read_shd)
+from calibre.ebooks.docx.names import XPath, get
+
+# Read from XML {{{
+def read_text_border(parent, dest):
+    '''
+    Read the <w:bdr> children of parent and store the result on dest as
+    border_color, border_style, border_width and padding. Whatever is not
+    specified stays inherit.
+    '''
+    border_color = border_style = border_width = padding = inherit
+    borders = XPath('./w:bdr')(parent)
+    if borders:
+        # A border element is present, so start from visible defaults
+        border_color, border_style, border_width = simple_color('auto'), 'solid', 1
+    for bdr in borders:
+        raw_color = get(bdr, 'w:color')
+        if raw_color is not None:
+            border_color = simple_color(raw_color)
+        raw_style = get(bdr, 'w:val')
+        if raw_style is not None:
+            border_style = LINE_STYLES.get(raw_style, 'solid')
+        raw_space = get(bdr, 'w:space')
+        if raw_space is not None:
+            try:
+                padding = float(raw_space)
+            except (ValueError, TypeError):
+                pass
+        raw_size = get(bdr, 'w:sz')
+        if raw_size is not None:
+            # We do not care about art borders (they are only used for page
+            # borders). The size is clamped to 2..96 and then divided by 8.
+            try:
+                border_width = min(96, max(2, float(raw_size))) / 8
+            except (ValueError, TypeError):
+                pass
+
+    dest.border_color = border_color
+    dest.border_style = border_style
+    dest.border_width = border_width
+    dest.padding = padding
+
+def read_color(parent, dest):
+    ' Set dest.color from the last <w:color> child of parent that has a non-empty w:val, otherwise to inherit '
+    color = inherit
+    for node in XPath('./w:color[@w:val]')(parent):
+        raw = get(node, 'w:val')
+        if raw:
+            color = simple_color(raw)
+    dest.color = color
+
+def read_highlight(parent, dest):
+    '''
+    Set dest.highlight from the last <w:highlight> child of parent that has
+    a non-empty w:val. The value none is stored as transparent. If there is
+    no such child, dest.highlight is inherit.
+    '''
+    highlight = inherit
+    for node in XPath('./w:highlight[@w:val]')(parent):
+        raw = get(node, 'w:val')
+        if raw:
+            highlight = 'transparent' if raw == 'none' else raw
+    dest.highlight = highlight
+
+def read_lang(parent, dest):
+    '''
+    Set dest.lang from the <w:lang> children of parent that have a w:val.
+
+    A value that parses as a hexadecimal number is looked up in the lcid
+    table; any other value is used as is. If nothing usable is found,
+    dest.lang is inherit.
+    '''
+    ans = inherit
+    for col in XPath('./w:lang[@w:val]')(parent):
+        val = get(col, 'w:val')
+        if not val:
+            continue
+        try:
+            code = int(val, 16)
+        except (ValueError, TypeError):
+            ans = val
+        else:
+            from calibre.ebooks.docx.lcid import lcid
+            mapped = lcid.get(code, None)
+            if mapped:
+                ans = mapped
+            elif val.isalpha():
+                # Tags such as "de", "da" or "fa" consist only of hex digits
+                # and so parse as a number. They are not in the lcid table,
+                # keep them as language tags instead of dropping them.
+                ans = val
+    setattr(dest, 'lang', ans)
+
+def read_letter_spacing(parent, dest):
+    '''
+    Set dest.letter_spacing from the last <w:spacing> child of parent whose
+    w:val simple_float() can convert (called with 0.05 as second argument),
+    otherwise to inherit.
+    '''
+    spacing = inherit
+    for node in XPath('./w:spacing[@w:val]')(parent):
+        parsed = simple_float(get(node, 'w:val'), 0.05)
+        if parsed is not None:
+            spacing = parsed
+    dest.letter_spacing = spacing
+
+def read_sz(parent, dest):
+    '''
+    Set dest.font_size from the last <w:sz> child of parent whose w:val
+    simple_float() can convert (called with 0.5 as second argument),
+    otherwise to inherit.
+    '''
+    size = inherit
+    for node in XPath('./w:sz[@w:val]')(parent):
+        parsed = simple_float(get(node, 'w:val'), 0.5)
+        if parsed is not None:
+            size = parsed
+    dest.font_size = size
+
+def read_underline(parent, dest):
+    '''
+    Set dest.text_decoration from the <w:u> children of parent that have a
+    non-empty w:val: 'none' when the value is none, 'underline' for every
+    other value and inherit when there is no such child.
+    '''
+    ans = inherit
+    for col in XPath('./w:u[@w:val]')(parent):
+        val = get(col, 'w:val')
+        if val:
+            # w:val="none" explicitly switches underlining off, it must not
+            # be reported as an underline
+            ans = 'none' if val == 'none' else 'underline'
+    setattr(dest, 'text_decoration', ans)
+
+def read_vert_align(parent, dest):
+    ' Set dest.vert_align to the last w:val of a <w:vertAlign> child of parent that is baseline, subscript or superscript, otherwise to inherit '
+    allowed = {'baseline', 'subscript', 'superscript'}
+    align = inherit
+    for node in XPath('./w:vertAlign[@w:val]')(parent):
+        raw = get(node, 'w:val')
+        if raw in allowed:
+            align = raw
+    dest.vert_align = align
+
+def read_font_family(parent, dest):
+    ' Set dest.font_family to the last non-empty w:ascii of a <w:rFonts> child of parent, otherwise to inherit '
+    family = inherit
+    for node in XPath('./w:rFonts[@w:ascii]')(parent):
+        raw = get(node, 'w:ascii')
+        if raw:
+            family = raw
+    dest.font_family = family
+# }}}
+
+class RunStyle(object):
+
+    '''
+    The formatting of a run of text, read from a <w:rPr> element. The names
+    in all_properties are the attributes that take part in update() and
+    resolve_based_on(); an attribute that was not specified holds inherit.
+    '''
+
+    all_properties = {
+        'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint',
+        'rtl', 'shadow', 'smallCaps', 'strike', 'vanish',
+
+        'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color',
+        'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang', 'font_family'
+    }
+
+    toggle_properties = {
+        'b', 'bCs', 'caps', 'emboss', 'i', 'iCs', 'imprint', 'shadow', 'smallCaps', 'strike', 'dstrike', 'vanish',
+    }
+
+    def __init__(self, rPr=None):
+        '''
+        :param rPr: The element to read the properties from. If None, every
+                    property in all_properties is set to inherit.
+        '''
+        self.linked_style = None
+        if rPr is None:
+            for p in self.all_properties:
+                setattr(self, p, inherit)
+        else:
+            for p in (
+                'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow',
+                'smallCaps', 'strike', 'vanish',
+            ):
+                setattr(self, p, binary_property(rPr, p))
+
+            # Each name selects the module level function read_<name>, which
+            # stores what it reads on this object
+            for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang', 'font_family'):
+                f = globals()['read_%s' % x]
+                f(rPr, self)
+
+            for s in XPath('./w:rStyle[@w:val]')(rPr):
+                self.linked_style = get(s, 'w:val')
+
+        self._css = None
+
+    def update(self, other):
+        ' Overwrite the properties of this style with every property of other that is not inherit '
+        for prop in self.all_properties:
+            nval = getattr(other, prop)
+            if nval is not inherit:
+                setattr(self, prop, nval)
+        if other.linked_style is not None:
+            self.linked_style = other.linked_style
+
+    def resolve_based_on(self, parent):
+        ' Take the value of every property that is still inherit from parent '
+        for p in self.all_properties:
+            val = getattr(self, p)
+            if val is inherit:
+                setattr(self, p, getattr(parent, p))
+
+    def get_border_css(self, ans):
+        ' Add border-color, border-style and border-width to the dict ans, for those that are not inherit '
+        for x in ('color', 'style', 'width'):
+            val = getattr(self, 'border_'+x)
+            if x == 'width' and val is not inherit:
+                val = '%.3gpt' % val
+            if val is not inherit:
+                ans['border-%s' % x] = val
+
+    def clear_border_css(self):
+        ' Reset the three border properties to inherit '
+        # NOTE(review): this does not reset self._css, so a css dict that
+        # was already built keeps its border entries - confirm that is
+        # what the callers expect
+        for x in ('color', 'style', 'width'):
+            setattr(self, 'border_'+x, inherit)
+
+    @property
+    def css(self):
+        '''
+        The CSS declarations for this style, as an OrderedDict mapping
+        property name to value. Built on first access and cached in
+        self._css.
+        '''
+        if self._css is None:
+            c = self._css = OrderedDict()
+            td = set()
+            # 'none' cannot be combined with other decorations, so it is
+            # only written out when nothing else is set (see below)
+            if self.text_decoration is not inherit and self.text_decoration != 'none':
+                td.add(self.text_decoration)
+            # The on/off properties are compared against True, like caps, i,
+            # smallCaps and vanish below: a property that was never
+            # specified holds the inherit sentinel and must not count as
+            # switched on
+            if self.strike is True:
+                td.add('line-through')
+            if self.dstrike is True:
+                td.add('line-through')
+            if td:
+                c['text-decoration'] = ' '.join(td)
+            elif self.text_decoration == 'none':
+                c['text-decoration'] = 'none'
+            if self.caps is True:
+                c['text-transform'] = 'uppercase'
+            if self.i is True:
+                c['font-style'] = 'italic'
+            if self.shadow is True:
+                c['text-shadow'] = '2px 2px'
+            if self.smallCaps is True:
+                c['font-variant'] = 'small-caps'
+            if self.vanish is True:
+                c['display'] = 'none'
+
+            self.get_border_css(c)
+            if self.padding is not inherit:
+                c['padding'] = '%.3gpt' % self.padding
+
+            for x in ('color', 'background_color'):
+                val = getattr(self, x)
+                if val is not inherit:
+                    c[x.replace('_', '-')] = val
+
+            for x in ('letter_spacing', 'font_size'):
+                val = getattr(self, x)
+                if val is not inherit:
+                    c[x.replace('_', '-')] = '%.3gpt' % val
+
+            # A highlight takes precedence over background_color
+            if self.highlight is not inherit and self.highlight != 'transparent':
+                c['background-color'] = self.highlight
+
+            if self.b is True:
+                c['font-weight'] = 'bold'
+
+            if self.font_family is not inherit:
+                c['font-family'] = self.font_family
+
+        return self._css
+
+    def same_border(self, other):
+        '''
+        True only if both this style and other have at least one border
+        declaration in their css and the border color, style and width
+        declarations of the two are equal.
+        '''
+        for x in (self, other):
+            has_border = False
+            for y in ('color', 'style', 'width'):
+                if ('border-%s' % y) in x.css:
+                    has_border = True
+                    break
+            if not has_border:
+                return False
+
+        s = tuple(self.css.get('border-%s' % y, None) for y in ('color', 'style', 'width'))
+        o = tuple(other.css.get('border-%s' % y, None) for y in ('color', 'style', 'width'))
+        return s == o
+
--- a/src/calibre/ebooks/docx/container.py
+++ b/src/calibre/ebooks/docx/container.py
@ -105,6 +105,9 @@ class DOCX(object):
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f

+    def exists(self, name):
+        ' Return True if name is one of the keys of self.names '
+        return name in self.names
+
    def read(self, name):
        if hasattr(self, 'zipf'):
            return self.zipf.open(name).read()
@ -149,14 +152,41 @@ class DOCX(object):
            self.relationships_rmap[target] = typ

    @property
-    def document(self):
+    def document_name(self):
+        # The name of the main document: the DOCUMENT entry of
+        # self.relationships if there is one, otherwise the first name in
+        # self.names that is document.xml or ends with /document.xml.
+        # Raises InvalidDOCX if neither exists.
        name = self.relationships.get(DOCUMENT, None)
        if name is None:
            names = tuple(n for n in self.names if n == 'document.xml' or n.endswith('/document.xml'))
            if not names:
                raise InvalidDOCX('The file %s docx file has no main document' % self.name)
            name = names[0]
-        return fromstring(self.read(name))
+        return name
+
+    @property
+    def document(self):
+        ' The main document (see document_name), read and parsed with fromstring() '
+        return fromstring(self.read(self.document_name))
+
+    @property
+    def document_relationships(self):
+        ' The (by_id, by_type) relationships of the main document, see get_relationships() '
+        return self.get_relationships(self.document_name)
+
+    def get_relationships(self, name):
+        '''
+        Read the relationships file that belongs to name, that is
+        <directory of name>/_rels/<file name>.rels
+
+        Returns the tuple (by_id, by_type): two dicts that map the Id and
+        the Type of every relationship to its target. Targets are resolved
+        against the directory of name, except for those marked with
+        TargetMode="External", which are returned unchanged. Both dicts are
+        empty if reading the relationships file raises KeyError.
+        '''
+        parts = name.split('/')
+        base = '/'.join(parts[:-1])
+        by_id, by_type = {}, {}
+        rels_name = '/'.join(parts[:-1] + ['_rels', parts[-1] + '.rels'])
+        try:
+            raw = self.read(rels_name)
+        except KeyError:
+            pass
+        else:
+            root = fromstring(raw)
+            for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
+                target = item.get('Target')
+                # External targets (URLs, for example) do not live inside
+                # the container, so there is nothing to resolve them against
+                if item.get('TargetMode', None) != 'External':
+                    target = target.lstrip('/')
+                    if base:
+                        # When name is at the top level base is empty and
+                        # joining would produce a leading slash, which the
+                        # keys of self.names never have
+                        target = '/'.join((base, target))
+                typ = item.get('Type')
+                Id = item.get('Id')
+                by_id[Id] = by_type[typ] = target
+
+        return by_id, by_type

    @property
    def metadata(self):
--- a/src/calibre/ebooks/docx/dump.py
+++ b/src/calibre/ebooks/docx/dump.py
@ -0,0 +1,37 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+import sys, os, shutil
+
+from lxml import etree
+
+from calibre import walk
+from calibre.utils.zipfile import ZipFile
+
+def dump(path):
+    '''
+    Extract the zip file at path into a directory named after it (base name
+    without extension plus _extracted, relative to the current directory)
+    and pretty print every .xml and .rels file in it. An existing directory
+    of that name is removed first.
+    '''
+    dest = os.path.splitext(os.path.basename(path))[0] + '_extracted'
+    if os.path.exists(dest):
+        shutil.rmtree(dest)
+    with ZipFile(path) as zf:
+        zf.extractall(dest)
+
+    for f in walk(dest):
+        if not f.endswith(('.xml', '.rels')):
+            continue
+        # Re-write the file in place: parse it, empty it, write it back
+        with open(f, 'r+b') as stream:
+            root = etree.fromstring(stream.read())
+            stream.seek(0)
+            stream.truncate()
+            stream.write(etree.tostring(root, pretty_print=True, encoding='utf-8', xml_declaration=True))
+
+    print (path, 'dumped to', dest)
+
+if __name__ == '__main__':
+    # Dump the file named by the last command line argument
+    dump(sys.argv[-1])
+
--- a/src/calibre/ebooks/docx/fonts.py
+++ b/src/calibre/ebooks/docx/fonts.py
@ -0,0 +1,132 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+import os, re
+from collections import namedtuple
+
+from calibre.ebooks.docx.block_styles import binary_property, inherit
+from calibre.ebooks.docx.names import XPath, get
+from calibre.utils.filenames import ascii_filename
+from calibre.utils.fonts.scanner import font_scanner, NoFonts
+from calibre.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font
+
+# An embedded font file, as recorded by Family: name is the value found in
+# embed_relationships for the r:id, key is the w:fontKey attribute and
+# subsetted is True when w:subsetted is one of 1, true or on
+Embed = namedtuple('Embed', 'name key subsetted')
+
+def has_system_fonts(name):
+    ' True if font_scanner returns any fonts for the family name, False if it returns none or raises NoFonts '
+    try:
+        found = font_scanner.fonts_for_family(name)
+    except NoFonts:
+        return False
+    return bool(found)
+
+def get_variant(bold=False, italic=False):
+    ' Return the variant name (Regular, Italic, Bold or BoldItalic) for the given combination of bold and italic '
+    variants = {
+        (False, False): 'Regular',
+        (False, True): 'Italic',
+        (True, False): 'Bold',
+        (True, True): 'BoldItalic',
+    }
+    return variants[(bold, italic)]
+
+class Family(object):
+
+    ' The information read from a single <w:font> element '
+
+    def __init__(self, elem, embed_relationships):
+        '''
+        :param elem: The <w:font> element to read
+        :param embed_relationships: Container that is checked for, and
+                                    indexed by, the r:id of every embedded font
+        '''
+        declared = get(elem, 'w:name')
+        self.name = self.family_name = declared
+        self.alt_names = tuple(get(alt, 'w:val') for alt in XPath('./w:altName')(elem))
+        # Only when the declared name has no system fonts: use the first
+        # alternate name that does
+        if self.alt_names and not has_system_fonts(self.name):
+            for alt in self.alt_names:
+                if has_system_fonts(alt):
+                    self.family_name = alt
+                    break
+
+        self.embedded = {}
+        for variant in ('Regular', 'Bold', 'Italic', 'BoldItalic'):
+            for node in XPath('./w:embed%s[@r:id]' % variant)(elem):
+                rid = get(node, 'r:id')
+                if rid not in embed_relationships:
+                    continue
+                is_subset = get(node, 'w:subsetted') in {'1', 'true', 'on'}
+                self.embedded[variant] = Embed(embed_relationships[rid], get(node, 'w:fontKey'), is_subset)
+
+        self.generic_family = 'auto'
+        for node in XPath('./w:family[@w:val]')(elem):
+            self.generic_family = get(node, 'w:val', 'auto')
+
+        not_truetype = binary_property(elem, 'notTrueType')
+        self.is_ttf = True if not_truetype is inherit else not not_truetype
+
+        self.panose1 = self.panose_name = None
+        for node in XPath('./w:panose1[@w:val]')(elem):
+            try:
+                digits = get(node, 'w:val')
+                # Every pair of hex digits becomes one number
+                panose = tuple(int(digits[pos:pos+2], 16) for pos in xrange(0, len(digits), 2))
+            except (TypeError, ValueError, IndexError):
+                continue
+            self.panose1 = panose
+            self.panose_name = panose_to_css_generic_family(panose)
+
+        # Preference order: the w:family value, then panose, then serif
+        known = {'roman':'serif', 'swiss':'sans-serif', 'modern':'monospace',
+                 'decorative':'fantasy', 'script':'cursive'}
+        self.css_generic_family = known.get(self.generic_family, None) or self.panose_name or 'serif'
+
+
+class Fonts(object):
+
+    '''
+    The font families declared in a document (self.fonts, by name) together
+    with the (name, variant) pairs that were requested via family_for()
+    (self.used).
+    '''
+
+    def __init__(self):
+        self.fonts = {}
+        self.used = set()
+
+    def __call__(self, root, embed_relationships, docx, dest_dir):
+        ' Create a Family for every <w:font> element with a w:name found in root. docx and dest_dir are not used here. '
+        for elem in XPath('//w:font[@w:name]')(root):
+            self.fonts[get(elem, 'w:name')] = Family(elem, embed_relationships)
+
+    def family_for(self, name, bold=False, italic=False):
+        '''
+        Return the value to use for the CSS font-family property for the
+        font name, and record the requested variant in self.used. Unknown
+        names give serif.
+        '''
+        f = self.fonts.get(name, None)
+        if f is None:
+            return 'serif'
+        variant = get_variant(bold, italic)
+        self.used.add((name, variant))
+        # An embedded font is referred to by its declared name, otherwise
+        # the name chosen by Family (possibly an alternate name) is used
+        name = f.name if variant in f.embedded else f.family_name
+        return '"%s", %s' % (name.replace('"', ''), f.css_generic_family)
+
+    def embed_fonts(self, dest_dir, docx):
+        '''
+        Write the embedded font of every used (name, variant) pair that has
+        one into the fonts sub-directory of dest_dir (created when needed)
+        and return the matching @font-face rules as a single string.
+        '''
+        defs = []
+        dest_dir = os.path.join(dest_dir, 'fonts')
+        for name, variant in self.used:
+            f = self.fonts[name]
+            if variant in f.embedded:
+                if not os.path.exists(dest_dir):
+                    os.mkdir(dest_dir)
+                fname = self.write(name, dest_dir, docx, variant)
+                if fname is not None:
+                    d = {'font-family':'"%s"' % name.replace('"', ''), 'src': 'url("fonts/%s")' % fname}
+                    if 'Bold' in variant:
+                        d['font-weight'] = 'bold'
+                    if 'Italic' in variant:
+                        d['font-style'] = 'italic'
+                    d = ['%s: %s' % (k, v) for k, v in d.iteritems()]
+                    d = ';\n\t'.join(d)
+                    defs.append('@font-face {\n\t%s\n}\n' % d)
+        return '\n'.join(defs)
+
+    def write(self, name, dest_dir, docx, variant):
+        '''
+        Write the embedded font for (name, variant) into dest_dir.
+
+        Returns the file name that was written, or None if the data does
+        not pass is_truetype_font().
+        '''
+        f = self.fonts[name]
+        ef = f.embedded[variant]
+        raw = docx.read(ef.name)
+        prefix = raw[:32]
+        if ef.key:
+            # The first 32 bytes are XORed with the bytes of the font key
+            # taken in reverse order; the rest of the file is stored as is
+            key = re.sub(r'[^A-Fa-f0-9]', '', ef.key)
+            key = bytearray(reversed(tuple(int(key[i:i+2], 16) for i in xrange(0, len(key), 2))))
+            # A key without any hex digits cannot be applied (and would
+            # cause a division by zero below), leave the data unchanged
+            if key:
+                prefix = bytearray(prefix)
+                prefix = bytes(bytearray(prefix[i]^key[i % len(key)] for i in xrange(len(prefix))))
+        if not is_truetype_font(prefix):
+            return None
+        ext = 'otf' if prefix.startswith(b'OTTO') else 'ttf'
+        fname = ascii_filename('%s - %s.%s' % (name, variant, ext))
+        with open(os.path.join(dest_dir, fname), 'wb') as dest:
+            dest.write(prefix)
+            dest.write(raw[32:])
+
+        return fname
+
--- a/src/calibre/ebooks/docx/lcid.py
+++ b/src/calibre/ebooks/docx/lcid.py
@ -0,0 +1,233 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+lcid = {
+    1078: 'af',  # Afrikaans - South Africa
+    1052: 'sq',  # Albanian - Albania
+    1118: 'am',  # Amharic - Ethiopia
+    1025: 'ar',  # Arabic - Saudi Arabia
+    5121: 'ar',  # Arabic - Algeria
+    15361: 'ar',  # Arabic - Bahrain
+    3073: 'ar',  # Arabic - Egypt
+    2049: 'ar',  # Arabic - Iraq
+    11265: 'ar',  # Arabic - Jordan
+    13313: 'ar',  # Arabic - Kuwait
+    12289: 'ar',  # Arabic - Lebanon
+    4097: 'ar',  # Arabic - Libya
+    6145: 'ar',  # Arabic - Morocco
+    8193: 'ar',  # Arabic - Oman
+    16385: 'ar',  # Arabic - Qatar
+    10241: 'ar',  # Arabic - Syria
+    7169: 'ar',  # Arabic - Tunisia
+    14337: 'ar',  # Arabic - U.A.E.
+    9217: 'ar',  # Arabic - Yemen
+    1067: 'hy',  # Armenian - Armenia
+    1101: 'as',  # Assamese
+    2092: 'az',  # Azeri (Cyrillic)
+    1068: 'az',  # Azeri (Latin)
+    1069: 'eu',  # Basque
+    1059: 'be',  # Belarusian
+    1093: 'bn',  # Bengali (India)
+    2117: 'bn',  # Bengali (Bangladesh)
+    5146: 'bs',  # Bosnian (Bosnia/Herzegovina)
+    1026: 'bg',  # Bulgarian
+    1109: 'my',  # Burmese
+    1027: 'ca',  # Catalan
+    1116: 'chr',  # Cherokee - United States
+    2052: 'zh',  # Chinese - People's Republic of China
+    4100: 'zh',  # Chinese - Singapore
+    1028: 'zh',  # Chinese - Taiwan
+    3076: 'zh',  # Chinese - Hong Kong SAR
+    5124: 'zh',  # Chinese - Macao SAR
+    1050: 'hr',  # Croatian
+    4122: 'hr',  # Croatian (Bosnia/Herzegovina)
+    1029: 'cs',  # Czech
+    1030: 'da',  # Danish
+    1125: 'dv',  # Divehi
+    1043: 'nl',  # Dutch - Netherlands
+    2067: 'nl',  # Dutch - Belgium
+    1126: 'bin',  # Edo
+    1033: 'en',  # English - United States
+    2057: 'en',  # English - United Kingdom
+    3081: 'en',  # English - Australia
+    10249: 'en',  # English - Belize
+    4105: 'en',  # English - Canada
+    9225: 'en',  # English - Caribbean
+    15369: 'en',  # English - Hong Kong SAR
+    16393: 'en',  # English - India
+    14345: 'en',  # English - Indonesia
+    6153: 'en',  # English - Ireland
+    8201: 'en',  # English - Jamaica
+    17417: 'en',  # English - Malaysia
+    5129: 'en',  # English - New Zealand
+    13321: 'en',  # English - Philippines
+    18441: 'en',  # English - Singapore
+    7177: 'en',  # English - South Africa
+    11273: 'en',  # English - Trinidad
+    12297: 'en',  # English - Zimbabwe
+    1061: 'et',  # Estonian
+    1080: 'fo',  # Faroese
+    1065: None,  # TODO: Farsi
+    1124: 'fil',  # Filipino
+    1035: 'fi',  # Finnish
+    1036: 'fr',  # French - France
+    2060: 'fr',  # French - Belgium
+    11276: 'fr',  # French - Cameroon
+    3084: 'fr',  # French - Canada
+    9228: 'fr',  # French - Democratic Rep. of Congo
+    12300: 'fr',  # French - Cote d'Ivoire
+    15372: 'fr',  # French - Haiti
+    5132: 'fr',  # French - Luxembourg
+    13324: 'fr',  # French - Mali
+    6156: 'fr',  # French - Monaco
+    14348: 'fr',  # French - Morocco
+    58380: 'fr',  # French - North Africa
+    8204: 'fr',  # French - Reunion
+    10252: 'fr',  # French - Senegal
+    4108: 'fr',  # French - Switzerland
+    7180: 'fr',  # French - West Indies
+    1122: 'fy',  # Frisian - Netherlands
+    1127: None,  # TODO: Fulfulde - Nigeria
+    1071: 'mk',  # FYRO Macedonian
+    2108: 'ga',  # Gaelic (Ireland)
+    1084: 'gd',  # Gaelic (Scotland)
+    1110: 'gl',  # Galician
+    1079: 'ka',  # Georgian
+    1031: 'de',  # German - Germany
+    3079: 'de',  # German - Austria
+    5127: 'de',  # German - Liechtenstein
+    4103: 'de',  # German - Luxembourg
+    2055: 'de',  # German - Switzerland
+    1032: 'el',  # Greek
+    1140: 'gn',  # Guarani - Paraguay
+    1095: 'gu',  # Gujarati
+    1128: 'ha',  # Hausa - Nigeria
+    1141: 'haw',  # Hawaiian - United States
+    1037: 'he',  # Hebrew
+    1081: 'hi',  # Hindi
+    1038: 'hu',  # Hungarian
+    1129: None,  # TODO: Ibibio - Nigeria
+    1039: 'is',  # Icelandic
+    1136: 'ig',  # Igbo - Nigeria
+    1057: 'id',  # Indonesian
+    1117: 'iu',  # Inuktitut
+    1040: 'it',  # Italian - Italy
+    2064: 'it',  # Italian - Switzerland
+    1041: 'ja',  # Japanese
+    1099: 'kn',  # Kannada
+    1137: 'kr',  # Kanuri - Nigeria
+    2144: 'ks',  # Kashmiri
+    1120: 'ks',  # Kashmiri (Arabic)
+    1087: 'kk',  # Kazakh
+    1107: 'km',  # Khmer
+    1111: 'kok',  # Konkani
+    1042: 'ko',  # Korean
+    1088: 'ky',  # Kyrgyz (Cyrillic)
+    1108: 'lo',  # Lao
+    1142: 'la',  # Latin
+    1062: 'lv',  # Latvian
+    1063: 'lt',  # Lithuanian
+    1086: 'ms',  # Malay - Malaysia
+    2110: 'ms',  # Malay - Brunei Darussalam
+    1100: 'ml',  # Malayalam
+    1082: 'mt',  # Maltese
+    1112: 'mni',  # Manipuri
+    1153: 'mi',  # Maori - New Zealand
+    1102: 'mr',  # Marathi
+    1104: 'mn',  # Mongolian (Cyrillic)
+    2128: 'mn',  # Mongolian (Mongolian)
+    1121: 'ne',  # Nepali
+    2145: 'ne',  # Nepali - India
+    1044: 'no',  # Norwegian (Bokmål)
+    2068: 'no',  # Norwegian (Nynorsk)
+    1096: 'or',  # Oriya
+    1138: 'om',  # Oromo
+    1145: 'pap',  # Papiamentu
+    1123: 'ps',  # Pashto
+    1045: 'pl',  # Polish
+    1046: 'pt',  # Portuguese - Brazil
+    2070: 'pt',  # Portuguese - Portugal
+    1094: 'pa',  # Punjabi
+    2118: 'pa',  # Punjabi (Pakistan)
+    1131: 'qu',  # Quechua - Bolivia
+    2155: 'qu',  # Quechua - Ecuador
+    3179: 'qu',  # Quechua - Peru
+    1047: 'rm',  # Rhaeto-Romanic
+    1048: 'ro',  # Romanian
+    2072: 'ro',  # Romanian - Moldova
+    1049: 'ru',  # Russian
+    2073: 'ru',  # Russian - Moldova
+    1083: 'se',  # Sami (Lappish)
+    1103: 'sa',  # Sanskrit
+    1132: 'nso',  # Sepedi
+    3098: 'sr',  # Serbian (Cyrillic)
+    2074: 'sr',  # Serbian (Latin)
+    1113: 'sd',  # Sindhi - India
+    2137: 'sd',  # Sindhi - Pakistan
+    1115: 'si',  # Sinhalese - Sri Lanka
+    1051: 'sk',  # Slovak
+    1060: 'sl',  # Slovenian
+    1143: 'so',  # Somali
+    1070: 'wen',  # Sorbian
+    3082: 'es',  # Spanish - Spain (Modern Sort)
+    1034: 'es',  # Spanish - Spain (Traditional Sort)
+    11274: 'es',  # Spanish - Argentina
+    16394: 'es',  # Spanish - Bolivia
+    13322: 'es',  # Spanish - Chile
+    9226: 'es',  # Spanish - Colombia
+    5130: 'es',  # Spanish - Costa Rica
+    7178: 'es',  # Spanish - Dominican Republic
+    12298: 'es',  # Spanish - Ecuador
+    17418: 'es',  # Spanish - El Salvador
+    4106: 'es',  # Spanish - Guatemala
+    18442: 'es',  # Spanish - Honduras
+    58378: 'es',  # Spanish - Latin America
+    2058: 'es',  # Spanish - Mexico
+    19466: 'es',  # Spanish - Nicaragua
+    6154: 'es',  # Spanish - Panama
+    15370: 'es',  # Spanish - Paraguay
+    10250: 'es',  # Spanish - Peru
+    20490: 'es',  # Spanish - Puerto Rico
+    21514: 'es',  # Spanish - United States
+    14346: 'es',  # Spanish - Uruguay
+    8202: 'es',  # Spanish - Venezuela
+    1072: None,  # TODO: Sutu
+    1089: 'sw',  # Swahili
+    1053: 'sv',  # Swedish
+    2077: 'sv',  # Swedish - Finland
+    1114: 'syr',  # Syriac
+    1064: 'tg',  # Tajik
+    1119: None,  # TODO: Tamazight (Arabic)
+    2143: None,  # TODO: Tamazight (Latin)
+    1097: 'ta',  # Tamil
+    1092: 'tt',  # Tatar
+    1098: 'te',  # Telugu
+    1054: 'th',  # Thai
+    2129: 'bo',  # Tibetan - Bhutan
+    1105: 'bo',  # Tibetan - People's Republic of China
+    2163: 'ti',  # Tigrigna - Eritrea
+    1139: 'ti',  # Tigrigna - Ethiopia
+    1073: 'ts',  # Tsonga
+    1074: 'tn',  # Tswana
+    1055: 'tr',  # Turkish
+    1090: 'tk',  # Turkmen
+    1152: 'ug',  # Uighur - China
+    1058: 'uk',  # Ukrainian
+    1056: 'ur',  # Urdu
+    2080: 'ur',  # Urdu - India
+    2115: 'uz',  # Uzbek (Cyrillic)
+    1091: 'uz',  # Uzbek (Latin)
+    1075: 've',  # Venda
+    1066: 'vi',  # Vietnamese
+    1106: 'cy',  # Welsh
+    1076: 'xh',  # Xhosa
+    1144: 'ii',  # Yi
+    1085: 'yi',  # Yiddish
+    1130: 'yo',  # Yoruba
+    1077: 'zu'  # Zulu
+}
--- a/src/calibre/ebooks/docx/names.py
+++ b/src/calibre/ebooks/docx/names.py
@ -11,6 +11,9 @@ from lxml.etree import XPath as X
 DOCUMENT  = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument'
 DOCPROPS  = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties'
 APPPROPS  = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
+STYLES    = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
+NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering'
+FONTS     = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable'

 namespaces = {
    'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
@ -20,6 +23,7 @@ namespaces = {
    'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
    'w10': 'urn:schemas-microsoft-com:office:word',
    'wne': 'http://schemas.microsoft.com/office/word/2006/wordml',
+    'xml': 'http://www.w3.org/XML/1998/namespace',
    # Drawing
    'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
    'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math',
@ -42,6 +46,26 @@ namespaces = {
    'dcterms': 'http://purl.org/dc/terms/'
 }

-def XPath(expr):
-    return X(expr, namespaces=namespaces)
+xpath_cache = {}
+
+def XPath(expr):
+    ' Return the compiled form of the XPath expression expr (using the namespaces map), compiling each expression only once '
+    compiled = xpath_cache.get(expr, None)
+    if compiled is not None:
+        return compiled
+    xpath_cache[expr] = compiled = X(expr, namespaces=namespaces)
+    return compiled
+
+def is_tag(x, q):
+    ' True if the tag of x (or x itself, if it has no tag attribute) is the one named by q, a name of the form prefix:name '
+    tag = getattr(x, 'tag', x)
+    prefix, sep, local = q.partition(':')
+    return tag == '{%s}%s' % (namespaces.get(prefix, None), local)
+
+def barename(x):
+    ' Return what follows the last } in x, or all of x if it contains no } '
+    return x.rsplit('}', 1)[-1]
+
+def XML(x):
+    ' Return the name x qualified with the xml namespace, in {namespace}name form '
+    xml_ns = namespaces['xml']
+    return '{%s}%s' % (xml_ns, x)
+
+def get(x, attr, default=None):
+    ' Return the attribute attr (given as prefix:name) of the element x, or default if x does not have it '
+    prefix, sep, local = attr.partition(':')
+    qualified = '{%s}%s' % (namespaces[prefix], local)
+    return x.attrib.get(qualified, default)

--- a/src/calibre/ebooks/docx/numbering.py
+++ b/src/calibre/ebooks/docx/numbering.py
@ -0,0 +1,300 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+import re
+from collections import Counter
+
+from lxml.html.builder import OL, UL, SPAN
+
+from calibre.ebooks.docx.block_styles import ParagraphStyle
+from calibre.ebooks.docx.char_styles import RunStyle
+from calibre.ebooks.docx.names import XPath, get
+
+# Maps the w:val of a <w:numFmt> element to the value stored in Level.fmt.
+# Values that are not listed here become 'decimal' (see Level.read_from_xml).
+# NOTE(review): the values look like CSS list-style-type keywords - confirm
+# against the code that consumes Level.fmt
+STYLE_MAP = {
+    'aiueo': 'hiragana',
+    'aiueoFullWidth': 'hiragana',
+    'hebrew1': 'hebrew',
+    'iroha': 'katakana-iroha',
+    'irohaFullWidth': 'katakana-iroha',
+    'lowerLetter': 'lower-alpha',
+    'lowerRoman': 'lower-roman',
+    'none': 'none',
+    'upperLetter': 'upper-alpha',
+    'upperRoman': 'upper-roman',
+    'chineseCounting': 'cjk-ideographic',
+    'decimalZero': 'decimal-leading-zero',
+}
+
+class Level(object):
+
+    ' The settings of a single list level, optionally read from a <w:lvl> element '
+
+    def __init__(self, lvl=None):
+        self.restart = None
+        self.start = 0
+        self.fmt = 'decimal'
+        self.para_link = None
+        self.paragraph_style = None
+        self.character_style = None
+        self.is_numbered = False
+        self.num_template = None
+
+        if lvl is not None:
+            self.read_from_xml(lvl)
+
+    def copy(self):
+        ' Return a new Level with the same attribute values (the style objects are shared, not copied) '
+        clone = Level()
+        for attr in ('restart', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style', 'is_numbered', 'num_template'):
+            setattr(clone, attr, getattr(self, attr))
+        return clone
+
+    def format_template(self, counter, ilvl):
+        '''
+        Fill in self.num_template: every %N is replaced by a number taken
+        from counter for level N-1, or by nothing if that level is deeper
+        than ilvl or missing from counter. For levels above ilvl one is
+        subtracted from the counter value. Trailing whitespace is replaced
+        by a single non-breaking space.
+        '''
+        def replace_placeholder(match):
+            level = int(match.group(1)) - 1
+            if level > ilvl or level not in counter:
+                return ''
+            offset = 0 if level == ilvl else 1
+            return '%d' % (counter[level] - offset)
+
+        filled = re.sub(r'%(\d+)', replace_placeholder, self.num_template)
+        return filled.rstrip() + '\xa0'
+
+    def read_from_xml(self, lvl, override=False):
+        ' Update this level from the children of the element lvl. The override argument is not used. '
+        # Integer valued settings, values that are not integers are ignored
+        for expr, attr in (('./w:lvlRestart[@w:val]', 'restart'), ('./w:start[@w:val]', 'start')):
+            for node in XPath(expr)(lvl):
+                try:
+                    setattr(self, attr, int(get(node, 'w:val')))
+                except (TypeError, ValueError):
+                    pass
+
+        level_text = None
+        for node in XPath('./w:lvlText[@w:val]')(lvl):
+            level_text = get(node, 'w:val')
+
+        for node in XPath('./w:numFmt[@w:val]')(lvl):
+            num_fmt = get(node, 'w:val')
+            self.is_numbered = num_fmt != 'bullet'
+            if not self.is_numbered:
+                # For bullets the level text selects the shape of the bullet
+                self.fmt = {'\uf0a7':'square', 'o':'circle'}.get(level_text, 'disc')
+                continue
+            self.fmt = STYLE_MAP.get(num_fmt, 'decimal')
+            # A template is only stored when the level text is something
+            # other than a single number followed by a period
+            if level_text and re.match(r'%\d+\.$', level_text) is None:
+                self.num_template = level_text
+
+        for node in XPath('./w:pStyle[@w:val]')(lvl):
+            self.para_link = get(node, 'w:val')
+
+        # The first style that is read is stored, later ones are merged
+        # into it
+        for pPr in XPath('./w:pPr')(lvl):
+            pstyle = ParagraphStyle(pPr)
+            if self.paragraph_style is None:
+                self.paragraph_style = pstyle
+            else:
+                self.paragraph_style.update(pstyle)
+
+        for rPr in XPath('./w:rPr')(lvl):
+            rstyle = RunStyle(rPr)
+            if self.character_style is None:
+                self.character_style = rstyle
+            else:
+                self.character_style.update(rstyle)
+
+class NumberingDefinition(object):
+
+    ' A set of Level objects, stored in self.levels by their integer level index '
+
+    def __init__(self, parent=None):
+        ' Read one Level from every <w:lvl> child of parent, if parent is given '
+        self.levels = {}
+        if parent is None:
+            return
+        for lvl in XPath('./w:lvl')(parent):
+            try:
+                index = int(get(lvl, 'w:ilvl', 0))
+            except (TypeError, ValueError):
+                # An index that is not a number is treated as level 0
+                index = 0
+            self.levels[index] = Level(lvl)
+
+    def copy(self):
+        ' Return a new NumberingDefinition that holds a copy of every level '
+        clone = NumberingDefinition()
+        for index, level in self.levels.iteritems():
+            clone.levels[index] = level.copy()
+        return clone
+
+class Numbering(object):
+
+    def __init__(self):
+        # All three are filled in by __call__: definitions by abstractNumId,
+        # instances and counters by numId
+        self.definitions, self.instances, self.counters = {}, {}, {}
+
+    def __call__(self, root, styles):
+        '''
+        Read all numbering style definitions
+
+        :param root: The element whose <w:abstractNum> and <w:num> children
+                     are read
+        :param styles: Its numbering_style_links attribute is used to find
+                       the numId for the value of a <w:numStyleLink>
+        '''
+        lazy_load = {}
+        for an in XPath('./w:abstractNum[@w:abstractNumId]')(root):
+            an_id = get(an, 'w:abstractNumId')
+            nsl = XPath('./w:numStyleLink[@w:val]')(an)
+            if nsl:
+                # Defined in terms of a numbering style, resolved below once
+                # the instances are known
+                lazy_load[an_id] = get(nsl[0], 'w:val')
+            else:
+                nd = NumberingDefinition(an)
+                self.definitions[an_id] = nd
+
+        def level_index(raw):
+            # NumberingDefinition stores its levels by int, so the w:ilvl
+            # attribute has to be converted before it is used as a key
+            try:
+                return int(raw)
+            except (TypeError, ValueError):
+                return None
+
+        def create_instance(n, definition):
+            nd = definition.copy()
+            for lo in XPath('./w:lvlOverride')(n):
+                ilvl = level_index(get(lo, 'w:ilvl'))
+                for lvl in XPath('./w:lvl')(lo)[:1]:
+                    if ilvl is None:
+                        ilvl = level_index(get(lvl, 'w:ilvl'))
+                    if ilvl is None:
+                        # No usable level index, nothing can be overridden
+                        continue
+                    alvl = nd.levels.get(ilvl, None)
+                    if alvl is None:
+                        alvl = Level()
+                    # NOTE(review): Level.copy() shares the style objects
+                    # with the definition and read_from_xml() updates them
+                    # in place - confirm that overrides cannot leak into
+                    # other instances of the same definition
+                    alvl.read_from_xml(lvl, override=True)
+                    # Needed for levels the definition does not have,
+                    # otherwise the newly created Level is lost
+                    nd.levels[ilvl] = alvl
+            return nd
+
+        next_pass = {}
+        for n in XPath('./w:num[@w:numId]')(root):
+            an_id = None
+            num_id = get(n, 'w:numId')
+            for an in XPath('./w:abstractNumId[@w:val]')(n):
+                an_id = get(an, 'w:val')
+            d = self.definitions.get(an_id, None)
+            if d is None:
+                next_pass[num_id] = (an_id, n)
+                continue
+            self.instances[num_id] = create_instance(n, d)
+
+        numbering_links = styles.numbering_style_links
+        for an_id, style_link in lazy_load.iteritems():
+            try:
+                num_id = numbering_links[style_link]
+                linked = self.instances[num_id]
+            except KeyError:
+                # The link cannot be resolved. Skip it, the instances that
+                # use this definition are then skipped below as well.
+                continue
+            self.definitions[an_id] = linked.copy()
+
+        for num_id, (an_id, n) in next_pass.iteritems():
+            d = self.definitions.get(an_id, None)
+            if d is not None:
+                self.instances[num_id] = create_instance(n, d)
+
+        for num_id, d in self.instances.iteritems():
+            self.counters[num_id] = Counter({lvl:d.levels[lvl].start for lvl in d.levels})
+
+    def get_pstyle(self, num_id, style_id):
+        '''
+        Return the index of a level of instance num_id whose para_link is
+        style_id, or None if the instance or such a level does not exist.
+        '''
+        definition = self.instances.get(num_id, None)
+        if definition is None:
+            return None
+        for index, level in definition.levels.iteritems():
+            if level.para_link == style_id:
+                return index
+        return None
+
+    def get_para_style(self, num_id, lvl):
+        '''
+        Return the paragraph_style of level lvl of instance num_id, or None
+        when the instance, the level or the attribute is missing.
+        '''
+        definition = self.instances.get(num_id, None)
+        if definition is None:
+            return None
+        level = definition.levels.get(lvl, None)
+        # getattr() also covers level being None
+        return getattr(level, 'paragraph_style', None)
+
+    def update_counter(self, counter, levelnum, levels):
+        '''
+        Increment counter[levelnum] and reset dependent levels to their start.
+
+        A level is reset when its restart equals levelnum + 1, or when it has
+        no restart value and its own index is levelnum + 1.
+        '''
+        counter[levelnum] += 1
+        trigger = levelnum + 1
+        for index, level in levels.iteritems():
+            restart_on = level.restart
+            if restart_on is None:
+                resets = index == trigger
+            else:
+                resets = restart_on == trigger
+            if resets:
+                counter[index] = level.start
+
+    def apply_markup(self, items, body, styles, object_map):
+        '''
+        Convert the numbered paragraphs in items into list markup in body.
+
+        items yields (p, num_id, ilvl) triples where p is the HTML element
+        to convert. styles is used to register list CSS classes and for its
+        para_cache; object_map maps HTML elements to the objects used as
+        para_cache keys.
+        '''
+        # Pass 1: turn every known numbered paragraph into an <li> that
+        # carries its counter value and list identity as temporary attributes
+        for p, num_id, ilvl in items:
+            d = self.instances.get(num_id, None)
+            if d is not None:
+                lvl = d.levels.get(ilvl, None)
+                if lvl is not None:
+                    counter = self.counters[num_id]
+                    p.tag = 'li'
+                    p.set('value', '%s' % counter[ilvl])
+                    p.set('list-lvl', str(ilvl))
+                    p.set('list-id', num_id)
+                    if lvl.num_template is not None:
+                        val = lvl.format_template(counter, ilvl)
+                        p.set('list-template', val)
+                    self.update_counter(counter, ilvl, d.levels)
+
+        # Longest rendered template text seen so far, per list id + level
+        templates = {}
+
+        def commit(current_run):
+            # Wrap a run of sibling <li> elements in a single <ol>/<ul>
+            # inserted where the first item was, then empty the run
+            if not current_run:
+                return
+            start = current_run[0]
+            parent = start.getparent()
+            idx = parent.index(start)
+
+            d = self.instances[start.get('list-id')]
+            ilvl = int(start.get('list-lvl'))
+            lvl = d.levels[ilvl]
+            lvlid = start.get('list-id') + start.get('list-lvl')
+            wrap = (OL if lvl.is_numbered else UL)('\n\t')
+            has_template = 'list-template' in start.attrib
+            if has_template:
+                # Marked for conversion into a CSS table in the final pass
+                wrap.set('lvlid', lvlid)
+            else:
+                wrap.set('class', styles.register({'list-style-type': lvl.fmt}, 'list'))
+            parent.insert(idx, wrap)
+            last_val = None
+            for child in current_run:
+                wrap.append(child)
+                child.tail = '\n\t'
+                if has_template:
+                    # Move the item's content into one span and put the
+                    # rendered template text in a span before it
+                    span = SPAN()
+                    span.text = child.text
+                    child.text = None
+                    for gc in child:
+                        span.append(gc)
+                    child.append(span)
+                    span = SPAN(child.get('list-template'))
+                    last = templates.get(lvlid, '')
+                    if span.text and len(span.text) > len(last):
+                        templates[lvlid] = span.text
+                    child.insert(0, span)
+                for attr in ('list-lvl', 'list-id', 'list-template'):
+                    child.attrib.pop(attr, None)
+                val = int(child.get('value'))
+                # An explicit value is only kept where the numbering does not
+                # simply continue from the previous item of an ordered list
+                if last_val == val - 1 or wrap.tag == 'ul':
+                    child.attrib.pop('value')
+                last_val = val
+            current_run[-1].tail = '\n'
+            del current_run[:]
+
+        parents = set()
+        for child in body.iterdescendants('li'):
+            parents.add(child.getparent())
+
+        # Pass 2: group consecutive <li> siblings with the same list id and
+        # level; any other element ends the current group
+        for parent in parents:
+            current_run = []
+            for child in parent:
+                if child.tag == 'li':
+                    if current_run:
+                        last = current_run[-1]
+                        if (last.get('list-id') , last.get('list-lvl')) != (child.get('list-id'), child.get('list-lvl')):
+                            commit(current_run)
+                    current_run.append(child)
+                else:
+                    commit(current_run)
+            commit(current_run)
+
+        # Pass 3: lists with a template become <div>s laid out with
+        # display:table, one table row per item
+        for wrap in body.xpath('//ol[@lvlid]'):
+            lvlid = wrap.attrib.pop('lvlid')
+            wrap.tag = 'div'
+            text = ''
+            maxtext = templates.get(lvlid, '').replace('.', '')[:-1]
+            for li in wrap.iterchildren('li'):
+                t = li[0].text
+                if t and len(t) > len(text):
+                    text = t
+            for i, li in enumerate(wrap.iterchildren('li')):
+                li.tag = 'div'
+                li.attrib.pop('value', None)
+                li.set('style', 'display:table-row')
+                obj = object_map[li]
+                bs = styles.para_cache[obj]
+                if i == 0:
+                    m = len(maxtext)  # Move the table left to simulate the behavior of a list (number is to the left of text margin)
+                    wrap.set('style', 'display:table; margin-left: -%dem; padding-left: %s' % (m, bs.css.get('margin-left', 0)))
+                bs.css.pop('margin-left', None)
+                for child in li:
+                    child.set('style', 'display:table-cell')
+
--- a/src/calibre/ebooks/docx/styles.py
+++ b/src/calibre/ebooks/docx/styles.py
@ -0,0 +1,365 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+import textwrap
+from collections import OrderedDict, Counter
+
+from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit
+from calibre.ebooks.docx.char_styles import RunStyle
+from calibre.ebooks.docx.names import XPath, get
+
+
+class Style(object):
+    '''
+    Class representing a <w:style> element. Can contain block, character, etc. styles.
+    '''
+
+    name_path = XPath('./w:name[@w:val]')
+    based_on_path = XPath('./w:basedOn[@w:val]')
+
+    def __init__(self, elem):
+        ''' Read id, type, name, parent link and formatting from elem '''
+        self.resolved = False
+        self.style_id = get(elem, 'w:styleId')
+        kind = self.style_type = get(elem, 'w:type')
+
+        name_elems = self.name_path(elem)
+        self.name = None
+        if name_elems:
+            # With several w:name children the last one is used
+            self.name = get(name_elems[-1], 'w:val')
+
+        parent_elems = self.based_on_path(elem)
+        self.based_on = None
+        if parent_elems and kind != 'numbering':
+            # A basedOn link is not kept for numbering styles
+            self.based_on = get(parent_elems[0], 'w:val')
+
+        self.is_default = get(elem, 'w:default') in {'1', 'on', 'true'}
+
+        self.paragraph_style = None
+        self.character_style = None
+
+        if kind == 'paragraph':
+            for pPr in XPath('./w:pPr')(elem):
+                block = ParagraphStyle(pPr)
+                if self.paragraph_style is None:
+                    self.paragraph_style = block
+                else:
+                    self.paragraph_style.update(block)
+
+        if kind in {'paragraph', 'character'}:
+            for rPr in XPath('./w:rPr')(elem):
+                text = RunStyle(rPr)
+                if self.character_style is None:
+                    self.character_style = text
+                else:
+                    self.character_style.update(text)
+
+        if kind == 'numbering':
+            # This attribute only exists on numbering styles
+            self.numbering_style_link = None
+            for num_id in XPath('./w:pPr/w:numPr/w:numId[@w:val]')(elem):
+                self.numbering_style_link = get(num_id, 'w:val')
+
+    def resolve_based_on(self, parent):
+        ''' Resolve the block and text formatting against the parent style '''
+        parent_block = parent.paragraph_style
+        if parent_block is not None:
+            if self.paragraph_style is None:
+                self.paragraph_style = ParagraphStyle()
+            self.paragraph_style.resolve_based_on(parent_block)
+
+        parent_text = parent.character_style
+        if parent_text is not None:
+            if self.character_style is None:
+                self.character_style = RunStyle()
+            self.character_style.resolve_based_on(parent_text)
+
+
+class Styles(object):
+
+    '''
+    Collection of all styles defined in the document. Used to get the final styles applicable to elements in the document markup.
+    '''
+
+    def __init__(self):
+        ''' Create an empty collection; __call__ reads the actual styles '''
+        # Style objects by style id, in the order they were read
+        self.id_map = OrderedDict()
+        self.default_styles = {}
+        self.numbering_style_links = {}
+
+        # Results of resolve_paragraph()/resolve_run()
+        self.para_cache = {}
+        self.para_char_cache = {}
+        self.run_cache = {}
+
+        # Bookkeeping for the generated CSS class names
+        self.classes = {}
+        self.counter = Counter()
+
+    def __iter__(self):
+        ''' Yield the Style objects stored in id_map (an OrderedDict) '''
+        for s in self.id_map.itervalues():
+            yield s
+
+    def __getitem__(self, key):
+        ''' Return the style with id key; raises KeyError if there is none '''
+        return self.id_map[key]
+
+    def __len__(self):
+        ''' Number of styles stored in id_map '''
+        return len(self.id_map)
+
+    def get(self, key, default=None):
+        ''' Return the style with id key, or default if there is none '''
+        return self.id_map.get(key, default)
+
+    def __call__(self, root, fonts):
+        '''
+        Read all styles and the document defaults from root.
+
+        root is the parsed styles XML and fonts is stored as self.fonts.
+        After reading, every style with an id is resolved against the style
+        it is based on.
+        '''
+        self.fonts = fonts
+        for s in XPath('//w:style')(root):
+            s = Style(s)
+            if s.style_id:
+                self.id_map[s.style_id] = s
+            if s.is_default:
+                self.default_styles[s.style_type] = s
+            if s.style_type == 'numbering' and s.numbering_style_link:
+                self.numbering_style_links[s.style_id] = s.numbering_style_link
+
+        self.default_paragraph_style = self.default_character_style = None
+
+        for dd in XPath('./w:docDefaults')(root):
+            for pd in XPath('./w:pPrDefault')(dd):
+                for pPr in XPath('./w:pPr')(pd):
+                    ps = ParagraphStyle(pPr)
+                    if self.default_paragraph_style is None:
+                        self.default_paragraph_style = ps
+                    else:
+                        self.default_paragraph_style.update(ps)
+            for rd in XPath('./w:rPrDefault')(dd):
+                for rPr in XPath('./w:rPr')(rd):
+                    rs = RunStyle(rPr)
+                    if self.default_character_style is None:
+                        self.default_character_style = rs
+                    else:
+                        self.default_character_style.update(rs)
+
+        # Styles whose resolution has started but not finished. Finding the
+        # parent in here means the basedOn chain loops back on itself.
+        in_progress = set()
+
+        def resolve(s, p):
+            in_progress.add(s)
+            if p is not None and p not in in_progress:
+                # Parents are resolved first so that s sees their final values
+                if not p.resolved:
+                    resolve(p, self.get(p.based_on))
+                s.resolve_based_on(p)
+            # A looping link is ignored instead of recursing without end
+            in_progress.discard(s)
+            s.resolved = True
+
+        for s in self:
+            if not s.resolved:
+                resolve(s, self.get(s.based_on))
+
+    def para_val(self, parent_styles, direct_formatting, attr):
+        '''
+        Return the value of attr for a paragraph: the direct formatting if it
+        sets one, otherwise the value from the last entry of parent_styles
+        that sets one, otherwise inherit.
+        '''
+        own = getattr(direct_formatting, attr)
+        if own is not inherit:
+            return own
+        for parent in reversed(parent_styles):
+            candidate = getattr(parent, attr)
+            if candidate is not inherit:
+                return candidate
+        return own
+
+    def run_val(self, parent_styles, direct_formatting, attr):
+        '''
+        Return the value of attr for a run.
+
+        Direct formatting wins. For toggle properties the result is True
+        when an odd number of parent styles set the property to True. For
+        everything else the last parent style with a value wins.
+        '''
+        own = getattr(direct_formatting, attr)
+        if own is not inherit:
+            return own
+
+        if attr in direct_formatting.toggle_properties:
+            state = False
+            for parent in parent_styles:
+                if getattr(parent, attr) is True:
+                    state = not state
+            return state
+
+        for parent in reversed(parent_styles):
+            candidate = getattr(parent, attr)
+            if candidate is not inherit:
+                return candidate
+        return own
+
+    def resolve_paragraph(self, p):
+        '''
+        Return the ParagraphStyle for the paragraph p, computing it and
+        storing it in para_cache on first use.
+        '''
+        ans = self.para_cache.get(p, None)
+        if ans is None:
+            ans = self.para_cache[p] = ParagraphStyle()
+            ans.style_name = None
+            direct_formatting = None
+            for pPr in XPath('./w:pPr')(p):
+                ps = ParagraphStyle(pPr)
+                if direct_formatting is None:
+                    direct_formatting = ps
+                else:
+                    direct_formatting.update(ps)
+
+            if direct_formatting is None:
+                direct_formatting = ParagraphStyle()
+            # Later entries take precedence, see para_val()
+            parent_styles = []
+            if self.default_paragraph_style is not None:
+                parent_styles.append(self.default_paragraph_style)
+
+            default_para = self.default_styles.get('paragraph', None)
+            if direct_formatting.linked_style is not None:
+                ls = self.get(direct_formatting.linked_style)
+                if ls is not None:
+                    ans.style_name = ls.name
+                    ps = ls.paragraph_style
+                    if ps is not None:
+                        parent_styles.append(ps)
+                    # Remembered so that resolve_run() can apply it to the
+                    # runs of this paragraph
+                    if ls.character_style is not None:
+                        self.para_char_cache[p] = ls.character_style
+            elif default_para is not None:
+                if default_para.paragraph_style is not None:
+                    parent_styles.append(default_para.paragraph_style)
+                if default_para.character_style is not None:
+                    self.para_char_cache[p] = default_para.character_style
+
+            is_numbering = direct_formatting.numbering is not inherit
+            if is_numbering:
+                num_id, lvl = direct_formatting.numbering
+                if num_id is not None:
+                    # Stored on the source element as "lvl:num_id"
+                    p.set('calibre_num_id', '%s:%s' % (lvl, num_id))
+                if num_id is not None and lvl is not None:
+                    # self.numbering is only set by resolve_numbering()
+                    ps = self.numbering.get_para_style(num_id, lvl)
+                    if ps is not None:
+                        parent_styles.append(ps)
+
+            for attr in ans.all_properties:
+                if not (is_numbering and attr == 'text_indent'):  # skip text-indent for lists
+                    setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr))
+        return ans
+
+    def resolve_run(self, r):
+        '''
+        Return the RunStyle for the run r, computing it and storing it in
+        run_cache on first use.
+        '''
+        ans = self.run_cache.get(r, None)
+        if ans is None:
+            p = r.getparent()
+            ans = self.run_cache[r] = RunStyle()
+            direct_formatting = None
+            for rPr in XPath('./w:rPr')(r):
+                rs = RunStyle(rPr)
+                if direct_formatting is None:
+                    direct_formatting = rs
+                else:
+                    direct_formatting.update(rs)
+
+            if direct_formatting is None:
+                direct_formatting = RunStyle()
+
+            # Later entries take precedence, see run_val()
+            parent_styles = []
+            default_char = self.default_styles.get('character', None)
+            if self.default_character_style is not None:
+                parent_styles.append(self.default_character_style)
+            pstyle = self.para_char_cache.get(p, None)
+            if pstyle is not None:
+                parent_styles.append(pstyle)
+            if direct_formatting.linked_style is not None:
+                # The run may reference a style that is not defined, in which
+                # case self.get() returns None
+                ls = getattr(self.get(direct_formatting.linked_style), 'character_style', None)
+                if ls is not None:
+                    parent_styles.append(ls)
+            elif default_char is not None and default_char.character_style is not None:
+                parent_styles.append(default_char.character_style)
+
+            for attr in ans.all_properties:
+                setattr(ans, attr, self.run_val(parent_styles, direct_formatting, attr))
+
+            if ans.font_family is not inherit:
+                ans.font_family = self.fonts.family_for(ans.font_family, ans.b, ans.i)
+
+        return ans
+
+    def resolve(self, obj):
+        '''
+        Resolve the style of obj based on its tag: paragraphs (tag ending in
+        }p) and runs (tag ending in }r). Returns None for anything else.
+        '''
+        tag = obj.tag
+        if tag.endswith('}p'):
+            return self.resolve_paragraph(obj)
+        if tag.endswith('}r'):
+            return self.resolve_run(obj)
+        return None
+
+    def cascade(self, layers):
+        '''
+        Move common font settings up from runs to paragraphs to the body.
+
+        layers maps each paragraph to the list of its runs. The resolved
+        (cached) styles are modified in place, and body_font_family and
+        body_font_size are set for use by generate_css().
+        '''
+        self.body_font_family = 'serif'
+        self.body_font_size = '10pt'
+
+        for p, runs in layers.iteritems():
+            char_styles = [self.resolve_run(r) for r in runs]
+            block_style = self.resolve_paragraph(p)
+            c = Counter()
+            for s in char_styles:
+                if s.font_family is not inherit:
+                    c[s.font_family] += 1
+            if c:
+                # The most common family of the runs becomes the family of
+                # the paragraph; runs using it then simply inherit it
+                family = c.most_common(1)[0][0]
+                block_style.font_family = family
+                for s in char_styles:
+                    if s.font_family == family:
+                        s.font_family = inherit
+
+            sizes = [s.font_size for s in char_styles if s.font_size is not inherit]
+            if sizes:
+                # For the size, the first run that sets one is used
+                sz = block_style.font_size = sizes[0]
+                for s in char_styles:
+                    if s.font_size == sz:
+                        s.font_size = inherit
+
+        # Same again one level up: paragraphs -> body
+        block_styles = [self.resolve_paragraph(p) for p in layers]
+        c = Counter()
+        for s in block_styles:
+            if s.font_family is not inherit:
+                c[s.font_family] += 1
+
+        if c:
+            self.body_font_family = family = c.most_common(1)[0][0]
+            for s in block_styles:
+                if s.font_family == family:
+                    s.font_family = inherit
+
+        c = Counter()
+        for s in block_styles:
+            if s.font_size is not inherit:
+                c[s.font_size] += 1
+
+        if c:
+            sz = c.most_common(1)[0][0]
+            for s in block_styles:
+                if s.font_size == sz:
+                    s.font_size = inherit
+            # NOTE(review): the format below needs sz to be a number
+            self.body_font_size = '%.3gpt' % sz
+
+    def resolve_numbering(self, numbering):
+        ''' Store numbering and fix up the numbering info of paragraph styles '''
+        # When a numPr element appears inside a paragraph style, the lvl info
+        # must be discarded and pStyle used instead.
+        self.numbering = numbering
+        for style in self:
+            block = style.paragraph_style
+            if block is None or block.numbering is inherit:
+                continue
+            num_id = block.numbering[0]
+            level = numbering.get_pstyle(num_id, style.style_id)
+            if level is None:
+                # No level is linked to this style
+                block.numbering = inherit
+            else:
+                block.numbering = (num_id, level)
+
+    def register(self, css, prefix):
+        '''
+        Return the class name for the CSS properties in css, creating a new
+        name of the form prefix_N if these properties were not seen before.
+        '''
+        # Keyed by the properties themselves rather than by their hash, so
+        # that two different sets of properties can never share a class
+        key = frozenset(css.iteritems())
+        ans, _ = self.classes.get(key, (None, None))
+        if ans is None:
+            self.counter[prefix] += 1
+            ans = '%s_%d' % (prefix, self.counter[prefix])
+            self.classes[key] = (ans, css)
+        return ans
+
+    def generate_classes(self):
+        ''' Register a class for every non-empty resolved block and text style '''
+        for bs in self.para_cache.itervalues():
+            css = bs.css
+            if css:
+                self.register(css, 'block')
+        for bs in self.run_cache.itervalues():
+            css = bs.css
+            if css:
+                self.register(css, 'text')
+
+    def class_name(self, css):
+        ''' Return the class name registered for css, or None '''
+        # Must build the key exactly as register() does
+        key = frozenset(css.iteritems())
+        return self.classes.get(key, (None, None))[0]
+
+    def generate_css(self, dest_dir, docx):
+        '''
+        Return the text of the stylesheet: whatever fonts.embed_fonts()
+        returns, the body/paragraph defaults and one rule per registered
+        class, sorted by class name.
+        '''
+        embedded = self.fonts.embed_fonts(dest_dir, docx)
+        header = textwrap.dedent(
+            '''\
+            body { font-family: %s; font-size: %s }
+
+            p { text-indent: 1.5em }
+
+            ul, ol, p { margin: 0; padding: 0 }
+            ''') % (self.body_font_family, self.body_font_size)
+        if embedded:
+            header = embedded + '\n' + header
+
+        rules = []
+        for name, css in sorted(self.classes.itervalues(), key=lambda entry:entry[0]):
+            declarations = '\n'.join('\t%s: %s;' % (prop, value) for prop, value in css.iteritems())
+            # The trailing semicolon of the last declaration is dropped
+            rules.append('.%s {\n%s\n}\n' % (name, declarations.rstrip(';')))
+        return header + '\n' + '\n'.join(rules)
+
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@ -6,36 +6,250 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

-import sys, os
+import sys, os, re
+from collections import OrderedDict

 from lxml import html
-from lxml.html.builder import (HTML, HEAD, TITLE, BODY, LINK, META)
+from lxml.html.builder import (
+    HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)

-from calibre.ebooks.docx.container import Container
+from calibre.ebooks.docx.container import DOCX, fromstring
+from calibre.ebooks.docx.names import XPath, is_tag, XML, STYLES, NUMBERING, FONTS
+from calibre.ebooks.docx.styles import Styles, inherit
+from calibre.ebooks.docx.numbering import Numbering
+from calibre.ebooks.docx.fonts import Fonts
+from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
+
+class Text:
+    ''' Buffer of text that is written to the attribute attr of elem '''
+
+    def __init__(self, elem, attr, buf):
+        self.elem = elem
+        self.attr = attr
+        self.buf = buf
+
+    def add_elem(self, elem):
+        ''' Flush the buffer to the current target, then collect text for the tail of elem '''
+        pending = ''.join(self.buf)
+        setattr(self.elem, self.attr, pending)
+        self.elem = elem
+        self.attr = 'tail'
+        self.buf = []

 class Convert(object):

    def __init__(self, path_or_stream, dest_dir=None, log=None):
+        # Open the DOCX and build the skeleton of the output document: a
+        # <head> with charset, title and stylesheet link, and an empty
+        # <body> that __call__ fills in. dest_dir defaults to the current
+        # working directory.
-        self.container = Container(path_or_stream, log=log)
-        self.log = self.container.log
+        self.docx = DOCX(path_or_stream, log=log)
+        self.log = self.docx.log
        self.dest_dir = dest_dir or os.getcwdu()
+        self.mi = self.docx.metadata
        self.body = BODY()
+        self.styles = Styles()
+        # Maps each generated HTML element to the DOCX element it came from
+        self.object_map = OrderedDict()
        self.html = HTML(
            HEAD(
                META(charset='utf-8'),
-                TITLE('TODO: read from metadata'),
+                TITLE(self.mi.title or _('Unknown')),
                LINK(rel='stylesheet', type='text/css', href='docx.css'),
            ),
            self.body
        )
+        # Whitespace so that the serialized <head> is indented
+        self.html.text='\n\t'
+        self.html[0].text='\n\t\t'
+        self.html[0].tail='\n'
+        for child in self.html[0]:
+            child.tail = '\n\t\t'
+        self.html[0][-1].tail = '\n\t'
+        self.html[1].text = self.html[1].tail = '\n'
+        # Set the lang attribute of <html> from the metadata language, if
+        # it is known and has a two letter form
+        lang = canonicalize_lang(self.mi.language)
+        if lang and lang != 'und':
+            lang = lang_as_iso639_1(lang)
+            if lang:
+                self.html.set('lang', lang)

    def __call__(self):
+        # Run the conversion: read the styles, convert every paragraph,
+        # cascade the styles, build the list markup, assign CSS classes and
+        # write the result to dest_dir.
+        doc = self.docx.document
+        relationships_by_id, relationships_by_type = self.docx.document_relationships
+        self.read_styles(relationships_by_type)
+        # Paragraph -> list of its runs, filled in by convert_p()
+        self.layers = OrderedDict()
+        for wp in XPath('//w:p')(doc):
+            p = self.convert_p(wp)
+            self.body.append(p)
+        # TODO: tables <w:tbl> child of <w:body> (nested tables?)
+        # TODO: Last section properties <w:sectPr> child of <w:body>
+
+        self.styles.cascade(self.layers)
+
+        # Collect the paragraphs that resolve_paragraph() marked as numbered
+        # by setting calibre_num_id ("lvl:num_id") on the source element
+        numbered = []
+        for html_obj, obj in self.object_map.iteritems():
+            raw = obj.get('calibre_num_id', None)
+            if raw is not None:
+                lvl, num_id = raw.partition(':')[0::2]
+                try:
+                    lvl = int(lvl)
+                except (TypeError, ValueError):
+                    lvl = 0
+                numbered.append((html_obj, num_id, lvl))
+        self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map)
+
+        # Whitespace so that the children of <body> are on separate lines
+        if len(self.body) > 0:
+            self.body.text = '\n\t'
+            for child in self.body:
+                child.tail = '\n\t'
+            self.body[-1].tail = '\n'
+
+        # Classes must be generated before class_name() can find them
+        self.styles.generate_classes()
+        for html_obj, obj in self.object_map.iteritems():
+            style = self.styles.resolve(obj)
+            if style is not None:
+                css = style.css
+                if css:
+                    cls = self.styles.class_name(css)
+                    if cls:
+                        html_obj.set('class', cls)
        self.write()

+    def read_styles(self, relationships_by_type):
+        '''
+        Read the font table, the styles and the numbering definitions.
+
+        relationships_by_type maps a relationship type to the name of the
+        target part. Sets self.numbering and self.fonts and fills
+        self.styles. A part that cannot be read is reported with a warning.
+        '''
+
+        def get_name(rtype, defname):
+            # Name of the part with the given relationship type. Without a
+            # relationship, fall back to the part called defname in the
+            # same directory as the main document, if it exists.
+            name = relationships_by_type.get(rtype, None)
+            if name is None:
+                cname = self.docx.document_name.split('/')
+                cname[-1] = defname
+                cname = '/'.join(cname)
+                if self.docx.exists(cname):
+                    name = cname
+            return name
+
+        nname = get_name(NUMBERING, 'numbering.xml')
+        sname = get_name(STYLES, 'styles.xml')
+        fname = get_name(FONTS, 'fontTable.xml')
+        numbering = self.numbering = Numbering()
+        fonts = self.fonts = Fonts()
+
+        if fname is not None:
+            embed_relationships = self.docx.get_relationships(fname)[0]
+            try:
+                raw = self.docx.read(fname)
+            except KeyError:
+                self.log.warn('Fonts table %s does not exist' % fname)
+            else:
+                fonts(fromstring(raw), embed_relationships, self.docx, self.dest_dir)
+
+        if sname is not None:
+            try:
+                raw = self.docx.read(sname)
+            except KeyError:
+                self.log.warn('Styles %s do not exist' % sname)
+            else:
+                self.styles(fromstring(raw), fonts)
+
+        if nname is not None:
+            try:
+                raw = self.docx.read(nname)
+            except KeyError:
+                self.log.warn('Numbering styles %s do not exist' % nname)
+            else:
+                # The numbering needs the styles for the numbering style links
+                numbering(fromstring(raw), self.styles)
+
+        self.styles.resolve_numbering(numbering)
+
    def write(self):
+        # Write index.html and, if any CSS was generated, docx.css (both
+        # UTF-8 encoded) into dest_dir
        raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
        with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
            f.write(raw)
+        css = self.styles.generate_css(self.dest_dir, self.docx)
+        if css:
+            with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
+                f.write(css.encode('utf-8'))
+
+    def convert_p(self, p):
+        '''
+        Convert the paragraph p into a <p> (or <h1>-<h6> for paragraphs
+        whose style is named "heading N") with one child per run, and
+        return it. The paragraph and its runs are recorded in object_map
+        and layers.
+        '''
+        dest = P()
+        self.object_map[dest] = p
+        style = self.styles.resolve_paragraph(p)
+        self.layers[p] = []
+        for run in XPath('descendant::w:r')(p):
+            span = self.convert_run(run)
+            dest.append(span)
+            self.layers[p].append(run)
+
+        m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
+        if m is not None:
+            # Clamp the heading level to the range supported by HTML
+            n = min(6, max(1, int(m.group(1))))
+            dest.tag = 'h%d' % n
+
+        if style.direction == 'rtl':
+            dest.set('dir', 'rtl')
+
+        # Group consecutive runs that have the same border, so that the
+        # border can be drawn once around the whole group
+        border_runs = []
+        common_borders = []
+        for span in dest:
+            run = self.object_map[span]
+            style = self.styles.resolve_run(run)
+            if border_runs and not border_runs[-1][1].same_border(style):
+                if len(border_runs) > 1:
+                    common_borders.append(border_runs)
+                border_runs = []
+            # A run with a different border starts the next group
+            border_runs.append((span, style))
+        if len(border_runs) > 1:
+            # The group that reaches the end of the paragraph
+            common_borders.append(border_runs)
+
+        for border_run in common_borders:
+            spans = []
+            bs = {}
+            for span, style in border_run:
+                style.get_border_css(bs)
+                style.clear_border_css()
+                spans.append(span)
+            if bs:
+                cls = self.styles.register(bs, 'text_border')
+                wrapper = SPAN()
+                self.wrap_elems(spans, wrapper)
+                wrapper.set('class', cls)
+
+        return dest
+
+    def wrap_elems(self, elems, wrapper):
+        '''
+        Move elems into wrapper and put wrapper where the first of them
+        was. elems must be a non-empty list of children of one parent.
+        Returns wrapper.
+        '''
+        p = elems[0].getparent()
+        idx = p.index(elems[0])
+        p.insert(idx, wrapper)
+        # Text after the last element must stay outside of the wrapper
+        wrapper.tail = elems[-1].tail
+        elems[-1].tail = None
+        for elem in elems:
+            p.remove(elem)
+            wrapper.append(elem)
+        return wrapper
+
+    def convert_run(self, run):
+        '''
+        Convert the run into a <span> (<sub>/<sup> for sub/superscript)
+        containing its text and line breaks, and return it. The run is
+        recorded in object_map.
+        '''
+        # Needed to read attributes in the w: namespace; imported here as
+        # the module level import from names does not include it
+        from calibre.ebooks.docx.names import get
+        ans = SPAN()
+        self.object_map[ans] = run
+        text = Text(ans, 'text', [])
+
+        for child in run:
+            if is_tag(child, 'w:t'):
+                if not child.text:
+                    continue
+                space = child.get(XML('space'), None)
+                if space == 'preserve':
+                    text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
+                    ans.append(text.elem)
+                else:
+                    text.buf.append(child.text)
+            elif is_tag(child, 'w:cr'):
+                text.add_elem(BR())
+                ans.append(text.elem)
+            elif is_tag(child, 'w:br'):
+                # The attributes of w:br are in the w: namespace, a plain
+                # child.get('type') never finds them
+                typ = get(child, 'w:type')
+                if typ in {'column', 'page'}:
+                    br = BR(style='page-break-after:always')
+                else:
+                    clear = get(child, 'w:clear')
+                    if clear in {'all', 'left', 'right'}:
+                        br = BR(style='clear:%s'%('both' if clear == 'all' else clear))
+                    else:
+                        br = BR()
+                text.add_elem(br)
+                ans.append(text.elem)
+        if text.buf:
+            # Flush the text collected after the last child element
+            setattr(text.elem, text.attr, ''.join(text.buf))
+
+        style = self.styles.resolve_run(run)
+        if style.vert_align in {'superscript', 'subscript'}:
+            ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
+        if style.lang is not inherit:
+            # Must be an attribute of the element to end up in the output
+            # NOTE(review): assumes style.lang is a string, confirm in RunStyle
+            ans.set('lang', style.lang)
+        return ans

 if __name__ == '__main__':
-    Convert(sys.argv[-1])()
+    # Manual test: convert the file named by the last command line
+    # argument, with the log filter level set to DEBUG
+    from calibre.utils.logging import default_log
+    default_log.filter_level = default_log.DEBUG
+    Convert(sys.argv[-1], log=default_log)()
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -136,7 +136,7 @@ class FB2MLizer(object):
            metadata['author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(author_last)
            metadata['author'] += '</author>'
        if not metadata['author']:
-            metadata['author'] = u'<author><first-name></first-name><last-name><last-name></author>'
+            metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>'

        metadata['keywords'] = u''
        tags = list(map(unicode, self.oeb_book.metadata.subject))
--- a/src/calibre/ebooks/mobi/debug/mobi8.py
+++ b/src/calibre/ebooks/mobi/debug/mobi8.py
@ -163,7 +163,8 @@ class MOBIFile(object):
            ext = 'dat'
            prefix = 'binary'
            suffix = ''
-            if sig in {b'HUFF', b'CDIC', b'INDX'}: continue
+            if sig in {b'HUFF', b'CDIC', b'INDX'}:
+                continue
            # TODO: Ignore CNCX records as well
            if sig == b'FONT':
                font = read_font_record(rec.raw)
@ -196,7 +197,6 @@ class MOBIFile(object):
            vals = list(index)[:-1] + [None, None, None, None]
            entry_map.append(Entry(*(vals[:12])))

-
        indexing_data = collect_indexing_data(entry_map, list(map(len,
            self.text_records)))
        self.indexing_data = [DOC + '\n' +textwrap.dedent('''\
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@ -16,7 +16,8 @@ from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
 from calibre.utils.magick.draw import identify_data

 MBP_NS = 'http://mobipocket.com/ns/mbp'
-def MBP(name): return '{%s}%s' % (MBP_NS, name)
+def MBP(name):
+    return '{%s}%s' % (MBP_NS, name)

 MOBI_NSMAP = {None: XHTML_NS, 'mbp': MBP_NS}

@ -413,7 +414,7 @@ class MobiMLizer(object):
                        # img sizes in units other than px
                        # See #7520 for test case
                        try:
-                            pixs = int(round(float(value) / \
+                            pixs = int(round(float(value) /
                                (72./self.profile.dpi)))
                        except:
                            continue
@ -488,8 +489,6 @@ class MobiMLizer(object):
        if elem.text:
            if istate.preserve:
                text = elem.text
-            elif len(elem) > 0 and isspace(elem.text):
-                text = None
            else:
                text = COLLAPSE.sub(' ', elem.text)
        valign = style['vertical-align']
--- a/src/calibre/ebooks/mobi/reader/headers.py
+++ b/src/calibre/ebooks/mobi/reader/headers.py
@ -181,9 +181,9 @@ class BookHeader(object):
                self.codec = 'cp1252' if not user_encoding else user_encoding
                log.warn('Unknown codepage %d. Assuming %s' % (self.codepage,
                    self.codec))
-            # Some KF8 files have header length == 256 (generated by kindlegen
-            # 2.7?). See https://bugs.launchpad.net/bugs/1067310
-            max_header_length = 0x100
+            # Some KF8 files have header length == 264 (generated by kindlegen
+            # 2.9?). See https://bugs.launchpad.net/bugs/1179144
+            max_header_length = 500  # We choose 500 for future versions of kindlegen

            if (ident == 'TEXTREAD' or self.length < 0xE4 or
                    self.length > max_header_length or
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -871,6 +871,7 @@ class Manifest(object):
            orig_data = data
            fname = urlunquote(self.href)
            self.oeb.log.debug('Parsing', fname, '...')
+            self.oeb.html_preprocessor.current_href = self.href
            try:
                data = parse_html(data, log=self.oeb.log,
                        decoder=self.oeb.decode,
@ -1312,9 +1313,9 @@ class Guide(object):
                         ('notes', __('Notes')),
                         ('preface', __('Preface')),
                         ('text', __('Main Text'))]
-        TYPES = set(t for t, _ in _TYPES_TITLES)
+        TYPES = set(t for t, _ in _TYPES_TITLES)  # noqa
        TITLES = dict(_TYPES_TITLES)
-        ORDER = dict((t, i) for i, (t, _) in enumerate(_TYPES_TITLES))
+        ORDER = dict((t, i) for i, (t, _) in enumerate(_TYPES_TITLES))  # noqa

        def __init__(self, oeb, type, title, href):
            self.oeb = oeb
--- a/src/calibre/ebooks/oeb/iterator/init.py
+++ b/src/calibre/ebooks/oeb/iterator/init.py
@ -7,7 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, re
+import sys, os, re

 from calibre.customize.ui import available_input_formats

@ -26,17 +26,18 @@ def EbookIterator(*args, **kwargs):
    from calibre.ebooks.oeb.iterator.book import EbookIterator
    return EbookIterator(*args, **kwargs)

-def get_preprocess_html(path_to_ebook, output):
-    from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
-    iterator = EbookIterator(path_to_ebook)
-    iterator.__enter__(only_input_plugin=True, run_char_count=False,
-            read_anchor_map=False)
-    preprocessor = HTMLPreProcessor(None, False)
-    with open(output, 'wb') as out:
-        for path in iterator.spine:
-            with open(path, 'rb') as f:
-                html = f.read().decode('utf-8', 'replace')
-            html = preprocessor(html, get_preprocess_html=True)
+def get_preprocess_html(path_to_ebook, output=None):  # write the captured HTML of each spine item to output, or to stdout when output is None
+    from calibre.ebooks.conversion.plumber import set_regex_wizard_callback, Plumber
+    from calibre.utils.logging import DevNull
+    from calibre.ptempfile import TemporaryDirectory
+    raw = {}  # filled through the callback registered below; read back by spine item href
+    set_regex_wizard_callback(raw.__setitem__)
+    with TemporaryDirectory('_regex_wiz') as tdir:
+        pl = Plumber(path_to_ebook, os.path.join(tdir, 'a.epub'), DevNull(), for_regex_wizard=True)
+        pl.run()  # presumably this run is what invokes the callback for each file -- TODO confirm in plumber
+        items = [raw[item.href] for item in pl.oeb.spine if item.href in raw]
+
+    with (sys.stdout if output is None else open(output, 'wb')) as out:  # NOTE(review): this also closes sys.stdout on exit when output is None
+        for html in items:
            out.write(html.encode('utf-8'))
            out.write(b'\n\n' + b'-'*80 + b'\n\n')
-
--- a/src/calibre/ebooks/oeb/iterator/book.py
+++ b/src/calibre/ebooks/oeb/iterator/book.py
@ -25,7 +25,7 @@ from calibre.ebooks.oeb.transforms.cover import CoverManager
 from calibre.ebooks.oeb.iterator.spine import (SpineItem, create_indexing_data)
 from calibre.ebooks.oeb.iterator.bookmarks import BookmarksMixin

-TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
+TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(
        '__ar__', 'none').replace('__viewbox__', '0 0 600 800'
        ).replace('__width__', '600').replace('__height__', '800')

--- a/src/calibre/ebooks/oeb/parse_utils.py
+++ b/src/calibre/ebooks/oeb/parse_utils.py
@ -44,8 +44,10 @@ META_XP = XPath('/h:html/h:head/h:meta[@http-equiv="Content-Type"]')

 def merge_multiple_html_heads_and_bodies(root, log=None):
    heads, bodies = xpath(root, '//h:head'), xpath(root, '//h:body')
-    if not (len(heads) > 1 or len(bodies) > 1): return root
-    for child in root: root.remove(child)
+    if not (len(heads) > 1 or len(bodies) > 1):
+        return root
+    for child in root:
+        root.remove(child)
    head = root.makeelement(XHTML('head'))
    body = root.makeelement(XHTML('body'))
    for h in heads:
@ -368,8 +370,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
        meta.getparent().remove(meta)
    meta = etree.SubElement(head, XHTML('meta'),
        attrib={'http-equiv': 'Content-Type'})
-    meta.set('content', 'text/html; charset=utf-8') # Ensure content is second
-                                                    # attribute
+    meta.set('content', 'text/html; charset=utf-8')  # Ensure content is second attribute

    # Ensure has a <body/>
    if not xpath(data, '/h:html/h:body'):
--- a/src/calibre/ebooks/oeb/polish/toc.py
+++ b/src/calibre/ebooks/oeb/polish/toc.py
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'

 import re
 from urlparse import urlparse
-from collections import deque
+from collections import deque, Counter
 from functools import partial

 from lxml import etree
@ -29,7 +29,8 @@ class TOC(object):
    def __init__(self, title=None, dest=None, frag=None):
        self.title, self.dest, self.frag = title, dest, frag
        self.dest_exists = self.dest_error = None
-        if self.title: self.title = self.title.strip()
+        if self.title:
+            self.title = self.title.strip()
        self.parent = None
        self.children = []

@ -326,11 +327,13 @@ def create_ncx(toc, to_href, btitle, lang, uid):
    navmap = etree.SubElement(ncx, NCX('navMap'))
    spat = re.compile(r'\s+')

-    def process_node(xml_parent, toc_parent, play_order=0):
+    play_order = Counter()
+
+    def process_node(xml_parent, toc_parent):
        for child in toc_parent:
-            play_order += 1
+            play_order['c'] += 1
            point = etree.SubElement(xml_parent, NCX('navPoint'), id=uuid_id(),
-                            playOrder=str(play_order))
+                            playOrder=str(play_order['c']))
            label = etree.SubElement(point, NCX('navLabel'))
            title = child.title
            if title:
@ -341,7 +344,7 @@ def create_ncx(toc, to_href, btitle, lang, uid):
                if child.frag:
                    href += '#'+child.frag
                etree.SubElement(point, NCX('content'), src=href)
-            process_node(point, child, play_order)
+            process_node(point, child)

    process_node(navmap, toc)
    return ncx
--- a/src/calibre/ebooks/pdf/render/links.py
+++ b/src/calibre/ebooks/pdf/render/links.py
@ -45,11 +45,15 @@ class Links(object):
            href, page, rect = link
            p, frag = href.partition('#')[0::2]
            try:
-                link = ((path, p, frag or None), self.pdf.get_pageref(page).obj, Array(rect))
+                pref = self.pdf.get_pageref(page).obj
            except IndexError:
-                self.log.warn('Unable to find page for link: %r, ignoring it' % link)
+                try:
+                    pref = self.pdf.get_pageref(page-1).obj
+                except IndexError:
+                    self.pdf.debug('Unable to find page for link: %r, ignoring it' % link)
                    continue
-            self.links.append(link)
+                self.pdf.debug('The link %s points to non-existent page, moving it one page back' % href)
+            self.links.append(((path, p, frag or None), pref, Array(rect)))

    def add_links(self):
        for link in self.links:
--- a/src/calibre/gui2/actions/show_quickview.py
+++ b/src/calibre/gui2/actions/show_quickview.py
@ -38,6 +38,13 @@ class ShowQuickviewAction(InterfaceAction):
                Quickview(self.gui, self.gui.library_view, index)
            self.current_instance.show()

+    def change_quickview_column(self, idx):
+        # Call show_quickview(), then forward idx to the Quickview instance if one exists and is not closed
+        self.show_quickview()
+        qv = self.current_instance
+        if qv and not qv.is_closed:
+            qv.change_quickview_column.emit(idx)
+
    def library_changed(self, db):
        if self.current_instance and not self.current_instance.is_closed:
            self.current_instance.set_database(db)
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -122,7 +122,8 @@ def device_name_for_plugboards(device_class):
 class DeviceManager(Thread): # {{{

    def __init__(self, connected_slot, job_manager, open_feedback_slot,
-            open_feedback_msg, allow_connect_slot, sleep_time=2):
+                 open_feedback_msg, allow_connect_slot,
+                 after_callback_feedback_slot, sleep_time=2):
        '''
        :sleep_time: Time to sleep between device probes in secs
        '''
@ -150,6 +151,7 @@ class DeviceManager(Thread): # {{{
        self.ejected_devices  = set([])
        self.mount_connection_requests = Queue.Queue(0)
        self.open_feedback_slot = open_feedback_slot
+        self.after_callback_feedback_slot = after_callback_feedback_slot
        self.open_feedback_msg = open_feedback_msg
        self._device_information = None
        self.current_library_uuid = None
@ -392,6 +394,10 @@ class DeviceManager(Thread): # {{{
                        self.device.set_progress_reporter(job.report_progress)
                    self.current_job.run()
                    self.current_job = None
+                    feedback = getattr(self.device, 'user_feedback_after_callback', None)
+                    if feedback is not None:
+                        self.device.user_feedback_after_callback = None
+                        self.after_callback_feedback_slot(feedback)
                else:
                    break
            if do_sleep:
@ -850,7 +856,7 @@ class DeviceMixin(object): # {{{
        self.device_manager = DeviceManager(FunctionDispatcher(self.device_detected),
                self.job_manager, Dispatcher(self.status_bar.show_message),
                Dispatcher(self.show_open_feedback),
-                FunctionDispatcher(self.allow_connect))
+                FunctionDispatcher(self.allow_connect), Dispatcher(self.after_callback_feedback))
        self.device_manager.start()
        self.device_manager.devices_initialized.wait()
        if tweaks['auto_connect_to_folder']:
@ -862,6 +868,10 @@ class DeviceMixin(object): # {{{
                name, show_copy_button=False,
                override_icon=QIcon(icon))

+    def after_callback_feedback(self, feedback):
+        # feedback is read by key ('title', 'msg', 'det_msg'); it is not unpacked, so extra keys cannot raise ValueError
+        info_dialog(self, feedback['title'], feedback['msg'], det_msg=feedback['det_msg']).show()
+
    def debug_detection(self, done):
        self.debug_detection_callback = weakref.ref(done)
        self.device_manager.debug_detection(FunctionDispatcher(self.debug_detection_done))
@ -1116,7 +1126,7 @@ class DeviceMixin(object): # {{{
            return

        dm = self.iactions['Remove Books'].delete_memory
-        if dm.has_key(job):
+        if job in dm:
            paths, model = dm.pop(job)
            self.device_manager.remove_books_from_metadata(paths,
                    self.booklists())
@ -1141,7 +1151,7 @@ class DeviceMixin(object): # {{{
    def dispatch_sync_event(self, dest, delete, specific):
        rows = self.library_view.selectionModel().selectedRows()
        if not rows or len(rows) == 0:
-            error_dialog(self, _('No books'), _('No books')+' '+\
+            error_dialog(self, _('No books'), _('No books')+' '+
                    _('selected to send')).exec_()
            return

@ -1160,7 +1170,7 @@ class DeviceMixin(object): # {{{
                if fmts:
                    for f in fmts.split(','):
                        f = f.lower()
-                        if format_count.has_key(f):
+                        if f in format_count:
                            format_count[f] += 1
                        else:
                            format_count[f] = 1
--- a/src/calibre/gui2/device_drivers/configwidget.py
+++ b/src/calibre/gui2/device_drivers/configwidget.py
@ -28,7 +28,10 @@ class ConfigWidget(QWidget, Ui_ConfigWidget):

        all_formats = set(all_formats)
        self.calibre_known_formats = device.FORMATS
+        try:
            self.device_name = device.get_gui_name()
+        except TypeError:
+            self.device_name = getattr(device, 'gui_name', None) or _('Device')
        if device.USER_CAN_ADD_NEW_FORMATS:
            all_formats = set(all_formats) | set(BOOK_EXTENSIONS)

--- a/src/calibre/gui2/dialogs/quickview.py
+++ b/src/calibre/gui2/dialogs/quickview.py
@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'

 from PyQt4.Qt import (Qt, QDialog, QAbstractItemView, QTableWidgetItem,
                      QListWidgetItem, QByteArray, QCoreApplication,
-                      QApplication)
+                      QApplication, pyqtSignal)

 from calibre.customize.ui import find_plugin
 from calibre.gui2 import gprefs
@ -44,6 +44,8 @@ class TableItem(QTableWidgetItem):

 class Quickview(QDialog, Ui_Quickview):

+    change_quickview_column   = pyqtSignal(object)
+
    def __init__(self, gui, view, row):
        QDialog.__init__(self, gui, flags=Qt.Window)
        Ui_Quickview.__init__(self)
@ -105,6 +107,7 @@ class Quickview(QDialog, Ui_Quickview):
        self.refresh(row)

        self.view.clicked.connect(self.slave)
+        self.change_quickview_column.connect(self.slave)
        QCoreApplication.instance().aboutToQuit.connect(self.save_state)
        self.search_button.clicked.connect(self.do_search)
        view.model().new_bookdisplay_data.connect(self.book_was_changed)
@ -146,6 +149,9 @@ class Quickview(QDialog, Ui_Quickview):
        key = self.view.model().column_map[self.current_column]
        book_id = self.view.model().id(bv_row)

+        if self.current_book_id == book_id and self.current_key == key:
+            return
+
        # Only show items for categories
        if not self.db.field_metadata[key]['is_category']:
            if self.current_key is None:
@ -164,6 +170,8 @@ class Quickview(QDialog, Ui_Quickview):

        if vals:
            self.no_valid_items = False
+            if self.db.field_metadata[key]['datatype'] == 'rating':
+                vals = unicode(vals/2)
            if not isinstance(vals, list):
                vals = [vals]
            vals.sort(key=sort_key)
@ -198,8 +206,7 @@ class Quickview(QDialog, Ui_Quickview):
            sv = selected_item
        sv = sv.replace('"', r'\"')
        self.last_search = self.current_key+':"=' + sv + '"'
-        books = self.db.search_getting_ids(self.last_search,
-                                           self.db.data.search_restriction)
+        books = self.db.search(self.last_search, return_matches=True)

        self.books_table.setRowCount(len(books))
        self.books_label.setText(_('Books with selected item "{0}": {1}').
--- a/src/calibre/gui2/dialogs/template_dialog.py
+++ b/src/calibre/gui2/dialogs/template_dialog.py
@ -3,17 +3,21 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __license__   = 'GPL v3'

-import json
+import json, os, traceback

 from PyQt4.Qt import (Qt, QDialog, QDialogButtonBox, QSyntaxHighlighter, QFont,
-                      QRegExp, QApplication, QTextCharFormat, QColor, QCursor)
+                      QRegExp, QApplication, QTextCharFormat, QColor, QCursor,
+                      QIcon, QSize)

-from calibre.gui2 import error_dialog
+from calibre import sanitize_file_name_unicode
+from calibre.constants import config_dir
 from calibre.gui2.dialogs.template_dialog_ui import Ui_TemplateDialog
 from calibre.utils.formatter_functions import formatter_functions
+from calibre.utils.icu import sort_key
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.metadata.book.formatter import SafeFormat
-from calibre.library.coloring import (displayable_columns)
+from calibre.library.coloring import (displayable_columns, color_row_key)
+from calibre.gui2 import error_dialog, choose_files, pixmap_to_data


 class ParenPosition:
@ -198,25 +202,56 @@ class TemplateHighlighter(QSyntaxHighlighter):

 class TemplateDialog(QDialog, Ui_TemplateDialog):

-    def __init__(self, parent, text, mi=None, fm=None, color_field=None):
+    def __init__(self, parent, text, mi=None, fm=None, color_field=None,
+                 icon_field_key=None, icon_rule_kind=None):
        QDialog.__init__(self, parent)
        Ui_TemplateDialog.__init__(self)
        self.setupUi(self)

        self.coloring = color_field is not None
+        self.iconing = icon_field_key is not None
+
+        cols = []
+        if fm is not None:
+            for key in sorted(displayable_columns(fm),
+                              key=lambda(k): sort_key(fm[k]['name']) if k != color_row_key else 0):
+                if key == color_row_key and not self.coloring:
+                    continue
+                from calibre.gui2.preferences.coloring import all_columns_string
+                name = all_columns_string if key == color_row_key else fm[key]['name']
+                if name:
+                    cols.append((name, key))
+
+        self.color_layout.setVisible(False)
+        self.icon_layout.setVisible(False)
+
        if self.coloring:
-            cols = sorted([k for k in displayable_columns(fm)])
-            self.colored_field.addItems(cols)
-            self.colored_field.setCurrentIndex(self.colored_field.findText(color_field))
+            self.color_layout.setVisible(True)
+            for n1, k1 in cols:
+                self.colored_field.addItem(n1, k1)
+            self.colored_field.setCurrentIndex(self.colored_field.findData(color_field))
            colors = QColor.colorNames()
            colors.sort()
            self.color_name.addItems(colors)
-        else:
-            self.colored_field.setVisible(False)
-            self.colored_field_label.setVisible(False)
-            self.color_chooser_label.setVisible(False)
-            self.color_name.setVisible(False)
-            self.color_copy_button.setVisible(False)
+        elif self.iconing:
+            self.icon_layout.setVisible(True)
+            for n1, k1 in cols:
+                self.icon_field.addItem(n1, k1)
+            self.icon_file_names = []
+            d = os.path.join(config_dir, 'cc_icons')
+            if os.path.exists(d):
+                for icon_file in os.listdir(d):
+                    icon_file = icu_lower(icon_file)
+                    if os.path.exists(os.path.join(d, icon_file)):
+                        if icon_file.endswith('.png'):
+                            self.icon_file_names.append(icon_file)
+            self.icon_file_names.sort(key=sort_key)
+            self.update_filename_box()
+            self.icon_with_text.setChecked(True)
+            if icon_rule_kind == 'icon_only':
+                self.icon_without_text.setChecked(True)
+            self.icon_field.setCurrentIndex(self.icon_field.findData(icon_field_key))
+
        if mi:
            self.mi = mi
        else:
@ -248,6 +283,8 @@ class TemplateDialog(QDialog, Ui_TemplateDialog):
        self.buttonBox.button(QDialogButtonBox.Ok).setText(_('&OK'))
        self.buttonBox.button(QDialogButtonBox.Cancel).setText(_('&Cancel'))
        self.color_copy_button.clicked.connect(self.color_to_clipboard)
+        self.filename_button.clicked.connect(self.filename_button_clicked)
+        self.icon_copy_button.clicked.connect(self.icon_to_clipboard)

        try:
            with open(P('template-functions.json'), 'rb') as f:
@ -276,11 +313,55 @@ class TemplateDialog(QDialog, Ui_TemplateDialog):
                '<a href="http://manual.calibre-ebook.com/template_ref.html">'
                '%s</a>'%tt)

+    def filename_button_clicked(self):
+        # Let the user choose an image, save a PNG copy (rendered at up to 128x128) under config_dir/cc_icons and select it
+        try:
+            path = choose_files(self, 'choose_category_icon',
+                        _('Select Icon'), filters=[
+                        ('Images', ['png', 'gif', 'jpg', 'jpeg'])],
+                    all_files=False, select_only_single_file=True)
+            if path:
+                icon_path = path[0]
+                icon_name = sanitize_file_name_unicode(
+                             os.path.splitext(
+                                   os.path.basename(icon_path))[0]+'.png')
+                if icon_name not in self.icon_file_names:
+                    self.icon_file_names.append(icon_name)
+                    self.update_filename_box()
+                    try:
+                        p = QIcon(icon_path).pixmap(QSize(128, 128))
+                        d = os.path.join(config_dir, 'cc_icons')
+                        if not os.path.exists(os.path.join(d, icon_name)):
+                            if not os.path.exists(d):
+                                os.makedirs(d)
+                            with open(os.path.join(d, icon_name), 'wb') as f:
+                                f.write(pixmap_to_data(p, format='PNG'))
+                    except Exception:  # best effort: a failed copy is reported but the name stays selectable
+                        traceback.print_exc()
+                self.icon_files.setCurrentIndex(self.icon_files.findText(icon_name))
+                self.icon_files.adjustSize()
+        except Exception:  # never let a failure in this slot propagate into the Qt event loop
+            traceback.print_exc()
+
+    def update_filename_box(self):
+        # Rebuild the icon combo box: an empty first entry, then the sorted icon file names, each with its icon
+        self.icon_files.clear()
+        self.icon_file_names.sort(key=sort_key)
+        self.icon_files.addItem('')
+        self.icon_files.addItems(self.icon_file_names)
+        for pos, fname in enumerate(self.icon_file_names, 1):  # position 0 is the empty entry
+            self.icon_files.setItemIcon(pos, QIcon(os.path.join(config_dir, 'cc_icons', fname)))
+
    def color_to_clipboard(self):
        app = QApplication.instance()
        c = app.clipboard()
        c.setText(unicode(self.color_name.currentText()))

+    def icon_to_clipboard(self):
+        # Put the text currently shown in the icon_files combo box on the application clipboard
+        chosen = unicode(self.icon_files.currentText())
+        QApplication.instance().clipboard().setText(chosen)
+
    def textbox_changed(self):
        cur_text = unicode(self.textbox.toPlainText())
        if self.last_text != cur_text:
@ -324,5 +405,14 @@ class TemplateDialog(QDialog, Ui_TemplateDialog):
                    _('The template box cannot be empty'), show=True)
                return

-        self.rule = (unicode(self.colored_field.currentText()), txt)
+            self.rule = (unicode(self.colored_field.itemData(
+                                self.colored_field.currentIndex()).toString()), txt)
+        elif self.iconing:
+            rt = 'icon' if self.icon_with_text.isChecked() else 'icon_only'
+            self.rule = (rt,
+                         unicode(self.icon_field.itemData(
+                                self.icon_field.currentIndex()).toString()),
+                         txt)
+        else:
+            self.rule = ('', txt)
        QDialog.accept(self)
--- a/src/calibre/gui2/dialogs/template_dialog.ui
+++ b/src/calibre/gui2/dialogs/template_dialog.ui
@ -21,6 +21,7 @@
  </property>
  <layout class="QVBoxLayout" name="verticalLayout">
   <item>
+    <widget class="QWidget" name="color_layout">
     <layout class="QGridLayout">
      <item row="0" column="0">
       <widget class="QLabel" name="colored_field_label">
@ -62,6 +63,97 @@
       </widget>
      </item>
     </layout>
+    </widget>
+   </item>
+   <item>
+    <widget class="QWidget" name="icon_layout">
+     <layout class="QGridLayout">
+      <item row="0" column="0" colspan="2">
+       <widget class="QGroupBox">
+        <property name="title">
+         <string>Kind</string>
+        </property>
+        <layout class="QHBoxLayout">
+         <item>
+          <widget class="QRadioButton" name="icon_without_text">
+           <property name="text">
+            <string>icon with no text</string>
+           </property>
+          </widget>
+         </item>
+         <item>
+          <widget class="QRadioButton" name="icon_with_text">
+           <property name="text">
+            <string>icon with text</string>
+           </property>
+          </widget>
+         </item>
+        </layout>
+        <property name="sizePolicy">
+         <sizepolicy hsizetype="Expanding" vsizetype="Fixed">
+          <horstretch>100</horstretch>
+          <verstretch>0</verstretch>
+         </sizepolicy>
+        </property>
+       </widget>
+      </item>
+      <item row="1" column="0">
+       <widget class="QLabel" name="icon_chooser_label">
+        <property name="text">
+         <string>Apply the icon to column:</string>
+        </property>
+        <property name="buddy">
+         <cstring>icon_field</cstring>
+        </property>
+       </widget>
+      </item>
+      <item row="1" column="1">
+       <widget class="QComboBox" name="icon_field">
+       </widget>
+      </item>
+      <item row="2" column="0">
+       <widget class="QLabel" name="image_chooser_label">
+        <property name="text">
+         <string>Copy an icon file name to the clipboard:</string>
+        </property>
+        <property name="buddy">
+         <cstring>icon_files</cstring>
+        </property>
+       </widget>
+      </item>
+      <item row="2" column="1">
+       <widget class="QWidget">
+        <layout class="QHBoxLayout">
+         <item>
+          <widget class="QComboBox" name="icon_files">
+          </widget>
+         </item>
+         <item>
+          <widget class="QToolButton" name="icon_copy_button">
+           <property name="icon">
+            <iconset resource="../../../../resources/images.qrc">
+             <normaloff>:/images/edit-copy.png</normaloff>:/images/edit-copy.png</iconset>
+           </property>
+           <property name="toolTip">
+            <string>Copy the selected icon file name to the clipboard</string>
+           </property>
+          </widget>
+         </item>
+         <item>
+          <widget class="QPushButton" name="filename_button">
+           <property name="text">
+            <string>Add icon</string>
+           </property>
+           <property name="toolTip">
+            <string>Add an icon file to the set of choices</string>
+           </property>
+          </widget>
+         </item>
+        </layout>
+       </widget>
+      </item>
+     </layout>
+    </widget>
   </item>
   <item>
    <widget class="QPlainTextEdit" name="textbox"/>
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@ -10,9 +10,9 @@ from functools import partial
 from future_builtins import map
 from collections import OrderedDict

-from PyQt4.Qt import (QTableView, Qt, QAbstractItemView, QMenu, pyqtSignal,
-    QModelIndex, QIcon, QItemSelection, QMimeData, QDrag, QApplication,
-    QPoint, QPixmap, QUrl, QImage, QPainter, QColor, QRect)
+from PyQt4.Qt import (QTableView, Qt, QAbstractItemView, QMenu, pyqtSignal, QFont,
+    QModelIndex, QIcon, QItemSelection, QMimeData, QDrag, QApplication, QStyle,
+    QPoint, QPixmap, QUrl, QImage, QPainter, QColor, QRect, QHeaderView, QStyleOptionHeader)

 from calibre.gui2.library.delegates import (RatingDelegate, PubDateDelegate,
    TextDelegate, DateDelegate, CompleteDelegate, CcTextDelegate,
@ -25,6 +25,54 @@ from calibre.gui2.library import DEFAULT_SORT
 from calibre.constants import filesystem_encoding
 from calibre import force_unicode

+class HeaderView(QHeaderView):  # {{{
+
+    def __init__(self, *args):
+        QHeaderView.__init__(self, *args)
+        self.hover = -1  # logical index of the section under the mouse, -1 when there is none
+        self.current_font = QFont(self.font())  # bold italic copy of the header font, used in paintSection
+        self.current_font.setBold(True)
+        self.current_font.setItalic(True)
+
+    def event(self, e):  # track the hovered section, then defer to QHeaderView for the actual handling
+        if e.type() in (e.HoverMove, e.HoverEnter):
+            self.hover = self.logicalIndexAt(e.pos())
+        elif e.type() in (e.Leave, e.HoverLeave):
+            self.hover = -1
+        return QHeaderView.event(self, e)
+
+    def paintSection(self, painter, rect, logical_index):  # fill a QStyleOptionHeader by hand and draw it with the widget style
+        opt = QStyleOptionHeader()
+        self.initStyleOption(opt)
+        opt.rect = rect
+        opt.section = logical_index
+        opt.orientation = self.orientation()
+        opt.textAlignment = Qt.AlignHCenter | Qt.AlignVCenter  # header text is always centered
+        model = self.parent().model()
+        opt.text = model.headerData(logical_index, opt.orientation, Qt.DisplayRole).toString()
+        if self.isSortIndicatorShown() and self.sortIndicatorSection() == logical_index:
+            opt.sortIndicator = QStyleOptionHeader.SortDown if self.sortIndicatorOrder() == Qt.AscendingOrder else QStyleOptionHeader.SortUp
+        opt.text = opt.fontMetrics.elidedText(opt.text, Qt.ElideRight, rect.width() - 4)  # elide on the right, keeping 4 pixels spare
+        if self.isEnabled():
+            opt.state |= QStyle.State_Enabled
+            if self.window().isActiveWindow():
+                opt.state |= QStyle.State_Active
+                if self.hover == logical_index:  # hover state is only set while the window is active
+                    opt.state |= QStyle.State_MouseOver
+        sm = self.selectionModel()
+        if opt.orientation == Qt.Vertical:
+            if sm.isRowSelected(logical_index, QModelIndex()):  # selected rows get a sunken row header
+                opt.state |= QStyle.State_Sunken
+
+        painter.save()  # paired with restore() below so the font change stays local to this section
+        if (  # the section matching the current index (column for horizontal, row for vertical) uses current_font
+                (opt.orientation == Qt.Horizontal and sm.currentIndex().column() == logical_index) or
+                (opt.orientation == Qt.Vertical and sm.currentIndex().row() == logical_index)):
+            painter.setFont(self.current_font)
+        self.style().drawControl(QStyle.CE_Header, opt, painter, self)
+        painter.restore()
+# }}}
+
 class PreserveViewState(object):  # {{{

    '''
@ -72,7 +120,8 @@ class PreserveViewState(object): # {{{
            return {x:getattr(self, x) for x in ('selected_ids', 'current_id',
                'vscroll', 'hscroll')}
        def fset(self, state):
-            for k, v in state.iteritems(): setattr(self, k, v)
+            for k, v in state.iteritems():
+                setattr(self, k, v)
            self.__exit__()
        return property(fget=fget, fset=fset)

@ -90,6 +139,7 @@ class BooksView(QTableView): # {{{

    def __init__(self, parent, modelcls=BooksModel, use_edit_metadata_dialog=True):
        QTableView.__init__(self, parent)
+        self.setProperty('highlight_current_item', 150)
        self.row_sizing_done = False

        if not tweaks['horizontal_scrolling_per_column']:
@ -152,12 +202,16 @@ class BooksView(QTableView): # {{{
        # {{{ Column Header setup
        self.can_add_columns = True
        self.was_restored = False
-        self.column_header = self.horizontalHeader()
+        self.column_header = HeaderView(Qt.Horizontal, self)
+        self.setHorizontalHeader(self.column_header)
        self.column_header.setMovable(True)
+        self.column_header.setClickable(True)
        self.column_header.sectionMoved.connect(self.save_state)
        self.column_header.setContextMenuPolicy(Qt.CustomContextMenu)
        self.column_header.customContextMenuRequested.connect(self.show_column_header_context_menu)
        self.column_header.sectionResized.connect(self.column_resized, Qt.QueuedConnection)
+        self.row_header = HeaderView(Qt.Vertical, self)
+        self.setVerticalHeader(self.row_header)
        # }}}

        self._model.database_changed.connect(self.database_changed)
@ -197,6 +251,16 @@ class BooksView(QTableView): # {{{
        elif action.startswith('align_'):
            alignment = action.partition('_')[-1]
            self._model.change_alignment(column, alignment)
+        elif action == 'quickview':
+            from calibre.customize.ui import find_plugin
+            qv = find_plugin('Show Quickview')
+            if qv:
+                rows = self.selectionModel().selectedRows()
+                if len(rows) > 0:
+                    current_row = rows[0].row()
+                    current_col = self.column_map.index(column)
+                    index = self.model().index(current_row, current_col)
+                    qv.actual_plugin_.change_quickview_column(index)

        self.save_state()

@ -225,7 +289,7 @@ class BooksView(QTableView): # {{{
                ac.setCheckable(True)
                ac.setChecked(True)
            if col not in ('ondevice', 'inlibrary') and \
-                    (not self.model().is_custom_column(col) or \
+                    (not self.model().is_custom_column(col) or
                    self.model().custom_columns[col]['datatype'] not in ('bool',
                        )):
                m = self.column_header_context_menu.addMenu(
@ -240,7 +304,14 @@ class BooksView(QTableView): # {{{
                            a.setCheckable(True)
                            a.setChecked(True)

-
+            if self._model.db.field_metadata[col]['is_category']:
+                act = self.column_header_context_menu.addAction(_('Quickview column %s') %
+                        name,
+                    partial(self.column_header_context_handler, action='quickview',
+                        column=col))
+                rows = self.selectionModel().selectedRows()
+                if len(rows) > 1:
+                    act.setEnabled(False)

            hidden_cols = [self.column_map[i] for i in
                    range(self.column_header.count()) if
@ -260,7 +331,6 @@ class BooksView(QTableView): # {{{
                        partial(self.column_header_context_handler,
                        action='show', column=col))

-
            self.column_header_context_menu.addSeparator()
            self.column_header_context_menu.addAction(
                    _('Shrink column if it is too wide to fit'),
@ -497,7 +567,6 @@ class BooksView(QTableView): # {{{
                        db.prefs[name] = ans
        return ans

-
    def restore_state(self):
        old_state = self.get_old_state()
        if old_state is None:
@ -820,7 +889,8 @@ class BooksView(QTableView): # {{{
        ids = frozenset(ids)
        m = self.model()
        for row in xrange(m.rowCount(QModelIndex())):
-            if len(row_map) >= len(ids): break
+            if len(row_map) >= len(ids):
+                break
            c = m.id(row)
            if c in ids:
                row_map[c] = row
@ -880,7 +950,8 @@ class BooksView(QTableView): # {{{
                pass
            return None
        def fset(self, val):
-            if val is None: return
+            if val is None:
+                return
            m = self.model()
            for row in xrange(m.rowCount(QModelIndex())):
                if m.id(row) == val:
@ -902,7 +973,8 @@ class BooksView(QTableView): # {{{
        column = ci.column()

        for i in xrange(ci.row()+1, self.row_count()):
-            if i in selected_rows: continue
+            if i in selected_rows:
+                continue
            try:
                return self.model().id(self.model().index(i, column))
            except:
@ -910,7 +982,8 @@ class BooksView(QTableView): # {{{

        # No unselected rows after the current row, look before
        for i in xrange(ci.row()-1, -1, -1):
-            if i in selected_rows: continue
+            if i in selected_rows:
+                continue
            try:
                return self.model().id(self.model().index(i, column))
            except:
--- a/src/calibre/gui2/preferences/coloring.py
+++ b/src/calibre/gui2/preferences/coloring.py
@ -636,10 +636,20 @@ class RulesModel(QAbstractListModel): # {{{

    def rule_to_html(self, kind, col, rule):
        if not isinstance(rule, Rule):
+            if kind == 'color':
                return _('''
                <p>Advanced Rule for column <b>%(col)s</b>:
                <pre>%(rule)s</pre>
                ''')%dict(col=col, rule=prepare_string_for_xml(rule))
+            else:
+                return _('''
+                <p>Advanced Rule: set <b>%(typ)s</b> for column <b>%(col)s</b>:
+                <pre>%(rule)s</pre>
+                ''')%dict(col=col,
+                          typ=icon_rule_kinds[0][0]
+                            if kind == icon_rule_kinds[0][1] else icon_rule_kinds[1][0],
+                          rule=prepare_string_for_xml(rule))
+
        conditions = [self.condition_to_html(c) for c in rule.conditions]

        trans_kind = 'not found'
@ -761,7 +771,7 @@ class EditRules(QWidget): # {{{
                ' what icon to use. Click the Add Rule button below'
                ' to get started.<p>You can <b>change an existing rule</b> by'
                ' double clicking it.'))
-            self.add_advanced_button.setVisible(False)
+#             self.add_advanced_button.setVisible(False)

    def add_rule(self):
        d = RuleEditor(self.model.fm, self.pref_name)
@ -774,6 +784,7 @@ class EditRules(QWidget): # {{{
                self.changed.emit()

    def add_advanced(self):
+        if self.pref_name == 'column_color_rules':
            td = TemplateDialog(self, '', mi=self.mi, fm=self.fm, color_field='')
            if td.exec_() == td.Accepted:
                col, r = td.rule
@ -781,6 +792,15 @@ class EditRules(QWidget): # {{{
                    idx = self.model.add_rule('color', col, r)
                    self.rules_view.scrollTo(idx)
                    self.changed.emit()
+        else:
+            td = TemplateDialog(self, '', mi=self.mi, fm=self.fm, icon_field_key='')
+            if td.exec_() == td.Accepted:
+                print(td.rule)
+                typ, col, r = td.rule
+                if typ and r and col:
+                    idx = self.model.add_rule(typ, col, r)
+                    self.rules_view.scrollTo(idx)
+                    self.changed.emit()

    def edit_rule(self, index):
        try:
@ -790,8 +810,12 @@ class EditRules(QWidget): # {{{
        if isinstance(rule, Rule):
            d = RuleEditor(self.model.fm, self.pref_name)
            d.apply_rule(kind, col, rule)
-        else:
+        elif self.pref_name == 'column_color_rules':
            d = TemplateDialog(self, rule, mi=self.mi, fm=self.fm, color_field=col)
+        else:
+            d = TemplateDialog(self, rule, mi=self.mi, fm=self.fm, icon_field_key=col,
+                               icon_rule_kind=kind)
+
        if d.exec_() == d.Accepted:
            if len(d.rule) == 2: # Convert template dialog rules to a triple
                d.rule = ('color', d.rule[0], d.rule[1])
--- a/src/calibre/gui2/preferences/tweaks.py
+++ b/src/calibre/gui2/preferences/tweaks.py
@ -172,7 +172,10 @@ class Tweaks(QAbstractListModel, SearchQueryParser): # {{{
            doc.append(line[1:].strip())
        doc = '\n'.join(doc)
        while True:
+            try:
                line = lines[pos]
+            except IndexError:
+                break
            if not line.strip():
                break
            spidx1 = line.find(' ')
--- a/src/calibre/gui2/store/stores/koobe_plugin.py
+++ b/src/calibre/gui2/store/stores/koobe_plugin.py
@ -8,7 +8,6 @@ __copyright__ = '2013, Tomasz Długosz <tomek3d@gmail.com>'
 __docformat__ = 'restructuredtext en'

 import urllib
-from base64 import b64encode
 from contextlib import closing

 from lxml import html
--- a/src/calibre/gui2/store/stores/woblink_plugin.py
+++ b/src/calibre/gui2/store/stores/woblink_plugin.py
@ -9,7 +9,6 @@ __docformat__ = 'restructuredtext en'

 import re
 import urllib
-from base64 import b64encode
 from contextlib import closing

 from lxml import html
--- a/src/calibre/gui2/wizard/init.py
+++ b/src/calibre/gui2/wizard/init.py
@ -113,7 +113,7 @@ class KindleDX(Kindle):
    id = 'kindledx'

 class KindleFire(KindleDX):
-    name = 'Kindle Fire'
+    name = 'Kindle Fire and Fire HD'
    id = 'kindle_fire'
    output_profile = 'kindle_fire'
    supports_color = True
@ -431,7 +431,8 @@ class KindlePage(QWizardPage, KindleUI):
            default = ac[2]
            if x.strip().endswith('@kindle.com'):
                accs.append((x, default))
-                if default: has_default = True
+                if default:
+                    has_default = True
        if has_default:
            accs = [x for x in accs if x[1]]
        if accs:
@ -450,7 +451,8 @@ class KindlePage(QWizardPage, KindleUI):
        if (len(parts) >= 2 and parts[0] and self.send_email_widget.set_email_settings(True)):
            conf = smtp_prefs()
            accounts = conf.parse().accounts
-            if not accounts: accounts = {}
+            if not accounts:
+                accounts = {}
            for y in accounts.values():
                y[2] = False
            accounts[x] = ['AZW, MOBI, TPZ, PRC, AZW1', True, True]
@ -484,9 +486,9 @@ class StanzaPage(QWizardPage, StanzaUI):
            c = server_config()
            c.set('port', p)

-
    def set_port(self, *args):
-        if not self.content_server.isChecked(): return
+        if not self.content_server.isChecked():
+            return
        import socket
        s = socket.socket()
        with closing(s):
@ -518,8 +520,7 @@ class DevicePage(QWizardPage, DeviceUI):
        self.manufacturer_view.setModel(self.man_model)
        previous = dynamic.get('welcome_wizard_device', False)
        if previous:
-            previous = [x for x in get_devices() if \
-                    x.id == previous]
+            previous = [x for x in get_devices() if x.id == previous]
            if not previous:
                previous = [Device]
            previous = previous[0]
@ -841,7 +842,6 @@ class FinishPage(QWizardPage, FinishUI):
        pass


-
 class Wizard(QWizard):

    BUTTON_TEXTS = {
@ -859,7 +859,6 @@ class Wizard(QWizard):
            _('&Finish')
            _('Commit')

-
    def __init__(self, parent):
        QWizard.__init__(self, parent)
        self.setWindowTitle(__appname__+' '+_('welcome wizard'))
--- a/src/calibre/library/save_to_disk.py
+++ b/src/calibre/library/save_to_disk.py
@ -47,6 +47,7 @@ FORMAT_ARG_DESCS = dict(
        pubdate=_('The published date'),
        last_modified=_('The date when the metadata for this book record'
            ' was last modified'),
+        languages=_('The language(s) of this book'),
        id=_('The calibre internal id')
        )

@ -283,7 +284,6 @@ def save_book_to_disk(id_, db, root, opts, length):
                pass


-
 def do_save_book_to_disk(id_, mi, cover, plugboards,
        format_map, root, opts, length):
    from calibre.ebooks.metadata.meta import set_metadata
--- a/src/calibre/library/server/content.py
+++ b/src/calibre/library/server/content.py
@ -61,7 +61,6 @@ class ContentServer(object):
                 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}
        return lm.replace('month', month[updated.month])

-
    def sort(self, items, field, order):
        field = self.db.data.sanitize_sort_field_name(field)
        if field not in self.db.field_metadata.sortable_field_keys():
@ -77,7 +76,7 @@ class ContentServer(object):
        try:
            id = int(id)
        except ValueError:
-            id = id.rpartition('_')[-1].partition('.')[0]
+            id = id.rpartition('.')[0].rpartition('_')[-1]
            match = re.search(r'\d+', id)
            if not match:
                raise cherrypy.HTTPError(404, 'id:%s not an integer'%id)
--- a/src/calibre/translations/af.po
+++ b/src/calibre/translations/af.po
--- a/src/calibre/translations/ar.po
+++ b/src/calibre/translations/ar.po
--- a/src/calibre/translations/ast.po
+++ b/src/calibre/translations/ast.po
--- a/src/calibre/translations/az.po
+++ b/src/calibre/translations/az.po
--- a/src/calibre/translations/ber.po
+++ b/src/calibre/translations/ber.po
--- a/src/calibre/translations/bg.po
+++ b/src/calibre/translations/bg.po
--- a/Show More
+++ b/Show More