0.9.30

commit b61e295fd5 in https://github.com/kovidgoyal/calibre.git
@@ -20,6 +20,60 @@
 # new recipes:
 #   - title:

- version: 0.9.30
  date: 2013-05-10

  new features:
    - title: "Kobo driver: Add support for showing 'Archived' books on the device. Also up the supported firmware version to 2.5.3."
      tickets: [1177677]

    - title: "Driver for Blackberry 9790"
      tickets: [1176607]

    - title: "Add a tweak to turn off the highlighting of the book count when using a virtual library (Preferences->Tweaks)"

    - title: "Add a button to clear the viewer search history in the viewer Preferences, under Miscellaneous"

    - title: "Add keyboard shortcuts to clear the virtual library and the additional restriction (Ctrl+Esc and Alt+Esc). Also use Shift+Esc to bring keyboard focus back to the book list. Can be changed under Preferences->Keyboard"

    - title: "Docx metadata: Read the language of the file, if present"

  bug fixes:
    - title: "Kobo driver: Fix unable to read SD card on OS X/Linux"
      tickets: [1174815]

    - title: "Content server: Fix unable to download ORIGINAL_* formats"
      tickets: [1177158]

    - title: "Fix regression that broke searching for terms containing a quote mark"
      tickets: [1177114]

    - title: "Fix regression that broke conversion of txt files when no input encoding is specified"
      tickets: [1176622]

    - title: "When changing to a virtual library, refresh the Book Details panel."
      tickets: [1176296]

    - title: "Fix regression that caused searching for user categories to break."
      tickets: [1176187]

    - title: "Fix error when downloading only covers and reviewing downloaded metadata."
      tickets: [1176253]

    - title: "MOBI metadata: Strip XML unsafe unicode codepoints when reading metadata from MOBI files."
      tickets: [1175965]

    - title: "Txt Input: Use the gbk encoding for txt files with detected encoding of gb2312."
      tickets: [1175974]

    - title: "When pressing Ctrl+Home/End preserve the horizontal scroll position in the book list"

  improved recipes:
    - NSFW
    - Go Comics
    - Various Polish news sources
    - The Sun

- version: 0.9.29
  date: 2013-05-03
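For context on the gb2312 fix above: gbk is a superset of gb2312, so decoding text that a detector labels gb2312 with the gbk codec can only rescue extra characters, never lose any. A minimal sketch of the idea, not calibre's actual code:

    # Sketch: widen a detected gb2312 encoding to gbk before decoding.
    # gbk decodes every valid gb2312 byte sequence plus many more.
    def widen_encoding(enc):
        if enc and enc.lower() == 'gb2312':
            return 'gbk'
        return enc

    raw = b'\xc4\xe3\xba\xc3'  # 'ni hao' encoded in GBK/GB2312
    text = raw.decode(widen_encoding('gb2312'))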
@@ -582,6 +582,12 @@ Calibre has several keyboard shortcuts to save you time and mouse movement. These
     - Open the advanced search dialog
   * - :kbd:`Esc`
     - Clear the current search
   * - :kbd:`Shift+Esc`
     - Focus the book list
   * - :kbd:`Ctrl+Esc`
     - Clear the virtual library
   * - :kbd:`Alt+Esc`
     - Clear the additional restriction
   * - :kbd:`N or F3`
     - Find the next book that matches the current search (only works if the highlight checkbox next to the search bar is checked)
   * - :kbd:`Shift+N or Shift+F3`
@@ -12,14 +12,17 @@ class BenchmarkPl(BasicNewsRecipe):
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    extra_css = 'ul {list-style-type: none;}'
    no_stylesheets = True
-   remove_attributes = ['style']
+   #remove_attributes = ['style']
    preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;"> Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
-   keep_only_tags = [dict(name='div', attrs={'class':['m_zwykly', 'gallery']}), dict(id='article')]
+   keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict(name='div', attrs={'class':['m_zwykly', 'gallery']}), dict(id='article')]
    remove_tags_after = dict(id='article')
    remove_tags = [dict(name='div', attrs={'class':['comments', 'body', 'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs = {'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})]

    INDEX = 'http://www.benchmark.pl'
    feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
             (u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')]
@@ -42,46 +45,16 @@ class BenchmarkPl(BasicNewsRecipe):
        for r in appendtag.findAll(attrs={'class':'changePage'}):
            r.extract()

-   def image_article(self, soup, appendtag):
-       nexturl = soup.find('div', attrs={'class':'preview'})
-       if nexturl:
-           nexturl = nexturl.find('a', attrs={'class':'move_next'})
-       image = appendtag.find('div', attrs={'class':'preview'}).div['style'][16:]
-       image = self.INDEX + image[:image.find("')")]
-       appendtag.find(attrs={'class':'preview'}).name='img'
-       appendtag.find(attrs={'class':'preview'})['src']=image
-       appendtag.find('a', attrs={'class':'move_next'}).extract()
-       while nexturl:
-           nexturl = self.INDEX + nexturl['href']
-           soup2 = self.index_to_soup(nexturl)
-           nexturl = soup2.find('a', attrs={'class':'move_next'})
-           image = soup2.find('div', attrs={'class':'preview'}).div['style'][16:]
-           image = self.INDEX + image[:image.find("')")]
-           soup2.find(attrs={'class':'preview'}).name='img'
-           soup2.find(attrs={'class':'preview'})['src']=image
-           pagetext = soup2.find('div', attrs={'class':'gallery'})
-           pagetext.find('div', attrs={'class':'title'}).extract()
-           pagetext.find('div', attrs={'class':'thumb'}).extract()
-           pagetext.find('div', attrs={'class':'panelOcenaObserwowane'}).extract()
-           if nexturl:
-               pagetext.find('a', attrs={'class':'move_next'}).extract()
-           pagetext.find('a', attrs={'class':'move_back'}).extract()
-           comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
-           for comment in comments:
-               comment.extract()
-           pos = len(appendtag.contents)
-           appendtag.insert(pos, pagetext)

    def preprocess_html(self, soup):
-       if soup.find('div', attrs={'class':'preview'}):
-           self.image_article(soup, soup.body)
-       else:
-           self.append_page(soup, soup.body)
+       self.append_page(soup, soup.body)
        for a in soup('a'):
            if a.has_key('href') and not a['href'].startswith('http'):
                a['href'] = self.INDEX + a['href']
        for r in soup.findAll(attrs={'class':['comments', 'body']}):
            r.extract()
        tag1 = soup.find(attrs={'class':'inlineGallery'})
        if tag1:
            for tag in tag1.findAll('li'):
                tag['style'] = 'float: left; margin-right: 10px;'
            tag1.findNext('p')['style'] = 'clear: both;'
        return soup
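The surviving pagination path here is the usual calibre pattern: preprocess_html calls append_page, which walks the "next page" links with index_to_soup and grafts each page's article body onto the first page. A minimal sketch of that pattern under assumed selectors (the 'next' and 'article' class names and the site root below are illustrative, not this recipe's exact markup):

    from calibre.web.feeds.news import BasicNewsRecipe

    class PaginatedRecipe(BasicNewsRecipe):
        INDEX = 'http://www.example.com'  # hypothetical site root

        def append_page(self, soup, appendtag):
            # Follow the chain of 'next' links and graft every page's
            # article body onto the first page.
            nexturl = soup.find('a', attrs={'class': 'next'})
            while nexturl:
                soup2 = self.index_to_soup(self.INDEX + nexturl['href'])
                pagetext = soup2.find('div', attrs={'class': 'article'})
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
                nexturl = soup2.find('a', attrs={'class': 'next'})

        def preprocess_html(self, soup):
            self.append_page(soup, soup.body)
            return soup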
@ -1,224 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Comics(BasicNewsRecipe):
|
||||
title = 'Comics.com'
|
||||
__author__ = 'Starson17'
|
||||
description = 'Comics from comics.com. You should customize this recipe to fetch only the comics you are interested in'
|
||||
language = 'en'
|
||||
use_embedded_content= False
|
||||
no_stylesheets = True
|
||||
oldest_article = 24
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.bsb.lib.tx.us/images/comics.com.gif'
|
||||
recursions = 0
|
||||
max_articles_per_feed = 10
|
||||
num_comics_to_get = 7
|
||||
simultaneous_downloads = 1
|
||||
# delay = 3
|
||||
|
||||
keep_only_tags = [dict(name='a', attrs={'class':'STR_StripImage'}),
|
||||
dict(name='div', attrs={'class':'STR_Date'})
|
||||
]
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
for title, url in [
|
||||
("9 Chickweed Lane", "http://comics.com/9_chickweed_lane"),
|
||||
("Agnes", "http://comics.com/agnes"),
|
||||
("Alley Oop", "http://comics.com/alley_oop"),
|
||||
("Andy Capp", "http://comics.com/andy_capp"),
|
||||
("Arlo & Janis", "http://comics.com/arlo&janis"),
|
||||
("B.C.", "http://comics.com/bc"),
|
||||
("Ballard Street", "http://comics.com/ballard_street"),
|
||||
# ("Ben", "http://comics.com/ben"),
|
||||
# ("Betty", "http://comics.com/betty"),
|
||||
# ("Big Nate", "http://comics.com/big_nate"),
|
||||
# ("Brevity", "http://comics.com/brevity"),
|
||||
# ("Candorville", "http://comics.com/candorville"),
|
||||
# ("Cheap Thrills", "http://comics.com/cheap_thrills"),
|
||||
# ("Committed", "http://comics.com/committed"),
|
||||
# ("Cow & Boy", "http://comics.com/cow&boy"),
|
||||
# ("Daddy's Home", "http://comics.com/daddys_home"),
|
||||
# ("Dog eat Doug", "http://comics.com/dog_eat_doug"),
|
||||
# ("Drabble", "http://comics.com/drabble"),
|
||||
# ("F Minus", "http://comics.com/f_minus"),
|
||||
# ("Family Tree", "http://comics.com/family_tree"),
|
||||
# ("Farcus", "http://comics.com/farcus"),
|
||||
# ("Fat Cats Classics", "http://comics.com/fat_cats_classics"),
|
||||
# ("Ferd'nand", "http://comics.com/ferdnand"),
|
||||
# ("Flight Deck", "http://comics.com/flight_deck"),
|
||||
# ("Flo & Friends", "http://comics.com/flo&friends"),
|
||||
# ("Fort Knox", "http://comics.com/fort_knox"),
|
||||
# ("Frank & Ernest", "http://comics.com/frank&ernest"),
|
||||
# ("Frazz", "http://comics.com/frazz"),
|
||||
# ("Free Range", "http://comics.com/free_range"),
|
||||
# ("Geech Classics", "http://comics.com/geech_classics"),
|
||||
# ("Get Fuzzy", "http://comics.com/get_fuzzy"),
|
||||
# ("Girls & Sports", "http://comics.com/girls&sports"),
|
||||
# ("Graffiti", "http://comics.com/graffiti"),
|
||||
# ("Grand Avenue", "http://comics.com/grand_avenue"),
|
||||
# ("Heathcliff", "http://comics.com/heathcliff"),
|
||||
# "Heathcliff, a street-smart and mischievous cat with many adventures."
|
||||
# ("Herb and Jamaal", "http://comics.com/herb_and_jamaal"),
|
||||
# ("Herman", "http://comics.com/herman"),
|
||||
# ("Home and Away", "http://comics.com/home_and_away"),
|
||||
# ("It's All About You", "http://comics.com/its_all_about_you"),
|
||||
# ("Jane's World", "http://comics.com/janes_world"),
|
||||
# ("Jump Start", "http://comics.com/jump_start"),
|
||||
# ("Kit 'N' Carlyle", "http://comics.com/kit_n_carlyle"),
|
||||
# ("Li'l Abner Classics", "http://comics.com/lil_abner_classics"),
|
||||
# ("Liberty Meadows", "http://comics.com/liberty_meadows"),
|
||||
# ("Little Dog Lost", "http://comics.com/little_dog_lost"),
|
||||
# ("Lola", "http://comics.com/lola"),
|
||||
# ("Luann", "http://comics.com/luann"),
|
||||
# ("Marmaduke", "http://comics.com/marmaduke"),
|
||||
# ("Meg! Classics", "http://comics.com/meg_classics"),
|
||||
# ("Minimum Security", "http://comics.com/minimum_security"),
|
||||
# ("Moderately Confused", "http://comics.com/moderately_confused"),
|
||||
# ("Momma", "http://comics.com/momma"),
|
||||
# ("Monty", "http://comics.com/monty"),
|
||||
# ("Motley Classics", "http://comics.com/motley_classics"),
|
||||
# ("Nancy", "http://comics.com/nancy"),
|
||||
# ("Natural Selection", "http://comics.com/natural_selection"),
|
||||
# ("Nest Heads", "http://comics.com/nest_heads"),
|
||||
# ("Off The Mark", "http://comics.com/off_the_mark"),
|
||||
# ("On a Claire Day", "http://comics.com/on_a_claire_day"),
|
||||
# ("One Big Happy Classics", "http://comics.com/one_big_happy_classics"),
|
||||
# ("Over the Hedge", "http://comics.com/over_the_hedge"),
|
||||
# ("PC and Pixel", "http://comics.com/pc_and_pixel"),
|
||||
# ("Peanuts", "http://comics.com/peanuts"),
|
||||
# ("Pearls Before Swine", "http://comics.com/pearls_before_swine"),
|
||||
# ("Pickles", "http://comics.com/pickles"),
|
||||
# ("Prickly City", "http://comics.com/prickly_city"),
|
||||
# ("Raising Duncan Classics", "http://comics.com/raising_duncan_classics"),
|
||||
# ("Reality Check", "http://comics.com/reality_check"),
|
||||
# ("Red & Rover", "http://comics.com/red&rover"),
|
||||
# ("Rip Haywire", "http://comics.com/rip_haywire"),
|
||||
# ("Ripley's Believe It or Not!", "http://comics.com/ripleys_believe_it_or_not"),
|
||||
# ("Rose Is Rose", "http://comics.com/rose_is_rose"),
|
||||
# ("Rubes", "http://comics.com/rubes"),
|
||||
# ("Rudy Park", "http://comics.com/rudy_park"),
|
||||
# ("Scary Gary", "http://comics.com/scary_gary"),
|
||||
# ("Shirley and Son Classics", "http://comics.com/shirley_and_son_classics"),
|
||||
# ("Soup To Nutz", "http://comics.com/soup_to_nutz"),
|
||||
# ("Speed Bump", "http://comics.com/speed_bump"),
|
||||
# ("Spot The Frog", "http://comics.com/spot_the_frog"),
|
||||
# ("State of the Union", "http://comics.com/state_of_the_union"),
|
||||
# ("Strange Brew", "http://comics.com/strange_brew"),
|
||||
# ("Tarzan Classics", "http://comics.com/tarzan_classics"),
|
||||
# ("That's Life", "http://comics.com/thats_life"),
|
||||
# ("The Barn", "http://comics.com/the_barn"),
|
||||
# ("The Born Loser", "http://comics.com/the_born_loser"),
|
||||
# ("The Buckets", "http://comics.com/the_buckets"),
|
||||
# ("The Dinette Set", "http://comics.com/the_dinette_set"),
|
||||
# ("The Grizzwells", "http://comics.com/the_grizzwells"),
|
||||
# ("The Humble Stumble", "http://comics.com/the_humble_stumble"),
|
||||
# ("The Knight Life", "http://comics.com/the_knight_life"),
|
||||
# ("The Meaning of Lila", "http://comics.com/the_meaning_of_lila"),
|
||||
# ("The Other Coast", "http://comics.com/the_other_coast"),
|
||||
# ("The Sunshine Club", "http://comics.com/the_sunshine_club"),
|
||||
# ("Unstrange Phenomena", "http://comics.com/unstrange_phenomena"),
|
||||
# ("Watch Your Head", "http://comics.com/watch_your_head"),
|
||||
# ("Wizard of Id", "http://comics.com/wizard_of_id"),
|
||||
# ("Working Daze", "http://comics.com/working_daze"),
|
||||
# ("Working It Out", "http://comics.com/working_it_out"),
|
||||
# ("Zack Hill", "http://comics.com/zack_hill"),
|
||||
# ("(Th)ink", "http://comics.com/think"),
|
||||
# "Tackling the political and social issues impacting communities of color."
|
||||
# ("Adam Zyglis", "http://comics.com/adam_zyglis"),
|
||||
# "Known for his excellent caricatures, as well as independent and incisive imagery. "
|
||||
# ("Andy Singer", "http://comics.com/andy_singer"),
|
||||
# ("Bill Day", "http://comics.com/bill_day"),
|
||||
# "Powerful images on sensitive issues."
|
||||
# ("Bill Schorr", "http://comics.com/bill_schorr"),
|
||||
# ("Bob Englehart", "http://comics.com/bob_englehart"),
|
||||
# ("Brian Fairrington", "http://comics.com/brian_fairrington"),
|
||||
# ("Bruce Beattie", "http://comics.com/bruce_beattie"),
|
||||
# ("Cam Cardow", "http://comics.com/cam_cardow"),
|
||||
# ("Chip Bok", "http://comics.com/chip_bok"),
|
||||
# ("Chris Britt", "http://comics.com/chris_britt"),
|
||||
# ("Chuck Asay", "http://comics.com/chuck_asay"),
|
||||
# ("Clay Bennett", "http://comics.com/clay_bennett"),
|
||||
# ("Daryl Cagle", "http://comics.com/daryl_cagle"),
|
||||
# ("David Fitzsimmons", "http://comics.com/david_fitzsimmons"),
|
||||
# "David Fitzsimmons is a new editorial cartoons on comics.com. He is also a staff writer and editorial cartoonist for the Arizona Daily Star. "
|
||||
# ("Drew Litton", "http://comics.com/drew_litton"),
|
||||
# "Drew Litton is an artist who is probably best known for his sports cartoons. He received the National Cartoonist Society Sports Cartoon Award for 1993. "
|
||||
# ("Ed Stein", "http://comics.com/ed_stein"),
|
||||
# "Winner of the Fischetti Award in 2006 and the Scripps Howard National Journalism Award, 1999, Ed Stein has been the editorial cartoonist for the Rocky Mountain News since 1978. "
|
||||
# ("Eric Allie", "http://comics.com/eric_allie"),
|
||||
# "Eric Allie is an editorial cartoonist with the Pioneer Press and CNS News. "
|
||||
# ("Gary Markstein", "http://comics.com/gary_markstein"),
|
||||
# ("Gary McCoy", "http://comics.com/gary_mccoy"),
|
||||
# "Gary McCoy is known for his editorial cartoons, humor and inane ramblings. He is a 2 time nominee for Best Magazine Cartoonist of the Year by the National Cartoonists Society. He resides in Belleville, IL. "
|
||||
# ("Gary Varvel", "http://comics.com/gary_varvel"),
|
||||
# ("Henry Payne", "http://comics.com/henry_payne"),
|
||||
# ("JD Crowe", "http://comics.com/jd_crowe"),
|
||||
# ("Jeff Parker", "http://comics.com/jeff_parker"),
|
||||
# ("Jeff Stahler", "http://comics.com/jeff_stahler"),
|
||||
# ("Jerry Holbert", "http://comics.com/jerry_holbert"),
|
||||
# ("John Cole", "http://comics.com/john_cole"),
|
||||
# ("John Darkow", "http://comics.com/john_darkow"),
|
||||
# "John Darkow is a contributing editorial cartoonist for the Humor Times as well as editoiral cartoonist for the Columbia Daily Tribune, Missouri"
|
||||
# ("John Sherffius", "http://comics.com/john_sherffius"),
|
||||
# ("Larry Wright", "http://comics.com/larry_wright"),
|
||||
# ("Lisa Benson", "http://comics.com/lisa_benson"),
|
||||
# ("Marshall Ramsey", "http://comics.com/marshall_ramsey"),
|
||||
# ("Matt Bors", "http://comics.com/matt_bors"),
|
||||
# ("Michael Ramirez", "http://comics.com/michael_ramirez"),
|
||||
# ("Mike Keefe", "http://comics.com/mike_keefe"),
|
||||
# ("Mike Luckovich", "http://comics.com/mike_luckovich"),
|
||||
# ("MIke Thompson", "http://comics.com/mike_thompson"),
|
||||
# ("Monte Wolverton", "http://comics.com/monte_wolverton"),
|
||||
# "Unique mix of perspectives"
|
||||
# ("Mr. Fish", "http://comics.com/mr_fish"),
|
||||
# "Side effects may include swelling"
|
||||
# ("Nate Beeler", "http://comics.com/nate_beeler"),
|
||||
# "Middle America meets the Beltway."
|
||||
# ("Nick Anderson", "http://comics.com/nick_anderson"),
|
||||
# ("Pat Bagley", "http://comics.com/pat_bagley"),
|
||||
# "Unfair and Totally Unbalanced."
|
||||
# ("Paul Szep", "http://comics.com/paul_szep"),
|
||||
# ("RJ Matson", "http://comics.com/rj_matson"),
|
||||
# "Power cartoons from NYC and Capitol Hill"
|
||||
# ("Rob Rogers", "http://comics.com/rob_rogers"),
|
||||
# "Humorous slant on current events"
|
||||
# ("Robert Ariail", "http://comics.com/robert_ariail"),
|
||||
# "Clever and unpredictable"
|
||||
# ("Scott Stantis", "http://comics.com/scott_stantis"),
|
||||
# ("Signe Wilkinson", "http://comics.com/signe_wilkinson"),
|
||||
# ("Steve Benson", "http://comics.com/steve_benson"),
|
||||
# ("Steve Breen", "http://comics.com/steve_breen"),
|
||||
# ("Steve Kelley", "http://comics.com/steve_kelley"),
|
||||
# ("Steve Sack", "http://comics.com/steve_sack"),
|
||||
]:
|
||||
articles = self.make_links(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
return feeds
|
||||
|
||||
def make_links(self, url):
|
||||
soup = self.index_to_soup(url)
|
||||
# print 'soup: ', soup
|
||||
title = ''
|
||||
current_articles = []
|
||||
pages = range(1, self.num_comics_to_get+1)
|
||||
for page in pages:
|
||||
page_url = url + '/?Page=' + str(page)
|
||||
soup = self.index_to_soup(page_url)
|
||||
if soup:
|
||||
strip_tag = soup.find('a', attrs={'class': 'STR_StripImage'})
|
||||
if strip_tag:
|
||||
print 'strip_tag: ', strip_tag
|
||||
title = strip_tag['title']
|
||||
print 'title: ', title
|
||||
current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
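The recipe deleted above builds its index by hand instead of reading RSS feeds: parse_index must return a list of (feed title, article list) pairs, each article being a dict with at least 'title' and 'url' keys. A minimal sketch of that contract (the URL is hypothetical):

    from calibre.web.feeds.news import BasicNewsRecipe

    class HandBuiltIndex(BasicNewsRecipe):
        title = 'Hand-built index example'

        def parse_index(self):
            articles = [{
                'title': 'Strip for today',
                'url': 'http://www.example.com/strip/1',  # hypothetical
                'date': '', 'description': '',
            }]
            # One (feed_title, [article, ...]) pair per feed.
            return [('Example strip', articles)]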
@@ -16,7 +16,7 @@ class Computerworld_pl(BasicNewsRecipe):
    preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''), (re.compile(ur'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''),]
    keep_only_tags = [dict(id=['szpaltaL', 's2011'])]
    remove_tags_after = dict(name='div', attrs={'class':'tresc'})
-   remove_tags = [dict(attrs={'class':['nnav', 'rMobi', 'tagi', 'rec']}),]
+   remove_tags = [dict(attrs={'class':['nnav', 'rMobi', 'tagi', 'rec']}), dict(name='a', attrs={'target':'_blank'})]
    feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]

    def skip_ad_pages(self, soup):
@@ -15,6 +15,7 @@ class CoNowegoPl(BasicNewsRecipe):
    no_stylesheets = True
+   remove_empty_feeds = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    keep_only_tags = [dict(name='div', attrs={'class':'news_list single_view'})]
    remove_tags = [dict(name='div', attrs={'class':['ni_bottom', 'ni_rank', 'ni_date']})]
    feeds = [(u'Aktualno\u015bci', u'http://www.conowego.pl/rss/aktualnosci-5/?type=100'), (u'Gaming', u'http://www.conowego.pl/rss/gaming-6/?type=100'), (u'Porady', u'http://www.conowego.pl/rss/porady-3/?type=100'), (u'Testy', u'http://www.conowego.pl/rss/testy-2/?type=100')]
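This and several of the Polish recipe hunks below add the same two hygiene flags: remove_empty_feeds drops feeds that yield no articles from the output, and ignore_duplicate_articles skips an article whose title or url has already been seen. A minimal recipe showing just those knobs (the feed URL is hypothetical):

    from calibre.web.feeds.news import BasicNewsRecipe

    class TidyFeeds(BasicNewsRecipe):
        title = 'Tidy feeds example'
        oldest_article = 7
        remove_empty_feeds = True                     # drop feeds with no articles
        ignore_duplicate_articles = {'title', 'url'}  # skip repeats by title or url
        feeds = [(u'News', u'http://www.example.com/rss.xml')]  # hypothetical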
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# vim:fileencoding=UTF-8

 __license__ = 'GPL v3'
 __author__ = 'Mori'
@@ -14,7 +15,7 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
    __author__ = 'Mori'
    language = 'pl'

-   title = u'Dziennik Internautow'
+   title = u'Dziennik Internautów'
    publisher = u'Dziennik Internaut\u00f3w Sp. z o.o.'
    description = u'Internet w \u017cyciu i biznesie. Porady, wywiady, interwencje, bezpiecze\u0144stwo w Sieci, technologia.'
@@ -16,7 +16,7 @@ class DziennikLodzki(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
    remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
-   remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
+   remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(name='a', attrs={'href':'http://www.dzienniklodzki.pl/piano'})]

    feeds = [(u'Na sygnale', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_nasygnale.xml?201302'), (u'\u0141\xf3d\u017a', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_lodz.xml?201302'), (u'Opinie', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_opinie.xml?201302'), (u'Pieni\u0105dze', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533763/index.rss?201302'), (u'Kultura', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533762/index.rss?201302'), (u'Sport', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533761/index.rss?201302'), (u'Akcje', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_akcje.xml?201302'), (u'M\xf3j Reporter', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_mojreporter.xml?201302'), (u'Studni\xf3wki', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_studniowki.xml?201302'), (u'Kraj', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_kraj.xml?201302'), (u'Zdrowie', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_zdrowie.xml?201302')]
@@ -16,7 +16,7 @@ class DziennikZachodni(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
    remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
-   remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(attrs={'href':'http://www.dziennikzachodni.pl/piano'})]
+   remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(attrs={'href':'http://www.dziennikzachodni.pl/piano'}), dict(name='aside')]

    feeds = [(u'Wszystkie', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533764/index.rss?201302'), (u'Wiadomo\u015bci', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533765/index.rss?201302'), (u'Regiony', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_regiony.xml?201302'), (u'Opinie', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_regiony.xml?201302'), (u'Blogi', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_blogi.xml?201302'), (u'Serwisy', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_serwisy.xml?201302'), (u'Sport', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533766/index.rss?201302'), (u'M\xf3j Reporter', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_mojreporter.xml?201302'), (u'Na narty', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_nanarty.xml?201302'), (u'Drogi', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_drogi.xml?201302'), (u'Pieni\u0105dze', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533768/index.rss?201302')]
@@ -16,6 +16,7 @@ class EchoDnia(BasicNewsRecipe):
    max_articles_per_feed = 100
+   remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}

    preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
@@ -12,7 +12,7 @@ class swiatczytnikow(BasicNewsRecipe):
    __author__ = u'Artur Stachecki'
    oldest_article = 7
    max_articles_per_feed = 100

    remove_empty_feeds = True
    remove_tags = [dict(name = 'div', attrs = {'class' : 'feedflare'})]

    feeds = [(u'Wpisy', u'http://feeds.feedburner.com/Ekundelekpl?format=xml')]
@@ -11,6 +11,7 @@ class eMuzyka(BasicNewsRecipe):
    cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
    no_stylesheets = True
    oldest_article = 7
+   remove_empty_feeds = True
    max_articles_per_feed = 100
    remove_attributes = ['style']
    keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
@@ -9,6 +9,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
    description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.'
    category = 'newspaper'
    publication_type = 'newspaper'
    #encoding = 'iso-8859-2'
    masthead_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
    INDEX = 'http://wyborcza.pl'
    remove_empty_feeds = True
@@ -16,6 +17,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags_before = dict(id='k0')
    remove_tags_after = dict(id='banP4')
@@ -24,7 +26,19 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
            (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
            (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
            (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
-           (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'), (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'), (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'), (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'), (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'), (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'), (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'), (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'), (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'), (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'), (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'), (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'), (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
+           (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
+           (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
+           (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
+           (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
+           (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
+           (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
+           (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
+           (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
+           (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
+           (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
+           (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
+           (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'),
+           (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
            ]

    def skip_ad_pages(self, soup):
@@ -31,6 +31,14 @@ class Gildia(BasicNewsRecipe):
        for link in content.findAll(name='a'):
            if 'fragment' in link['href']:
                return self.index_to_soup(link['href'], raw=True)
+       if 'relacj' in soup.title.string.lower():
+           for link in content.findAll(name='a'):
+               if 'relacj' in link['href']:
+                   return self.index_to_soup(link['href'], raw=True)
+       if 'wywiad' in soup.title.string.lower():
+           for link in content.findAll(name='a'):
+               if 'wywiad' in link['href']:
+                   return self.index_to_soup(link['href'], raw=True)

    def preprocess_html(self, soup):
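calibre calls skip_ad_pages on every downloaded page before any other processing: returning None keeps the page, while returning raw HTML substitutes it, which is how the Gildia recipe above hops from a teaser page to the full article. A condensed sketch of that pattern (the marker words mirror the hunk; the control flow is simplified, not the recipe's exact code):

    def skip_ad_pages(self, soup):
        # If the page title marks a teaser, follow the first matching
        # link and hand its raw HTML back to calibre instead.
        for marker in ('fragment', 'relacj', 'wywiad'):
            if marker in soup.title.string.lower():
                for link in soup.findAll('a', href=True):
                    if marker in link['href']:
                        return self.index_to_soup(link['href'], raw=True)
        return None  # keep the page as downloaded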
@@ -16,7 +16,7 @@ class GlosWielkopolski(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
    remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
-   remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
+   remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(name='a', attrs={'href':'http://www.gloswielkopolski.pl/newsletter/'})]

    feeds = [(u'Wszystkie', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533779/index.rss?201302'), (u'Wiadomo\u015bci', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533780/index.rss?201302'), (u'Sport', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533781/index.rss?201302'), (u'Kultura', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533782/index.rss?201302'), (u'Porady', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_porady.xml?201302'), (u'Blogi', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_blogi.xml?201302'), (u'Nasze akcje', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_naszeakcje.xml?201302'), (u'Opinie', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_opinie.xml?201302'), (u'Magazyn', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_magazyn.xml?201302')]
@@ -1,229 +1,443 @@
__license__ = 'GPL v3'
__copyright__ = 'Copyright 2010 Starson17'
'''
www.gocomics.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re

class Comics(BasicNewsRecipe):
    title = 'Comics.com'
class GoComics(BasicNewsRecipe):
    title = 'Go Comics'
    __author__ = 'Starson17'
    description = 'Comics from comics.com. You should customize this recipe to fetch only the comics you are interested in'
    __version__ = '1.06'
    __date__ = '07 June 2011'
    description = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
    category = 'news, comics'
    language = 'en'
    use_embedded_content= False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    cover_url = 'http://www.bsb.lib.tx.us/images/comics.com.gif'
    recursions = 0
    max_articles_per_feed = 10
    num_comics_to_get = 7
    simultaneous_downloads = 1
    # delay = 3
    remove_attributes = ['style']

    keep_only_tags = [dict(name='h1'),
                      dict(name='p', attrs={'class':'feature_item'})
    ####### USER PREFERENCES - COMICS, IMAGE SIZE AND NUMBER OF COMICS TO RETRIEVE ########
    # num_comics_to_get - I've tried up to 99 on Calvin&Hobbes
    num_comics_to_get = 1
    # comic_size 300 is small, 600 is medium, 900 is large, 1500 is extra-large
    comic_size = 900
    # CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS
    # Please do not overload their servers by selecting all comics and 1000 strips from each!

    conversion_options = {'linearize_tables' : True
                        , 'comment' : description
                        , 'tags' : category
                        , 'language' : language
                        }

    keep_only_tags = [dict(name='div', attrs={'class':['feature','banner']}),
                      ]

    remove_tags = [dict(name='a', attrs={'class':['beginning','prev','cal','next','newest']}),
                   dict(name='div', attrs={'class':['tag-wrapper']}),
                   dict(name='a', attrs={'href':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
                   dict(name='img', attrs={'src':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
                   dict(name='ul', attrs={'class':['share-nav','feature-nav']}),
                   ]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.addheaders = [('Referer','http://www.gocomics.com/')]
        return br

    def parse_index(self):
        feeds = []
        for title, url in [
            ("9 Chickweed Lane", "http://gocomics.com/9_chickweed_lane"),
            ("Agnes", "http://gocomics.com/agnes"),
            ("Alley Oop", "http://gocomics.com/alley_oop"),
            ("Andy Capp", "http://gocomics.com/andy_capp"),
            ("Arlo & Janis", "http://gocomics.com/arlo&janis"),
            ("B.C.", "http://gocomics.com/bc"),
            ("Ballard Street", "http://gocomics.com/ballard_street"),
            # ("Ben", "http://comics.com/ben"),
            # ("Betty", "http://comics.com/betty"),
            # ("Big Nate", "http://comics.com/big_nate"),
            # ("Brevity", "http://comics.com/brevity"),
            # ("Candorville", "http://comics.com/candorville"),
            # ("Cheap Thrills", "http://comics.com/cheap_thrills"),
            # ("Committed", "http://comics.com/committed"),
            # ("Cow & Boy", "http://comics.com/cow&boy"),
            # ("Daddy's Home", "http://comics.com/daddys_home"),
            # ("Dog eat Doug", "http://comics.com/dog_eat_doug"),
            # ("Drabble", "http://comics.com/drabble"),
            # ("F Minus", "http://comics.com/f_minus"),
            # ("Family Tree", "http://comics.com/family_tree"),
            # ("Farcus", "http://comics.com/farcus"),
            # ("Fat Cats Classics", "http://comics.com/fat_cats_classics"),
            # ("Ferd'nand", "http://comics.com/ferdnand"),
            # ("Flight Deck", "http://comics.com/flight_deck"),
            # ("Flo & Friends", "http://comics.com/flo&friends"),
            # ("Fort Knox", "http://comics.com/fort_knox"),
            # ("Frank & Ernest", "http://comics.com/frank&ernest"),
            # ("Frazz", "http://comics.com/frazz"),
            # ("Free Range", "http://comics.com/free_range"),
            # ("Geech Classics", "http://comics.com/geech_classics"),
            # ("Get Fuzzy", "http://comics.com/get_fuzzy"),
            # ("Girls & Sports", "http://comics.com/girls&sports"),
            # ("Graffiti", "http://comics.com/graffiti"),
            # ("Grand Avenue", "http://comics.com/grand_avenue"),
            # ("Heathcliff", "http://comics.com/heathcliff"),
            # "Heathcliff, a street-smart and mischievous cat with many adventures."
            # ("Herb and Jamaal", "http://comics.com/herb_and_jamaal"),
            # ("Herman", "http://comics.com/herman"),
            # ("Home and Away", "http://comics.com/home_and_away"),
            # ("It's All About You", "http://comics.com/its_all_about_you"),
            # ("Jane's World", "http://comics.com/janes_world"),
            # ("Jump Start", "http://comics.com/jump_start"),
            # ("Kit 'N' Carlyle", "http://comics.com/kit_n_carlyle"),
            # ("Li'l Abner Classics", "http://comics.com/lil_abner_classics"),
            # ("Liberty Meadows", "http://comics.com/liberty_meadows"),
            # ("Little Dog Lost", "http://comics.com/little_dog_lost"),
            # ("Lola", "http://comics.com/lola"),
            # ("Luann", "http://comics.com/luann"),
            # ("Marmaduke", "http://comics.com/marmaduke"),
            # ("Meg! Classics", "http://comics.com/meg_classics"),
            # ("Minimum Security", "http://comics.com/minimum_security"),
            # ("Moderately Confused", "http://comics.com/moderately_confused"),
            # ("Momma", "http://comics.com/momma"),
            # ("Monty", "http://comics.com/monty"),
            # ("Motley Classics", "http://comics.com/motley_classics"),
            # ("Nancy", "http://comics.com/nancy"),
            # ("Natural Selection", "http://comics.com/natural_selection"),
            # ("Nest Heads", "http://comics.com/nest_heads"),
            # ("Off The Mark", "http://comics.com/off_the_mark"),
            # ("On a Claire Day", "http://comics.com/on_a_claire_day"),
            # ("One Big Happy Classics", "http://comics.com/one_big_happy_classics"),
            # ("Over the Hedge", "http://comics.com/over_the_hedge"),
            # ("PC and Pixel", "http://comics.com/pc_and_pixel"),
            # ("Peanuts", "http://comics.com/peanuts"),
            # ("Pearls Before Swine", "http://comics.com/pearls_before_swine"),
            # ("Pickles", "http://comics.com/pickles"),
            # ("Prickly City", "http://comics.com/prickly_city"),
            # ("Raising Duncan Classics", "http://comics.com/raising_duncan_classics"),
            # ("Reality Check", "http://comics.com/reality_check"),
            # ("Red & Rover", "http://comics.com/red&rover"),
            # ("Rip Haywire", "http://comics.com/rip_haywire"),
            # ("Ripley's Believe It or Not!", "http://comics.com/ripleys_believe_it_or_not"),
            # ("Rose Is Rose", "http://comics.com/rose_is_rose"),
            # ("Rubes", "http://comics.com/rubes"),
            # ("Rudy Park", "http://comics.com/rudy_park"),
            # ("Scary Gary", "http://comics.com/scary_gary"),
            # ("Shirley and Son Classics", "http://comics.com/shirley_and_son_classics"),
            # ("Soup To Nutz", "http://comics.com/soup_to_nutz"),
            # ("Speed Bump", "http://comics.com/speed_bump"),
            # ("Spot The Frog", "http://comics.com/spot_the_frog"),
            # ("State of the Union", "http://comics.com/state_of_the_union"),
            # ("Strange Brew", "http://comics.com/strange_brew"),
            # ("Tarzan Classics", "http://comics.com/tarzan_classics"),
            # ("That's Life", "http://comics.com/thats_life"),
            # ("The Barn", "http://comics.com/the_barn"),
            # ("The Born Loser", "http://comics.com/the_born_loser"),
            # ("The Buckets", "http://comics.com/the_buckets"),
            # ("The Dinette Set", "http://comics.com/the_dinette_set"),
            # ("The Grizzwells", "http://comics.com/the_grizzwells"),
            # ("The Humble Stumble", "http://comics.com/the_humble_stumble"),
            # ("The Knight Life", "http://comics.com/the_knight_life"),
            # ("The Meaning of Lila", "http://comics.com/the_meaning_of_lila"),
            # ("The Other Coast", "http://comics.com/the_other_coast"),
            # ("The Sunshine Club", "http://comics.com/the_sunshine_club"),
            # ("Unstrange Phenomena", "http://comics.com/unstrange_phenomena"),
            # ("Watch Your Head", "http://comics.com/watch_your_head"),
            # ("Wizard of Id", "http://comics.com/wizard_of_id"),
            # ("Working Daze", "http://comics.com/working_daze"),
            # ("Working It Out", "http://comics.com/working_it_out"),
            # ("Zack Hill", "http://comics.com/zack_hill"),
            # ("(Th)ink", "http://comics.com/think"),
            # "Tackling the political and social issues impacting communities of color."
            # ("Adam Zyglis", "http://comics.com/adam_zyglis"),
            # "Known for his excellent caricatures, as well as independent and incisive imagery."
            # ("Andy Singer", "http://comics.com/andy_singer"),
            # ("Bill Day", "http://comics.com/bill_day"),
            # "Powerful images on sensitive issues."
            # ("Bill Schorr", "http://comics.com/bill_schorr"),
            # ("Bob Englehart", "http://comics.com/bob_englehart"),
            # ("Brian Fairrington", "http://comics.com/brian_fairrington"),
            # ("Bruce Beattie", "http://comics.com/bruce_beattie"),
            # ("Cam Cardow", "http://comics.com/cam_cardow"),
            # ("Chip Bok", "http://comics.com/chip_bok"),
            # ("Chris Britt", "http://comics.com/chris_britt"),
            # ("Chuck Asay", "http://comics.com/chuck_asay"),
            # ("Clay Bennett", "http://comics.com/clay_bennett"),
            # ("Daryl Cagle", "http://comics.com/daryl_cagle"),
            # ("David Fitzsimmons", "http://comics.com/david_fitzsimmons"),
            # "David Fitzsimmons is a new editorial cartoonist on comics.com. He is also a staff writer and editorial cartoonist for the Arizona Daily Star."
            # ("Drew Litton", "http://comics.com/drew_litton"),
            # "Drew Litton is an artist who is probably best known for his sports cartoons. He received the National Cartoonist Society Sports Cartoon Award for 1993."
            # ("Ed Stein", "http://comics.com/ed_stein"),
            # "Winner of the Fischetti Award in 2006 and the Scripps Howard National Journalism Award, 1999, Ed Stein has been the editorial cartoonist for the Rocky Mountain News since 1978."
            # ("Eric Allie", "http://comics.com/eric_allie"),
            # "Eric Allie is an editorial cartoonist with the Pioneer Press and CNS News."
            # ("Gary Markstein", "http://comics.com/gary_markstein"),
            # ("Gary McCoy", "http://comics.com/gary_mccoy"),
            # "Gary McCoy is known for his editorial cartoons, humor and inane ramblings. He is a 2 time nominee for Best Magazine Cartoonist of the Year by the National Cartoonists Society. He resides in Belleville, IL."
            # ("Gary Varvel", "http://comics.com/gary_varvel"),
            # ("Henry Payne", "http://comics.com/henry_payne"),
            # ("JD Crowe", "http://comics.com/jd_crowe"),
            # ("Jeff Parker", "http://comics.com/jeff_parker"),
            # ("Jeff Stahler", "http://comics.com/jeff_stahler"),
            # ("Jerry Holbert", "http://comics.com/jerry_holbert"),
            # ("John Cole", "http://comics.com/john_cole"),
            # ("John Darkow", "http://comics.com/john_darkow"),
            # "John Darkow is a contributing editorial cartoonist for the Humor Times as well as editorial cartoonist for the Columbia Daily Tribune, Missouri"
            # ("John Sherffius", "http://comics.com/john_sherffius"),
            # ("Larry Wright", "http://comics.com/larry_wright"),
            # ("Lisa Benson", "http://comics.com/lisa_benson"),
            # ("Marshall Ramsey", "http://comics.com/marshall_ramsey"),
            # ("Matt Bors", "http://comics.com/matt_bors"),
            # ("Michael Ramirez", "http://comics.com/michael_ramirez"),
            # ("Mike Keefe", "http://comics.com/mike_keefe"),
            # ("Mike Luckovich", "http://comics.com/mike_luckovich"),
            # ("Mike Thompson", "http://comics.com/mike_thompson"),
            # ("Monte Wolverton", "http://comics.com/monte_wolverton"),
            # "Unique mix of perspectives"
            # ("Mr. Fish", "http://comics.com/mr_fish"),
            # "Side effects may include swelling"
            # ("Nate Beeler", "http://comics.com/nate_beeler"),
            # "Middle America meets the Beltway."
            # ("Nick Anderson", "http://comics.com/nick_anderson"),
            # ("Pat Bagley", "http://comics.com/pat_bagley"),
            # "Unfair and Totally Unbalanced."
            # ("Paul Szep", "http://comics.com/paul_szep"),
            # ("RJ Matson", "http://comics.com/rj_matson"),
            # "Power cartoons from NYC and Capitol Hill"
            # ("Rob Rogers", "http://comics.com/rob_rogers"),
            # "Humorous slant on current events"
            # ("Robert Ariail", "http://comics.com/robert_ariail"),
            # "Clever and unpredictable"
            # ("Scott Stantis", "http://comics.com/scott_stantis"),
            # ("Signe Wilkinson", "http://comics.com/signe_wilkinson"),
            # ("Steve Benson", "http://comics.com/steve_benson"),
            # ("Steve Breen", "http://comics.com/steve_breen"),
            # ("Steve Kelley", "http://comics.com/steve_kelley"),
            # ("Steve Sack", "http://comics.com/steve_sack"),
        ]:
            #(u"2 Cows and a Chicken", u"http://www.gocomics.com/2cowsandachicken"),
            #(u"9 Chickweed Lane", u"http://www.gocomics.com/9chickweedlane"),
            #(u"Adam At Home", u"http://www.gocomics.com/adamathome"),
            #(u"Agnes", u"http://www.gocomics.com/agnes"),
            #(u"Alley Oop", u"http://www.gocomics.com/alleyoop"),
            #(u"Andy Capp", u"http://www.gocomics.com/andycapp"),
            (u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"),
            #(u"Annie", u"http://www.gocomics.com/annie"),
            #(u"Arlo & Janis", u"http://www.gocomics.com/arloandjanis"),
            #(u"Ask Shagg", u"http://www.gocomics.com/askshagg"),
            (u"B.C.", u"http://www.gocomics.com/bc"),
            #(u"Back in the Day", u"http://www.gocomics.com/backintheday"),
            #(u"Bad Reporter", u"http://www.gocomics.com/badreporter"),
            (u"Baldo", u"http://www.gocomics.com/baldo"),
            #(u"Ballard Street", u"http://www.gocomics.com/ballardstreet"),
            #(u"Barkeater Lake", u"http://www.gocomics.com/barkeaterlake"),
            #(u"Basic Instructions", u"http://www.gocomics.com/basicinstructions"),
            #(u"Ben", u"http://www.gocomics.com/ben"),
            #(u"Betty", u"http://www.gocomics.com/betty"),
            #(u"Bewley", u"http://www.gocomics.com/bewley"),
            #(u"Big Nate", u"http://www.gocomics.com/bignate"),
            #(u"Big Top", u"http://www.gocomics.com/bigtop"),
            #(u"Biographic", u"http://www.gocomics.com/biographic"),
            #(u"Birdbrains", u"http://www.gocomics.com/birdbrains"),
            #(u"Bleeker: The Rechargeable Dog", u"http://www.gocomics.com/bleeker"),
            #(u"Bliss", u"http://www.gocomics.com/bliss"),
            #(u"Bloom County", u"http://www.gocomics.com/bloomcounty"),
            #(u"Bo Nanas", u"http://www.gocomics.com/bonanas"),
            #(u"Bob the Squirrel", u"http://www.gocomics.com/bobthesquirrel"),
            #(u"Boomerangs", u"http://www.gocomics.com/boomerangs"),
            #(u"Bottomliners", u"http://www.gocomics.com/bottomliners"),
            (u"Bound and Gagged", u"http://www.gocomics.com/boundandgagged"),
            #(u"Brainwaves", u"http://www.gocomics.com/brainwaves"),
            #(u"Brenda Starr", u"http://www.gocomics.com/brendastarr"),
            #(u"Brevity", u"http://www.gocomics.com/brevity"),
            #(u"Brewster Rockit", u"http://www.gocomics.com/brewsterrockit"),
            (u"Broom Hilda", u"http://www.gocomics.com/broomhilda"),
            (u"Calvin and Hobbes", u"http://www.gocomics.com/calvinandhobbes"),
            #(u"Candorville", u"http://www.gocomics.com/candorville"),
            #(u"Cathy", u"http://www.gocomics.com/cathy"),
            #(u"C'est la Vie", u"http://www.gocomics.com/cestlavie"),
            #(u"Cheap Thrills", u"http://www.gocomics.com/cheapthrills"),
            #(u"Chuckle Bros", u"http://www.gocomics.com/chucklebros"),
            #(u"Citizen Dog", u"http://www.gocomics.com/citizendog"),
            #(u"Cleats", u"http://www.gocomics.com/cleats"),
            #(u"Close to Home", u"http://www.gocomics.com/closetohome"),
            #(u"Committed", u"http://www.gocomics.com/committed"),
            #(u"Compu-toon", u"http://www.gocomics.com/compu-toon"),
            #(u"Cornered", u"http://www.gocomics.com/cornered"),
            #(u"Cow & Boy", u"http://www.gocomics.com/cow&boy"),
            #(u"Cul de Sac", u"http://www.gocomics.com/culdesac"),
            #(u"Daddy's Home", u"http://www.gocomics.com/daddyshome"),
            #(u"Deep Cover", u"http://www.gocomics.com/deepcover"),
            #(u"Dick Tracy", u"http://www.gocomics.com/dicktracy"),
            #(u"Dog Eat Doug", u"http://www.gocomics.com/dogeatdoug"),
            #(u"Domestic Abuse", u"http://www.gocomics.com/domesticabuse"),
            #(u"Doodles", u"http://www.gocomics.com/doodles"),
            #(u"Doonesbury", u"http://www.gocomics.com/doonesbury"),
            #(u"Drabble", u"http://www.gocomics.com/drabble"),
            #(u"Eek!", u"http://www.gocomics.com/eek"),
            #(u"F Minus", u"http://www.gocomics.com/fminus"),
            #(u"Family Tree", u"http://www.gocomics.com/familytree"),
            #(u"Farcus", u"http://www.gocomics.com/farcus"),
            #(u"Fat Cats Classics", u"http://www.gocomics.com/fatcatsclassics"),
            #(u"Ferd'nand", u"http://www.gocomics.com/ferdnand"),
            #(u"Flight Deck", u"http://www.gocomics.com/flightdeck"),
            #(u"Flo and Friends", u"http://www.gocomics.com/floandfriends"),
            (u"For Better or For Worse", u"http://www.gocomics.com/forbetterorforworse"),
            #(u"For Heaven's Sake", u"http://www.gocomics.com/forheavenssake"),
            #(u"Fort Knox", u"http://www.gocomics.com/fortknox"),
            #(u"FoxTrot Classics", u"http://www.gocomics.com/foxtrotclassics"),
            #(u"FoxTrot", u"http://www.gocomics.com/foxtrot"),
            (u"Frank & Ernest", u"http://www.gocomics.com/frankandernest"),
            #(u"Frazz", u"http://www.gocomics.com/frazz"),
            #(u"Fred Basset", u"http://www.gocomics.com/fredbasset"),
            #(u"Free Range", u"http://www.gocomics.com/freerange"),
            #(u"Frog Applause", u"http://www.gocomics.com/frogapplause"),
            #(u"Garfield Minus Garfield", u"http://www.gocomics.com/garfieldminusgarfield"),
            (u"Garfield", u"http://www.gocomics.com/garfield"),
            #(u"Gasoline Alley", u"http://www.gocomics.com/gasolinealley"),
            #(u"Geech Classics", u"http://www.gocomics.com/geechclassics"),
            (u"Get Fuzzy", u"http://www.gocomics.com/getfuzzy"),
            #(u"Gil Thorp", u"http://www.gocomics.com/gilthorp"),
            #(u"Ginger Meggs", u"http://www.gocomics.com/gingermeggs"),
            #(u"Girls & Sports", u"http://www.gocomics.com/girlsandsports"),
            #(u"Graffiti", u"http://www.gocomics.com/graffiti"),
            #(u"Grand Avenue", u"http://www.gocomics.com/grandavenue"),
            #(u"Haiku Ewe", u"http://www.gocomics.com/haikuewe"),
            #(u"Heart of the City", u"http://www.gocomics.com/heartofthecity"),
            #(u"Herb and Jamaal", u"http://www.gocomics.com/herbandjamaal"),
            #(u"Home and Away", u"http://www.gocomics.com/homeandaway"),
            #(u"Housebroken", u"http://www.gocomics.com/housebroken"),
            #(u"Hubert and Abby", u"http://www.gocomics.com/hubertandabby"),
            #(u"Imagine This", u"http://www.gocomics.com/imaginethis"),
            #(u"In the Bleachers", u"http://www.gocomics.com/inthebleachers"),
            #(u"In the Sticks", u"http://www.gocomics.com/inthesticks"),
            #(u"Ink Pen", u"http://www.gocomics.com/inkpen"),
            #(u"It's All About You", u"http://www.gocomics.com/itsallaboutyou"),
            #(u"Jane's World", u"http://www.gocomics.com/janesworld"),
            #(u"Joe Vanilla", u"http://www.gocomics.com/joevanilla"),
            #(u"Jump Start", u"http://www.gocomics.com/jumpstart"),
            #(u"Kit 'N' Carlyle", u"http://www.gocomics.com/kitandcarlyle"),
            #(u"La Cucaracha", u"http://www.gocomics.com/lacucaracha"),
            #(u"Last Kiss", u"http://www.gocomics.com/lastkiss"),
            #(u"Legend of Bill", u"http://www.gocomics.com/legendofbill"),
            #(u"Liberty Meadows", u"http://www.gocomics.com/libertymeadows"),
            #(u"Li'l Abner Classics", u"http://www.gocomics.com/lilabnerclassics"),
            #(u"Lio", u"http://www.gocomics.com/lio"),
            #(u"Little Dog Lost", u"http://www.gocomics.com/littledoglost"),
            #(u"Little Otto", u"http://www.gocomics.com/littleotto"),
            #(u"Lola", u"http://www.gocomics.com/lola"),
            #(u"Love Is...", u"http://www.gocomics.com/loveis"),
            (u"Luann", u"http://www.gocomics.com/luann"),
            #(u"Maintaining", u"http://www.gocomics.com/maintaining"),
            #(u"Meg! Classics", u"http://www.gocomics.com/megclassics"),
            #(u"Middle-Aged White Guy", u"http://www.gocomics.com/middleagedwhiteguy"),
            #(u"Minimum Security", u"http://www.gocomics.com/minimumsecurity"),
            #(u"Moderately Confused", u"http://www.gocomics.com/moderatelyconfused"),
            (u"Momma", u"http://www.gocomics.com/momma"),
            #(u"Monty", u"http://www.gocomics.com/monty"),
            #(u"Motley Classics", u"http://www.gocomics.com/motleyclassics"),
            #(u"Mutt & Jeff", u"http://www.gocomics.com/muttandjeff"),
            #(u"Mythtickle", u"http://www.gocomics.com/mythtickle"),
            #(u"Nancy", u"http://www.gocomics.com/nancy"),
            #(u"Natural Selection", u"http://www.gocomics.com/naturalselection"),
            #(u"Nest Heads", u"http://www.gocomics.com/nestheads"),
            #(u"NEUROTICA", u"http://www.gocomics.com/neurotica"),
            #(u"New Adventures of Queen Victoria", u"http://www.gocomics.com/thenewadventuresofqueenvictoria"),
            (u"Non Sequitur", u"http://www.gocomics.com/nonsequitur"),
            #(u"Off The Mark", u"http://www.gocomics.com/offthemark"),
            #(u"On A Claire Day", u"http://www.gocomics.com/onaclaireday"),
            #(u"One Big Happy Classics", u"http://www.gocomics.com/onebighappyclassics"),
            #(u"One Big Happy", u"http://www.gocomics.com/onebighappy"),
            #(u"Out of the Gene Pool Re-Runs", u"http://www.gocomics.com/outofthegenepool"),
            #(u"Over the Hedge", u"http://www.gocomics.com/overthehedge"),
            #(u"Overboard", u"http://www.gocomics.com/overboard"),
            #(u"PC and Pixel", u"http://www.gocomics.com/pcandpixel"),
            (u"Peanuts", u"http://www.gocomics.com/peanuts"),
            (u"Pearls Before Swine", u"http://www.gocomics.com/pearlsbeforeswine"),
            #(u"Pibgorn Sketches", u"http://www.gocomics.com/pibgornsketches"),
            #(u"Pibgorn", u"http://www.gocomics.com/pibgorn"),
            #(u"Pickles", u"http://www.gocomics.com/pickles"),
            #(u"Pinkerton", u"http://www.gocomics.com/pinkerton"),
            #(u"Pluggers", u"http://www.gocomics.com/pluggers"),
            (u"Pooch Cafe", u"http://www.gocomics.com/poochcafe"),
            #(u"PreTeena", u"http://www.gocomics.com/preteena"),
            #(u"Prickly City", u"http://www.gocomics.com/pricklycity"),
            #(u"Rabbits Against Magic", u"http://www.gocomics.com/rabbitsagainstmagic"),
            #(u"Raising Duncan Classics", u"http://www.gocomics.com/raisingduncanclassics"),
            #(u"Real Life Adventures", u"http://www.gocomics.com/reallifeadventures"),
            #(u"Reality Check", u"http://www.gocomics.com/realitycheck"),
            #(u"Red and Rover", u"http://www.gocomics.com/redandrover"),
            #(u"Red Meat", u"http://www.gocomics.com/redmeat"),
            #(u"Reynolds Unwrapped", u"http://www.gocomics.com/reynoldsunwrapped"),
            #(u"Rip Haywire", u"http://www.gocomics.com/riphaywire"),
            #(u"Ronaldinho Gaucho", u"http://www.gocomics.com/ronaldinhogaucho"),
            (u"Rose Is Rose", u"http://www.gocomics.com/roseisrose"),
            #(u"Rudy Park", u"http://www.gocomics.com/rudypark"),
            #(u"Scary Gary", u"http://www.gocomics.com/scarygary"),
            #(u"Shirley and Son Classics", u"http://www.gocomics.com/shirleyandsonclassics"),
            (u"Shoe", u"http://www.gocomics.com/shoe"),
            #(u"Shoecabbage", u"http://www.gocomics.com/shoecabbage"),
            #(u"Skin Horse", u"http://www.gocomics.com/skinhorse"),
            #(u"Slowpoke", u"http://www.gocomics.com/slowpoke"),
            #(u"Soup To Nutz", u"http://www.gocomics.com/souptonutz"),
            #(u"Spot The Frog", u"http://www.gocomics.com/spotthefrog"),
            #(u"State of the Union", u"http://www.gocomics.com/stateoftheunion"),
            #(u"Stone Soup", u"http://www.gocomics.com/stonesoup"),
            #(u"Sylvia", u"http://www.gocomics.com/sylvia"),
            #(u"Tank McNamara", u"http://www.gocomics.com/tankmcnamara"),
            #(u"Tarzan Classics", u"http://www.gocomics.com/tarzanclassics"),
            #(u"That's Life", u"http://www.gocomics.com/thatslife"),
            #(u"The Academia Waltz", u"http://www.gocomics.com/academiawaltz"),
            #(u"The Barn", u"http://www.gocomics.com/thebarn"),
            #(u"The Boiling Point", u"http://www.gocomics.com/theboilingpoint"),
            #(u"The Boondocks", u"http://www.gocomics.com/boondocks"),
            (u"The Born Loser", u"http://www.gocomics.com/thebornloser"),
            #(u"The Buckets", u"http://www.gocomics.com/thebuckets"),
            #(u"The City", u"http://www.gocomics.com/thecity"),
            #(u"The Dinette Set", u"http://www.gocomics.com/dinetteset"),
            #(u"The Doozies", u"http://www.gocomics.com/thedoozies"),
            #(u"The Duplex", u"http://www.gocomics.com/duplex"),
            #(u"The Elderberries", u"http://www.gocomics.com/theelderberries"),
            #(u"The Flying McCoys", u"http://www.gocomics.com/theflyingmccoys"),
            #(u"The Fusco Brothers", u"http://www.gocomics.com/thefuscobrothers"),
            #(u"The Grizzwells", u"http://www.gocomics.com/thegrizzwells"),
            #(u"The Humble Stumble", u"http://www.gocomics.com/thehumblestumble"),
            #(u"The Knight Life", u"http://www.gocomics.com/theknightlife"),
            #(u"The Meaning of Lila", u"http://www.gocomics.com/meaningoflila"),
            (u"The Middletons", u"http://www.gocomics.com/themiddletons"),
            #(u"The Norm", u"http://www.gocomics.com/thenorm"),
            #(u"The Other Coast", u"http://www.gocomics.com/theothercoast"),
            #(u"The Quigmans", u"http://www.gocomics.com/thequigmans"),
            #(u"The Sunshine Club", u"http://www.gocomics.com/thesunshineclub"),
            #(u"Tiny Sepuk", u"http://www.gocomics.com/tinysepuk"),
            #(u"TOBY", u"http://www.gocomics.com/toby"),
            #(u"Tom the Dancing Bug", u"http://www.gocomics.com/tomthedancingbug"),
            #(u"Too Much Coffee Man", u"http://www.gocomics.com/toomuchcoffeeman"),
            #(u"Unstrange Phenomena", u"http://www.gocomics.com/unstrangephenomena"),
            #(u"W.T. Duck", u"http://www.gocomics.com/wtduck"),
|
||||
#(u"Watch Your Head", u"http://www.gocomics.com/watchyourhead"),
|
||||
#(u"Wee Pals", u"http://www.gocomics.com/weepals"),
|
||||
#(u"Winnie the Pooh", u"http://www.gocomics.com/winniethepooh"),
|
||||
(u"Wizard of Id", u"http://www.gocomics.com/wizardofid"),
|
||||
#(u"Working Daze", u"http://www.gocomics.com/workingdaze"),
|
||||
#(u"Working It Out", u"http://www.gocomics.com/workingitout"),
|
||||
#(u"Yenny", u"http://www.gocomics.com/yenny"),
|
||||
#(u"Zack Hill", u"http://www.gocomics.com/zackhill"),
|
||||
#(u"Ziggy", u"http://www.gocomics.com/ziggy"),
|
||||
(u"9 to 5", u"http://www.gocomics.com/9to5"),
|
||||
(u"Heathcliff", u"http://www.gocomics.com/heathcliff"),
|
||||
(u"Herman", u"http://www.gocomics.com/herman"),
|
||||
(u"Loose Parts", u"http://www.gocomics.com/looseparts"),
|
||||
(u"Marmaduke", u"http://www.gocomics.com/marmaduke"),
|
||||
(u"Ripley's Believe It or Not!", u"http://www.gocomics.com/ripleysbelieveitornot"),
|
||||
(u"Rubes", u"http://www.gocomics.com/rubes"),
|
||||
(u"Speed Bump", u"http://www.gocomics.com/speedbump"),
|
||||
(u"Strange Brew", u"http://www.gocomics.com/strangebrew"),
|
||||
(u"The Argyle Sweater", u"http://www.gocomics.com/theargylesweater"),
|
||||
#
|
||||
######## EDITORIAL CARTOONS #####################
|
||||
#(u"Adam Zyglis", u"http://www.gocomics.com/adamzyglis"),
|
||||
#(u"Andy Singer", u"http://www.gocomics.com/andysinger"),
|
||||
#(u"Ben Sargent",u"http://www.gocomics.com/bensargent"),
|
||||
#(u"Bill Day", u"http://www.gocomics.com/billday"),
|
||||
#(u"Bill Schorr", u"http://www.gocomics.com/billschorr"),
|
||||
#(u"Bob Englehart", u"http://www.gocomics.com/bobenglehart"),
|
||||
#(u"Bob Gorrell",u"http://www.gocomics.com/bobgorrell"),
|
||||
#(u"Brian Fairrington", u"http://www.gocomics.com/brianfairrington"),
|
||||
#(u"Bruce Beattie", u"http://www.gocomics.com/brucebeattie"),
|
||||
#(u"Cam Cardow", u"http://www.gocomics.com/camcardow"),
|
||||
#(u"Chan Lowe",u"http://www.gocomics.com/chanlowe"),
|
||||
#(u"Chip Bok",u"http://www.gocomics.com/chipbok"),
|
||||
#(u"Chris Britt",u"http://www.gocomics.com/chrisbritt"),
|
||||
#(u"Chuck Asay",u"http://www.gocomics.com/chuckasay"),
|
||||
#(u"Clay Bennett",u"http://www.gocomics.com/claybennett"),
|
||||
#(u"Clay Jones",u"http://www.gocomics.com/clayjones"),
|
||||
#(u"Dan Wasserman",u"http://www.gocomics.com/danwasserman"),
|
||||
#(u"Dana Summers",u"http://www.gocomics.com/danasummers"),
|
||||
#(u"Daryl Cagle", u"http://www.gocomics.com/darylcagle"),
|
||||
#(u"David Fitzsimmons", u"http://www.gocomics.com/davidfitzsimmons"),
|
||||
#(u"Dick Locher",u"http://www.gocomics.com/dicklocher"),
|
||||
#(u"Don Wright",u"http://www.gocomics.com/donwright"),
|
||||
#(u"Donna Barstow",u"http://www.gocomics.com/donnabarstow"),
|
||||
#(u"Drew Litton", u"http://www.gocomics.com/drewlitton"),
|
||||
#(u"Drew Sheneman",u"http://www.gocomics.com/drewsheneman"),
|
||||
#(u"Ed Stein", u"http://www.gocomics.com/edstein"),
|
||||
#(u"Eric Allie", u"http://www.gocomics.com/ericallie"),
|
||||
#(u"Gary Markstein", u"http://www.gocomics.com/garymarkstein"),
|
||||
#(u"Gary McCoy", u"http://www.gocomics.com/garymccoy"),
|
||||
#(u"Gary Varvel", u"http://www.gocomics.com/garyvarvel"),
|
||||
#(u"Glenn McCoy",u"http://www.gocomics.com/glennmccoy"),
|
||||
#(u"Henry Payne", u"http://www.gocomics.com/henrypayne"),
|
||||
#(u"Jack Ohman",u"http://www.gocomics.com/jackohman"),
|
||||
#(u"JD Crowe", u"http://www.gocomics.com/jdcrowe"),
|
||||
#(u"Jeff Danziger",u"http://www.gocomics.com/jeffdanziger"),
|
||||
#(u"Jeff Parker", u"http://www.gocomics.com/jeffparker"),
|
||||
#(u"Jeff Stahler", u"http://www.gocomics.com/jeffstahler"),
|
||||
#(u"Jerry Holbert", u"http://www.gocomics.com/jerryholbert"),
|
||||
#(u"Jim Morin",u"http://www.gocomics.com/jimmorin"),
|
||||
#(u"Joel Pett",u"http://www.gocomics.com/joelpett"),
|
||||
#(u"John Cole", u"http://www.gocomics.com/johncole"),
|
||||
#(u"John Darkow", u"http://www.gocomics.com/johndarkow"),
|
||||
#(u"John Deering",u"http://www.gocomics.com/johndeering"),
|
||||
#(u"John Sherffius", u"http://www.gocomics.com/johnsherffius"),
|
||||
#(u"Ken Catalino",u"http://www.gocomics.com/kencatalino"),
|
||||
#(u"Kerry Waghorn",u"http://www.gocomics.com/facesinthenews"),
|
||||
#(u"Kevin Kallaugher",u"http://www.gocomics.com/kevinkallaugher"),
|
||||
#(u"Lalo Alcaraz",u"http://www.gocomics.com/laloalcaraz"),
|
||||
#(u"Larry Wright", u"http://www.gocomics.com/larrywright"),
|
||||
#(u"Lisa Benson", u"http://www.gocomics.com/lisabenson"),
|
||||
#(u"Marshall Ramsey", u"http://www.gocomics.com/marshallramsey"),
|
||||
#(u"Matt Bors", u"http://www.gocomics.com/mattbors"),
|
||||
#(u"Matt Davies",u"http://www.gocomics.com/mattdavies"),
|
||||
#(u"Michael Ramirez", u"http://www.gocomics.com/michaelramirez"),
|
||||
#(u"Mike Keefe", u"http://www.gocomics.com/mikekeefe"),
|
||||
#(u"Mike Luckovich", u"http://www.gocomics.com/mikeluckovich"),
|
||||
#(u"MIke Thompson", u"http://www.gocomics.com/mikethompson"),
|
||||
#(u"Monte Wolverton", u"http://www.gocomics.com/montewolverton"),
|
||||
#(u"Mr. Fish", u"http://www.gocomics.com/mrfish"),
|
||||
#(u"Nate Beeler", u"http://www.gocomics.com/natebeeler"),
|
||||
#(u"Nick Anderson", u"http://www.gocomics.com/nickanderson"),
|
||||
#(u"Pat Bagley", u"http://www.gocomics.com/patbagley"),
|
||||
#(u"Pat Oliphant",u"http://www.gocomics.com/patoliphant"),
|
||||
#(u"Paul Conrad",u"http://www.gocomics.com/paulconrad"),
|
||||
#(u"Paul Szep", u"http://www.gocomics.com/paulszep"),
|
||||
#(u"RJ Matson", u"http://www.gocomics.com/rjmatson"),
|
||||
#(u"Rob Rogers", u"http://www.gocomics.com/robrogers"),
|
||||
#(u"Robert Ariail", u"http://www.gocomics.com/robertariail"),
|
||||
#(u"Scott Stantis", u"http://www.gocomics.com/scottstantis"),
|
||||
#(u"Signe Wilkinson", u"http://www.gocomics.com/signewilkinson"),
|
||||
#(u"Small World",u"http://www.gocomics.com/smallworld"),
|
||||
#(u"Steve Benson", u"http://www.gocomics.com/stevebenson"),
|
||||
#(u"Steve Breen", u"http://www.gocomics.com/stevebreen"),
|
||||
#(u"Steve Kelley", u"http://www.gocomics.com/stevekelley"),
|
||||
#(u"Steve Sack", u"http://www.gocomics.com/stevesack"),
|
||||
#(u"Stuart Carlson",u"http://www.gocomics.com/stuartcarlson"),
|
||||
#(u"Ted Rall",u"http://www.gocomics.com/tedrall"),
|
||||
#(u"(Th)ink", u"http://www.gocomics.com/think"),
|
||||
#(u"Tom Toles",u"http://www.gocomics.com/tomtoles"),
|
||||
#(u"Tony Auth",u"http://www.gocomics.com/tonyauth"),
|
||||
#(u"Views of the World",u"http://www.gocomics.com/viewsoftheworld"),
|
||||
#(u"ViewsAfrica",u"http://www.gocomics.com/viewsafrica"),
|
||||
#(u"ViewsAmerica",u"http://www.gocomics.com/viewsamerica"),
|
||||
#(u"ViewsAsia",u"http://www.gocomics.com/viewsasia"),
|
||||
#(u"ViewsBusiness",u"http://www.gocomics.com/viewsbusiness"),
|
||||
#(u"ViewsEurope",u"http://www.gocomics.com/viewseurope"),
|
||||
#(u"ViewsLatinAmerica",u"http://www.gocomics.com/viewslatinamerica"),
|
||||
#(u"ViewsMidEast",u"http://www.gocomics.com/viewsmideast"),
|
||||
#(u"Walt Handelsman",u"http://www.gocomics.com/walthandelsman"),
|
||||
#(u"Wayne Stayskal",u"http://www.gocomics.com/waynestayskal"),
|
||||
#(u"Wit of the World",u"http://www.gocomics.com/witoftheworld"),
|
||||
]:
|
||||
print 'Working on: ', title
|
||||
articles = self.make_links(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
return feeds
|
||||
|
||||
    def make_links(self, url):
        soup = self.index_to_soup(url)
        # print 'soup: ', soup
        title = ''
        title = 'Temp'
        current_articles = []
        from datetime import datetime, timedelta
        now = datetime.now()
        dates = [(now-timedelta(days=d)).strftime('%Y/%m/%d') for d in range(self.num_comics_to_get)]

        for page in dates:
            page_url = url + '/' + str(page)
            print(page_url)
            soup = self.index_to_soup(page_url)
            if soup:
                strip_tag = self.tag_to_string(soup.find('a'))
                if strip_tag:
                    print 'strip_tag: ', strip_tag
                    title = strip_tag
                    print 'title: ', title
        pages = range(1, self.num_comics_to_get+1)
        for page in pages:
            page_soup = self.index_to_soup(url)
            if page_soup:
                try:
                    strip_title = page_soup.find(name='div', attrs={'class':'top'}).h1.a.string
                except:
                    strip_title = 'Error - no Title found'
                try:
                    date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
                    if not date_title:
                        date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
                except:
                    date_title = 'Error - no Date found'
                title = strip_title + ' - ' + date_title
                for i in range(2):
                    try:
                        strip_url_date = page_soup.find(name='div', attrs={'class':'top'}).h1.a['href']
                        break  # success - this is normal exit
                    except:
                        strip_url_date = None
                        continue  # try to get strip_url_date again
                for i in range(2):
                    try:
                        prev_strip_url_date = page_soup.find('a', attrs={'class': 'prev'})['href']
                        break  # success - this is normal exit
                    except:
                        prev_strip_url_date = None
                        continue  # try to get prev_strip_url_date again
                if strip_url_date:
                    page_url = 'http://www.gocomics.com' + strip_url_date
                else:
                    continue
                if prev_strip_url_date:
                    prev_page_url = 'http://www.gocomics.com' + prev_strip_url_date
                else:
                    continue
                current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
                url = prev_page_url
        current_articles.reverse()
        return current_articles

    def preprocess_html(self, soup):
        if soup.title:
            title_string = soup.title.string.strip()
            _cd = title_string.split(',',1)[1]
            comic_date = ' '.join(_cd.split(' ', 4)[0:-1])
        if soup.h1.span:
            artist = soup.h1.span.string
            soup.h1.span.string.replaceWith(comic_date + artist)
        feature_item = soup.find('p',attrs={'class':'feature_item'})
        if feature_item.a:
            a_tag = feature_item.a
            a_href = a_tag["href"]
            img_tag = a_tag.img
            img_tag["src"] = a_href
            img_tag["width"] = self.comic_size
            img_tag["height"] = None
        return self.adeify_images(soup)

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        img {max-width:100%; min-width:100%;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
    '''

@ -12,5 +12,6 @@ class KDEFamilyPl(BasicNewsRecipe):
    max_articles_per_feed = 100
    preprocess_regexps = [(re.compile(r"Podobne wpisy.*", re.IGNORECASE|re.DOTALL), lambda m: '')]
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = True
    feeds = [(u'Wszystko', u'http://kdefamily.pl/feed/')]
@ -21,7 +21,7 @@ class LegeArtisRecipe(BasicNewsRecipe):

    no_stylesheets = True
    remove_javascript = True

    remove_empty_feeds = True
    extra_css = '''
        img{clear: both;}
    '''
@ -8,6 +8,7 @@ class Lomza(BasicNewsRecipe):
    language = 'pl'
    oldest_article = 15
    no_stylesheets = True
    extra_css = '#foto {float: right; max-width: 200px; margin-left: 10px;} #fotogaleria > div {float:left;} .br {clear: both;}'
    max_articles_per_feed = 100
    remove_tags=[dict(name='div', attrs={'class':['bxbanner', 'drukuj', 'wyslijznajomemu']})]
    keep_only_tags=[dict(name='div', attrs={'class':'wiadomosc'})]
@ -1,11 +1,9 @@

__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2012-2013, Darko Miletic <darko.miletic at gmail.com>'
'''
www.nsfwcorp.com
'''

import urllib
from calibre.web.feeds.news import BasicNewsRecipe

class NotSafeForWork(BasicNewsRecipe):
@ -20,8 +18,8 @@ class NotSafeForWork(BasicNewsRecipe):
    needs_subscription = True
    auto_cleanup = False
    INDEX = 'https://www.nsfwcorp.com'
    LOGIN = INDEX + '/login/target/'
    SETTINGS = INDEX + '/settings/'
    LOGIN = INDEX + '/account/login/?next=%2F'
    SETTINGS = INDEX + '/account/settings/'
    use_embedded_content = True
    language = 'en'
    publication_type = 'magazine'
@ -48,19 +46,20 @@ class NotSafeForWork(BasicNewsRecipe):

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.LOGIN)
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            data = urllib.urlencode({ 'email':self.username
                ,'password':self.password
                })
            br.open(self.LOGIN, data)
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['email' ] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def get_feeds(self):
        self.feeds = []
        soup = self.index_to_soup(self.SETTINGS)
        for item in soup.findAll('input', attrs={'type':'text'}):
            if item.has_key('value') and item['value'].startswith('http://www.nsfwcorp.com/feed/'):
            if item.has_key('value') and item['value'].startswith('https://www.nsfwcorp.com/feed/'):
                self.feeds.append(item['value'])
        return self.feeds
        return self.feeds
@ -1,6 +1,6 @@
#!/usr/bin/env python
from calibre.web.feeds.recipes import BasicNewsRecipe

from calibre.ebooks.BeautifulSoup import Comment
class PCLab(BasicNewsRecipe):
    cover_url = 'http://pclab.pl/img/logo.png'
    title = u"PC Lab"
@ -52,6 +52,9 @@ class PCLab(BasicNewsRecipe):
        pager = soup2.find('div', attrs={'class':'next'})
        pagetext = soup2.find('div', attrs={'class':'substance'})
        pagetext = pagetext.find('div', attrs={'class':'data'})
        comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
        for comment in comments:
            comment.extract()

        pos = len(appendtag.contents)
        appendtag.insert(pos, pagetext)
@ -10,7 +10,7 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe

class swiatczytnikow(BasicNewsRecipe):
    title = u'Swiat Czytnikow'
    title = u'Świat Czytników'
    description = u'Czytniki e-książek w Polsce. Jak wybrać, kupić i korzystać z Amazon Kindle i innych'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'
@ -1,4 +1,4 @@
import re, random
import random

from calibre import browser
from calibre.web.feeds.recipes import BasicNewsRecipe
@ -8,7 +8,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    title = u'The Sun UK'
    description = 'Articles from The Sun tabloid UK'
    __author__ = 'Dave Asbury'
    # last updated 19/10/12 better cover fetch
    # last updated 5/5/13 better cover fetch
    language = 'en_GB'
    oldest_article = 1
    max_articles_per_feed = 15
@ -29,16 +29,12 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
        dict(name='div',attrs={'class' : 'intro'}),
        dict(name='h3'),
        dict(name='div',attrs={'id' : 'articlebody'}),
        #dict(attrs={'class' : ['right_col_branding','related-stories','mystery-meat-link','ltbx-container','ltbx-var ltbx-hbxpn','ltbx-var ltbx-nav-loop','ltbx-var ltbx-url']}),
        # dict(name='div',attrs={'class' : 'cf'}),
        # dict(attrs={'title' : 'download flash'}),
        # dict(attrs={'style' : 'padding: 5px'})

    ]
    ]
    remove_tags_after = [dict(id='bodyText')]
    remove_tags=[
        dict(name='li'),
        dict(attrs={'class' : 'grid-4 right-hand-column'}),
        dict(name='li'),
        dict(attrs={'class' : 'grid-4 right-hand-column'}),
    ]

    feeds = [
@ -47,40 +43,24 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
        (u'Showbiz', u'http://www.thesun.co.uk/sol/homepage/showbiz/rss'),
        (u'Woman', u'http://www.thesun.co.uk/sol/homepage/woman/rss'),
    ]
    # starsons code
    def parse_feeds (self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            for article in feed.articles[:]:
                print 'article.title is: ', article.title
                if 'Try out The Sun' in article.title.upper() or 'Try-out-The-Suns' in article.url:
                    feed.articles.remove(article)
                if 'Web porn harms kids' in article.title.upper() or 'Sun-says-Web-porn' in article.url:
                    feed.articles.remove(article)
        return feeds
    # starsons code
    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            for article in feed.articles[:]:
                if 'Try out The Sun' in article.title.upper() or 'Try-out-The-Suns' in article.url:
                    feed.articles.remove(article)
                if 'Web porn harms kids' in article.title.upper() or 'Sun-says-Web-porn' in article.url:
                    feed.articles.remove(article)
        return feeds

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html')
        # look for the block containing the sun button and url
        cov = soup.find(attrs={'style' : 'background-image: url(http://www.politicshome.com/images/sources/source_frontpage_button_84.gif);'})

        #cov = soup.find(attrs={'id' : 'large'})
        cov2 = str(cov)

        cov2='http://www.politicshome.com'+cov2[9:-133]
        #cov2 now contains url of the page containing pic
        #cov2 now contains url of the page containing pic
        soup = self.index_to_soup(cov2)
        cov = soup.find(attrs={'id' : 'large'})
        cov=str(cov)
        cov2 = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
        cov2 = str(cov2)
        cov2=cov2[2:len(cov2)-2]
        br = browser()
        br.set_handle_redirect(False)
        cover_url = 'http://www.thepaperboy.com/frontpages/current/The_Sun_newspaper_front_page.jpg'

        try:
            br.open_novisit(cov2)
            cover_url = cov2
            br.open_novisit('http://www.thepaperboy.com/frontpages/current/The_Sun_newspaper_front_page.jpg')
        except:
            cover_url = random.choice([
                'http://img.thesun.co.uk/multimedia/archive/00905/errorpage6_677961a_905507a.jpg'
@ -88,6 +68,6 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
                ,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage5_677960a_905512a.jpg'
                ,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage2_677957a_905502a.jpg'
                ,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage3_677958a_905503a.jpg'
                ])
                ])

        return cover_url
@ -531,3 +531,9 @@ numeric_collation = False
# number here. The default is ten libraries.
many_libraries = 10

#: Highlight the count of books when using a Virtual Library
# The count of books next to the Virtual Library button is highlighted in
# yellow when using a Virtual Library. By setting this to False, you can turn
# that off.
highlight_virtual_library_book_count = True
@ -116,7 +116,9 @@ tarball. Edit setup.py and set zip_safe=False. Then run::

Run the following command to install python dependencies::

    easy_install --always-unzip -U mechanize pyreadline python-dateutil dnspython cssutils clientform pycrypto cssselect
    easy_install --always-unzip -U mechanize python-dateutil dnspython cssutils clientform pycrypto cssselect

Install pyreadline from https://pypi.python.org/pypi/pyreadline/2.0

Install pywin32 and edit win32com\__init__.py setting _frozen = True and
__gen_path__ to a temp dir (otherwise it tries to set it to a dir in the
@ -12,14 +12,14 @@ msgstr ""
|
||||
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
|
||||
"devel@lists.alioth.debian.org>\n"
|
||||
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
|
||||
"PO-Revision-Date: 2013-04-21 08:00+0000\n"
|
||||
"PO-Revision-Date: 2013-05-06 09:36+0000\n"
|
||||
"Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
|
||||
"Language-Team: Catalan <linux@softcatala.org>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"X-Launchpad-Export-Date: 2013-04-22 05:23+0000\n"
|
||||
"X-Generator: Launchpad (build 16567)\n"
|
||||
"X-Launchpad-Export-Date: 2013-05-07 05:28+0000\n"
|
||||
"X-Generator: Launchpad (build 16598)\n"
|
||||
"Language: ca\n"
|
||||
|
||||
#. name for aaa
|
||||
@ -2024,7 +2024,7 @@ msgstr "Àzeri meridional"
|
||||
|
||||
#. name for aze
|
||||
msgid "Azerbaijani"
|
||||
msgstr "Serbi"
|
||||
msgstr ""
|
||||
|
||||
#. name for azg
|
||||
msgid "Amuzgo; San Pedro Amuzgos"
|
||||
@ -7288,7 +7288,7 @@ msgstr "Epie"
|
||||
|
||||
#. name for epo
|
||||
msgid "Esperanto"
|
||||
msgstr "Alemany"
|
||||
msgstr "Esperanto"
|
||||
|
||||
#. name for era
|
||||
msgid "Eravallan"
|
||||
@ -21816,7 +21816,7 @@ msgstr "Ramoaaina"
|
||||
|
||||
#. name for raj
|
||||
msgid "Rajasthani"
|
||||
msgstr "Marwari"
|
||||
msgstr ""
|
||||
|
||||
#. name for rak
|
||||
msgid "Tulu-Bohuai"
|
||||
|
@ -13762,7 +13762,7 @@ msgstr ""
|
||||
|
||||
#. name for lav
|
||||
msgid "Latvian"
|
||||
msgstr "litevština"
|
||||
msgstr ""
|
||||
|
||||
#. name for law
|
||||
msgid "Lauje"
|
||||
|
@ -1429,7 +1429,7 @@ msgstr ""
|
||||
|
||||
#. name for arg
|
||||
msgid "Aragonese"
|
||||
msgstr "Færøsk"
|
||||
msgstr ""
|
||||
|
||||
#. name for arh
|
||||
msgid "Arhuaco"
|
||||
|
@ -18,14 +18,14 @@ msgstr ""
|
||||
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
|
||||
"devel@lists.alioth.debian.org>\n"
|
||||
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
|
||||
"PO-Revision-Date: 2013-04-11 13:29+0000\n"
|
||||
"PO-Revision-Date: 2013-05-06 09:41+0000\n"
|
||||
"Last-Translator: Simon Schütte <simonschuette@arcor.de>\n"
|
||||
"Language-Team: Ubuntu German Translators\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"X-Launchpad-Export-Date: 2013-04-12 05:20+0000\n"
|
||||
"X-Generator: Launchpad (build 16564)\n"
|
||||
"X-Launchpad-Export-Date: 2013-05-07 05:29+0000\n"
|
||||
"X-Generator: Launchpad (build 16598)\n"
|
||||
"Language: de\n"
|
||||
|
||||
#. name for aaa
|
||||
@ -319,7 +319,7 @@ msgstr "Adangme"
|
||||
|
||||
#. name for adb
|
||||
msgid "Adabe"
|
||||
msgstr "Adangme"
|
||||
msgstr "Adabe"
|
||||
|
||||
#. name for add
|
||||
msgid "Dzodinka"
|
||||
@ -367,7 +367,7 @@ msgstr "Adap"
|
||||
|
||||
#. name for adq
|
||||
msgid "Adangbe"
|
||||
msgstr "Adangme"
|
||||
msgstr "Adangbe"
|
||||
|
||||
#. name for adr
|
||||
msgid "Adonara"
|
||||
|
@ -2022,7 +2022,7 @@ msgstr ""
|
||||
|
||||
#. name for aze
|
||||
msgid "Azerbaijani"
|
||||
msgstr "Turkiera"
|
||||
msgstr ""
|
||||
|
||||
#. name for azg
|
||||
msgid "Amuzgo; San Pedro Amuzgos"
|
||||
@ -13126,7 +13126,7 @@ msgstr ""
|
||||
|
||||
#. name for kur
|
||||
msgid "Kurdish"
|
||||
msgstr "Turkiera"
|
||||
msgstr ""
|
||||
|
||||
#. name for kus
|
||||
msgid "Kusaal"
|
||||
@ -16190,7 +16190,7 @@ msgstr ""
|
||||
|
||||
#. name for mlt
|
||||
msgid "Maltese"
|
||||
msgstr "Koreera"
|
||||
msgstr ""
|
||||
|
||||
#. name for mlu
|
||||
msgid "To'abaita"
|
||||
|
@ -13764,7 +13764,7 @@ msgstr "Laba"
|
||||
|
||||
#. name for lav
|
||||
msgid "Latvian"
|
||||
msgstr "Lituano"
|
||||
msgstr ""
|
||||
|
||||
#. name for law
|
||||
msgid "Lauje"
|
||||
@ -22212,7 +22212,7 @@ msgstr "Roglai do norte"
|
||||
|
||||
#. name for roh
|
||||
msgid "Romansh"
|
||||
msgstr "Romanés"
|
||||
msgstr ""
|
||||
|
||||
#. name for rol
|
||||
msgid "Romblomanon"
|
||||
|
@ -20538,7 +20538,7 @@ msgstr ""
|
||||
|
||||
#. name for peo
|
||||
msgid "Persian; Old (ca. 600-400 B.C.)"
|
||||
msgstr "perzsa"
|
||||
msgstr ""
|
||||
|
||||
#. name for pep
|
||||
msgid "Kunja"
|
||||
|
@ -15049,7 +15049,7 @@ msgstr "Magahi"
|
||||
|
||||
#. name for mah
|
||||
msgid "Marshallese"
|
||||
msgstr "Maltneska"
|
||||
msgstr ""
|
||||
|
||||
#. name for mai
|
||||
msgid "Maithili"
|
||||
|
@ -3742,7 +3742,7 @@ msgstr ""
|
||||
|
||||
#. name for bre
|
||||
msgid "Breton"
|
||||
msgstr "프랑스어"
|
||||
msgstr ""
|
||||
|
||||
#. name for brf
|
||||
msgid "Bera"
|
||||
|
@ -6804,7 +6804,7 @@ msgstr "डोगोन; तेबुल उरे"
|
||||
|
||||
#. name for dua
|
||||
msgid "Duala"
|
||||
msgstr "ड्युला"
|
||||
msgstr ""
|
||||
|
||||
#. name for dub
|
||||
msgid "Dubli"
|
||||
|
@ -27790,7 +27790,7 @@ msgstr ""
|
||||
|
||||
#. name for wln
|
||||
msgid "Walloon"
|
||||
msgstr "Vietnamesisk"
|
||||
msgstr ""
|
||||
|
||||
#. name for wlo
|
||||
msgid "Wolio"
|
||||
|
@ -9862,7 +9862,7 @@ msgstr "Hya"
|
||||
|
||||
#. name for hye
|
||||
msgid "Armenian"
|
||||
msgstr "Albanés"
|
||||
msgstr ""
|
||||
|
||||
#. name for iai
|
||||
msgid "Iaai"
|
||||
@ -13762,7 +13762,7 @@ msgstr "Laba"
|
||||
|
||||
#. name for lav
|
||||
msgid "Latvian"
|
||||
msgstr "Lituanian"
|
||||
msgstr ""
|
||||
|
||||
#. name for law
|
||||
msgid "Lauje"
|
||||
|
@ -2089,7 +2089,7 @@ msgstr "Башкирский"
|
||||
|
||||
#. name for bal
|
||||
msgid "Baluchi"
|
||||
msgstr "Балийский"
|
||||
msgstr ""
|
||||
|
||||
#. name for bam
|
||||
msgid "Bambara"
|
||||
|
@ -13763,7 +13763,7 @@ msgstr ""
|
||||
|
||||
#. name for lav
|
||||
msgid "Latvian"
|
||||
msgstr "Lotyšský"
|
||||
msgstr ""
|
||||
|
||||
#. name for law
|
||||
msgid "Lauje"
|
||||
|
@ -1016,7 +1016,7 @@ msgstr ""
|
||||
|
||||
#. name for amh
|
||||
msgid "Amharic"
|
||||
msgstr "阿拉伯语"
|
||||
msgstr ""
|
||||
|
||||
#. name for ami
|
||||
msgid "Amis"
|
||||
|
@ -18,7 +18,7 @@ def qt_sources():
        'src/gui/widgets/qdialogbuttonbox.cpp',
    ]))

class POT(Command): # {{{
class POT(Command): # {{{

    description = 'Update the .pot translation template'
    PATH = os.path.join(Command.SRC, __appname__, 'translations')
@ -63,7 +63,6 @@ class POT(Command): # {{{

        return '\n'.join(ans)

    def run(self, opts):
        pot_header = textwrap.dedent('''\
        # Translation template file..
@ -117,11 +116,10 @@ class POT(Command): # {{{
            f.write(src)
        self.info('Translations template:', os.path.abspath(pot))

        return pot
# }}}

class Translations(POT): # {{{
class Translations(POT): # {{{
    description='''Compile the translations'''
    DEST = os.path.join(os.path.dirname(POT.SRC), 'resources', 'localization',
            'locales')
@ -134,6 +132,7 @@ class Translations(POT): # {{{
        return locale, os.path.join(self.DEST, locale, 'messages.mo')

    def run(self, opts):
        self.iso639_errors = []
        for f in self.po_files():
            locale, dest = self.mo_file(f)
            base = os.path.dirname(dest)
@ -146,18 +145,46 @@ class Translations(POT): # {{{
                    '%s.po'%iscpo)

            if os.path.exists(iso639):
                self.check_iso639(iso639)
                dest = self.j(self.d(dest), 'iso639.mo')
                if self.newer(dest, iso639):
                    self.info('\tCopying ISO 639 translations')
                    self.info('\tCopying ISO 639 translations for %s' % iscpo)
                    subprocess.check_call(['msgfmt', '-o', dest, iso639])
            elif locale not in ('en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc',
                    'ltg', 'nds', 'te', 'yi', 'fo', 'sq', 'ast', 'ml', 'ku',
                    'fr_CA', 'him', 'jv', 'ka', 'fur', 'ber'):
                self.warn('No ISO 639 translations for locale:', locale)

        if self.iso639_errors:
            for err in self.iso639_errors:
                print (err)
            raise SystemExit(1)

        self.write_stats()
        self.freeze_locales()

    def check_iso639(self, path):
        from calibre.utils.localization import langnames_to_langcodes
        with open(path, 'rb') as f:
            raw = f.read()
        rmap = {}
        msgid = None
        for match in re.finditer(r'^(msgid|msgstr)\s+"(.*?)"', raw, re.M):
            if match.group(1) == 'msgid':
                msgid = match.group(2)
            else:
                msgstr = match.group(2)
                if not msgstr:
                    continue
                omsgid = rmap.get(msgstr, None)
                if omsgid is not None:
                    cm = langnames_to_langcodes([omsgid, msgid])
                    if cm[msgid] and cm[omsgid] and cm[msgid] != cm[omsgid]:
                        self.iso639_errors.append('In file %s the name %s is used as translation for both %s and %s' % (
                            os.path.basename(path), msgstr, msgid, rmap[msgstr]))
                        # raise SystemExit(1)
                rmap[msgstr] = msgid

    def freeze_locales(self):
        zf = self.DEST + '.zip'
        from calibre import CurrentDir
@ -191,7 +218,6 @@ class Translations(POT): # {{{
            locale = self.mo_file(f)[0]
            stats[locale] = min(1.0, float(trans)/total)

        import cPickle
        cPickle.dump(stats, open(dest, 'wb'), -1)

@ -211,7 +237,7 @@ class Translations(POT): # {{{

# }}}

class GetTranslations(Translations): # {{{
class GetTranslations(Translations): # {{{

    description = 'Get updated translations from Launchpad'
    BRANCH = 'lp:~kovid/calibre/translations'
@ -286,7 +312,7 @@ class GetTranslations(Translations): # {{{

# }}}

class ISO639(Command): # {{{
class ISO639(Command): # {{{

    description = 'Compile translations for ISO 639 codes'
    DEST = os.path.join(os.path.dirname(POT.SRC), 'resources', 'localization',
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 9, 29)
numeric_version = (0, 9, 30)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

@ -1476,7 +1476,6 @@ class StoreKoobeStore(StoreBase):
    drm_free_only = True
    headquarters = 'PL'
    formats = ['EPUB', 'MOBI', 'PDF']
    affiliate = True

class StoreLegimiStore(StoreBase):
    name = 'Legimi'
@ -1660,7 +1659,6 @@ class StoreWoblinkStore(StoreBase):

    headquarters = 'PL'
    formats = ['EPUB', 'MOBI', 'PDF', 'WOBLINK']
    affiliate = True

class XinXiiStore(StoreBase):
    name = 'XinXii'
@ -19,10 +19,10 @@ class BLACKBERRY(USBMS):

    VENDOR_ID = [0x0fca]
    PRODUCT_ID = [0x8004, 0x0004]
    BCD = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211, 0x0220]
    BCD = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211, 0x0220, 0x232]

    VENDOR_NAME = 'RIM'
    WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['BLACKBERRY_SD', 'BLACKBERRY']

    MAIN_MEMORY_VOLUME_LABEL = 'Blackberry SD Card'

@ -35,7 +35,7 @@ class KOBO(USBMS):
    gui_name = 'Kobo Reader'
    description = _('Communicate with the Kobo Reader')
    author = 'Timothy Legge and David Forrester'
    version = (2, 0, 9)
    version = (2, 0, 10)

    dbversion = 0
    fwversion = 0
@ -45,6 +45,7 @@ class KOBO(USBMS):
    supported_platforms = ['windows', 'osx', 'linux']

    booklist_class = CollectionsBookList
    book_class = Book

    # Ordered list of supported formats
    FORMATS = ['epub', 'pdf', 'txt', 'cbz', 'cbr']
@ -115,7 +116,6 @@ class KOBO(USBMS):

    def initialize(self):
        USBMS.initialize(self)
        self.book_class = Book
        self.dbversion = 7

    def books(self, oncard=None, end_session=True):
@ -1213,7 +1213,7 @@ class KOBOTOUCH(KOBO):
    min_dbversion_archive = 71
    min_dbversion_images_on_sdcard = 77

    max_supported_fwversion = (2,5,1)
    max_supported_fwversion = (2,5,3)
    min_fwversion_images_on_sdcard = (2,4,1)

    has_kepubs = True
@ -1237,11 +1237,9 @@ class KOBOTOUCH(KOBO):
        _('Keep cover aspect ratio') +
        ':::'+_('When uploading covers, do not change the aspect ratio when resizing for the device.'
                ' This is for firmware versions 2.3.1 and later.'),
        _('Show expired books') +
        ':::'+_('A bug in an earlier version left non kepubs book records'
                ' in the database. With this option Calibre will show the '
                'expired records and allow you to delete them with '
                'the new delete logic.'),
        _('Show archived books') +
        ':::'+_('Archived books are listed on the device but need to be downloaded to read.'
                ' Use this option to show these books and match them with books in the calibre library.'),
        _('Show Previews') +
        ':::'+_('Kobo previews are included on the Touch and some other versions'
                ' by default they are no longer displayed as there is no good reason to '
@ -1289,7 +1287,7 @@ class KOBOTOUCH(KOBO):
    OPT_UPLOAD_COVERS = 3
    OPT_UPLOAD_GRAYSCALE_COVERS = 4
    OPT_KEEP_COVER_ASPECT_RATIO = 5
    OPT_SHOW_EXPIRED_BOOK_RECORDS = 6
    OPT_SHOW_ARCHIVED_BOOK_RECORDS = 6
    OPT_SHOW_PREVIEWS = 7
    OPT_SHOW_RECOMMENDATIONS = 8
    OPT_UPDATE_SERIES_DETAILS = 9
@ -1347,6 +1345,10 @@ class KOBOTOUCH(KOBO):
        self.set_device_name()
        return super(KOBOTOUCH, self).get_device_information(end_session)

    def device_database_path(self):
        return self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite')

    def books(self, oncard=None, end_session=True):
        debug_print("KoboTouch:books - oncard='%s'"%oncard)
        from calibre.ebooks.metadata.meta import path_to_ext
@ -1599,9 +1601,7 @@ class KOBOTOUCH(KOBO):

        self.debug_index = 0
        import sqlite3 as sqlite
        with closing(sqlite.connect(
            self.normalize_path(self._main_prefix +
                '.kobo/KoboReader.sqlite'))) as connection:
        with closing(sqlite.connect(self.device_database_path())) as connection:
            debug_print("KoboTouch:books - reading device database")

            # return bytestrings if the content cannot be decoded as unicode
@ -1618,7 +1618,21 @@ class KOBOTOUCH(KOBO):
            debug_print("KoboTouch:books - shelf list:", self.bookshelvelist)

            opts = self.settings()
            if self.supports_series():
            if self.supports_kobo_archive():
                query= ("select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, " \
                    "ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility, " \
                    "IsDownloaded, Series, SeriesNumber, ___UserID " \
                    " from content " \
                    " where BookID is Null " \
                    " and ((Accessibility = -1 and IsDownloaded in ('true', 1 )) or (Accessibility in (1,2) %(expiry)s) " \
                    " %(previews)s %(recomendations)s )" \
                    " and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) and ContentType = 6)") % \
                    dict(\
                        expiry="" if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else "and IsDownloaded in ('true', 1)", \
                        previews=" or (Accessibility in (6) and ___UserID <> '')" if opts.extra_customization[self.OPT_SHOW_PREVIEWS] else "", \
                        recomendations=" or (Accessibility in (-1, 4, 6) and ___UserId = '')" if opts.extra_customization[self.OPT_SHOW_RECOMMENDATIONS] else "" \
                    )
            elif self.supports_series():
                query= ("select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, " \
                    "ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility, " \
                    "IsDownloaded, Series, SeriesNumber, ___UserID " \
@ -1627,7 +1641,7 @@ class KOBOTOUCH(KOBO):
                    " and ((Accessibility = -1 and IsDownloaded in ('true', 1)) or (Accessibility in (1,2)) %(previews)s %(recomendations)s )" \
                    " and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s") % \
                    dict(\
                        expiry=" and ContentType = 6)" if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ")", \
                        expiry=" and ContentType = 6)" if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else ")", \
                        previews=" or (Accessibility in (6) and ___UserID <> '')" if opts.extra_customization[self.OPT_SHOW_PREVIEWS] else "", \
                        recomendations=" or (Accessibility in (-1, 4, 6) and ___UserId = '')" if opts.extra_customization[self.OPT_SHOW_RECOMMENDATIONS] else "" \
                    )
@ -1638,7 +1652,7 @@ class KOBOTOUCH(KOBO):
                    ' from content ' \
                    ' where BookID is Null %(previews)s %(recomendations)s and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % \
                    dict(\
                        expiry=' and ContentType = 6)' if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')', \
                        expiry=' and ContentType = 6)' if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else ')', \
                        previews=' and Accessibility <> 6' if opts.extra_customization[self.OPT_SHOW_PREVIEWS] == False else '', \
                        recomendations=' and IsDownloaded in (\'true\', 1)' if opts.extra_customization[self.OPT_SHOW_RECOMMENDATIONS] == False else ''\
                    )
@ -1648,7 +1662,7 @@ class KOBOTOUCH(KOBO):
                    '"1" as IsDownloaded, null as Series, null as SeriesNumber, ___UserID' \
                    ' from content where ' \
                    'BookID is Null and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % dict(expiry=' and ContentType = 6)' \
                    if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')')
                    if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else ')')
            else:
                query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
                    'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility, ' \
@ -2586,7 +2600,7 @@ class KOBOTOUCH(KOBO):
    def modify_database_check(self, function):
        # Checks to see whether the database version is supported
        # and whether the user has chosen to support the firmware version
        # debug_print("KoboTouch:modify_database_check - self.fwversion <= self.max_supported_fwversion=", self.fwversion > self.max_supported_fwversion)
        # debug_print("KoboTouch:modify_database_check - self.fwversion > self.max_supported_fwversion=", self.fwversion > self.max_supported_fwversion)
        if self.dbversion > self.supported_dbversion or self.fwversion > self.max_supported_fwversion:
            # Unsupported database
            opts = self.settings()

@ -63,7 +63,6 @@ class TXTInput(InputFormatPlugin):
            normalize_line_endings, convert_textile, remove_indents,
            block_to_single_line, separate_hard_scene_breaks)

        self.log = log
        txt = ''
        log.debug('Reading text from file...')
@ -92,8 +91,15 @@ class TXTInput(InputFormatPlugin):
            log.debug('Using user specified input encoding of %s' % ienc)
        else:
            det_encoding = detect(txt)
            ienc = det_encoding['encoding']
            log.debug('Detected input encoding as %s with a confidence of %s%%' % (ienc, det_encoding['confidence'] * 100))
            det_encoding, confidence = det_encoding['encoding'], det_encoding['confidence']
            if det_encoding and det_encoding.lower().replace('_', '-').strip() in (
                    'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
                    'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
                # Microsoft Word exports to HTML with encoding incorrectly set to
                # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
                det_encoding = 'gbk'
            ienc = det_encoding
            log.debug('Detected input encoding as %s with a confidence of %s%%' % (ienc, confidence * 100))
        if not ienc:
            ienc = 'utf-8'
            log.debug('No input encoding specified and could not auto detect using %s' % ienc)
src/calibre/ebooks/docx/__init__.py (new file, 11 lines)
@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

class InvalidDOCX(ValueError):
    pass

src/calibre/ebooks/docx/block_styles.py (new file, 267 lines)
@ -0,0 +1,267 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

from collections import OrderedDict
from calibre.ebooks.docx.names import XPath, get

class Inherit:
    pass
inherit = Inherit()

def binary_property(parent, name):
    vals = XPath('./w:%s' % name)(parent)
    if not vals:
        return inherit
    val = get(vals[0], 'w:val', 'on')
    return True if val in {'on', '1', 'true'} else False

def simple_color(col, auto='black'):
    if not col or col == 'auto' or len(col) != 6:
        return auto
    return '#'+col

def simple_float(val, mult=1.0):
    try:
        return float(val) * mult
    except (ValueError, TypeError, AttributeError, KeyError):
        return None

LINE_STYLES = { # {{{
    'basicBlackDashes': 'dashed',
    'basicBlackDots': 'dotted',
    'basicBlackSquares': 'dashed',
    'basicThinLines': 'solid',
    'dashDotStroked': 'groove',
    'dashed': 'dashed',
    'dashSmallGap': 'dashed',
    'dotDash': 'dashed',
    'dotDotDash': 'dashed',
    'dotted': 'dotted',
    'double': 'double',
    'inset': 'inset',
    'nil': 'none',
    'none': 'none',
    'outset': 'outset',
    'single': 'solid',
    'thick': 'solid',
    'thickThinLargeGap': 'double',
    'thickThinMediumGap': 'double',
    'thickThinSmallGap' : 'double',
    'thinThickLargeGap': 'double',
    'thinThickMediumGap': 'double',
    'thinThickSmallGap': 'double',
    'thinThickThinLargeGap': 'double',
    'thinThickThinMediumGap': 'double',
    'thinThickThinSmallGap': 'double',
    'threeDEmboss': 'ridge',
    'threeDEngrave': 'groove',
    'triple': 'double',
} # }}}

# Read from XML {{{
def read_border(parent, dest):
    tvals = {'padding_%s':inherit, 'border_%s_width':inherit,
            'border_%s_style':inherit, 'border_%s_color':inherit}
    vals = {}
    for edge in ('left', 'top', 'right', 'bottom'):
        vals.update({k % edge:v for k, v in tvals.iteritems()})

    for border in XPath('./w:pBdr')(parent):
        for edge in ('left', 'top', 'right', 'bottom'):
            for elem in XPath('./w:%s' % edge):
                color = get(elem, 'w:color')
                if color is not None:
                    vals['border_%s_color' % edge] = simple_color(color)
                style = get(elem, 'w:val')
                if style is not None:
                    vals['border_%s_style' % edge] = LINE_STYLES.get(style, 'solid')
                space = get(elem, 'w:space')
                if space is not None:
                    try:
                        vals['padding_%s' % edge] = float(space)
                    except (ValueError, TypeError):
                        pass
                sz = get(elem, 'w:sz')
                if sz is not None:
                    # we don't care about art borders (they are only used for page borders)
                    try:
                        vals['border_%s_width' % edge] = min(96, max(2, float(sz))) / 8
                    except (ValueError, TypeError):
                        pass

    for key, val in vals.iteritems():
        setattr(dest, key, val)

def read_indent(parent, dest):
    padding_left = padding_right = text_indent = inherit
    for indent in XPath('./w:ind')(parent):
        l, lc = get(indent, 'w:left'), get(indent, 'w:leftChars')
        pl = simple_float(lc, 0.01) if lc is not None else simple_float(l, 0.05) if l is not None else None
        if pl is not None:
            padding_left = '%.3g%s' % (pl, 'em' if lc is not None else 'pt')

        r, rc = get(indent, 'w:right'), get(indent, 'w:rightChars')
        pr = simple_float(rc, 0.01) if rc is not None else simple_float(r, 0.05) if r is not None else None
        if pr is not None:
            padding_right = '%.3g%s' % (pr, 'em' if rc is not None else 'pt')

        h, hc = get(indent, 'w:hanging'), get(indent, 'w:hangingChars')
        fl, flc = get(indent, 'w:firstLine'), get(indent, 'w:firstLineChars')
        h = h if h is None else '-'+h
        hc = hc if hc is None else '-'+hc
        ti = (simple_float(hc, 0.01) if hc is not None else simple_float(h, 0.05) if h is not None else
              simple_float(flc, 0.01) if flc is not None else simple_float(fl, 0.05) if fl is not None else None)
        if ti is not None:
            text_indent = '%.3g%s' % (ti, 'em' if hc is not None or (h is None and flc is not None) else 'pt')

    setattr(dest, 'margin_left', padding_left)
    setattr(dest, 'margin_right', padding_right)
    setattr(dest, 'text_indent', text_indent)

def read_justification(parent, dest):
    ans = inherit
    for jc in XPath('./w:jc[@w:val]')(parent):
        val = get(jc, 'w:val')
        if not val:
            continue
        if val in {'both', 'distribute'} or 'thai' in val or 'kashida' in val:
            ans = 'justify'
        if val in {'left', 'center', 'right',}:
            ans = val
    setattr(dest, 'text_align', ans)

def read_spacing(parent, dest):
    padding_top = padding_bottom = line_height = inherit
    for s in XPath('./w:spacing')(parent):
        a, al, aa = get(s, 'w:after'), get(s, 'w:afterLines'), get(s, 'w:afterAutospacing')
        pb = None if aa in {'on', '1', 'true'} else simple_float(al, 0.02) if al is not None else simple_float(a, 0.05) if a is not None else None
        if pb is not None:
            padding_bottom = '%.3g%s' % (pb, 'ex' if al is not None else 'pt')

        b, bl, bb = get(s, 'w:before'), get(s, 'w:beforeLines'), get(s, 'w:beforeAutospacing')
        pt = None if bb in {'on', '1', 'true'} else simple_float(bl, 0.02) if bl is not None else simple_float(b, 0.05) if b is not None else None
        if pt is not None:
            padding_top = '%.3g%s' % (pt, 'ex' if bl is not None else 'pt')

        l, lr = get(s, 'w:line'), get(s, 'w:lineRule', 'auto')
        if l is not None:
            lh = simple_float(l, 0.05) if lr in {'exactly', 'atLeast'} else simple_float(l, 1/240.0)
            line_height = '%.3g%s' % (lh, 'pt' if lr in {'exactly', 'atLeast'} else '')

    setattr(dest, 'margin_top', padding_top)
    setattr(dest, 'margin_bottom', padding_bottom)
    setattr(dest, 'line_height', line_height)

def read_direction(parent, dest):
    ans = inherit
    for jc in XPath('./w:textFlow[@w:val]')(parent):
        val = get(jc, 'w:val')
        if not val:
            continue
        if 'rl' in val.lower():
            ans = 'rtl'
    setattr(dest, 'direction', ans)

def read_shd(parent, dest):
    ans = inherit
    for shd in XPath('./w:shd[@w:fill]')(parent):
        val = get(shd, 'w:fill')
        if val:
            ans = simple_color(val, auto='transparent')
    setattr(dest, 'background_color', ans)
# }}}

class ParagraphStyle(object):

    all_properties = (
        'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
        'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
        'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
        'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',

        # Border margins padding
        'border_left_width', 'border_left_style', 'border_left_color', 'padding_left',
        'border_top_width', 'border_top_style', 'border_top_color', 'padding_top',
        'border_right_width', 'border_right_style', 'border_right_color', 'padding_right',
        'border_bottom_width', 'border_bottom_style', 'border_bottom_color', 'padding_bottom',
        'margin_left', 'margin_top', 'margin_right', 'margin_bottom',

        # Misc.
        'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
    )

    def __init__(self, pPr=None):
        self.linked_style = None
        if pPr is None:
            for p in self.all_properties:
                setattr(self, p, inherit)
        else:
            for p in (
                'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
                'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
                'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
                'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',
            ):
                setattr(self, p, binary_property(pPr, p))

            for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd'):
                f = globals()['read_%s' % x]
                f(pPr, self)

            for s in XPath('./w:pStyle[@w:val]')(pPr):
                self.linked_style = get(s, 'w:val')

        self._css = None

    def update(self, other):
        for prop in self.all_properties:
            nval = getattr(other, prop)
            if nval is not inherit:
                setattr(self, prop, nval)
        if other.linked_style is not None:
            self.linked_style = other.linked_style

    def resolve_based_on(self, parent):
        for p in self.all_properties:
            val = getattr(self, p)
            if val is inherit:
                setattr(self, p, getattr(parent, p))

    @property
    def css(self):
        if self._css is None:
            self._css = c = OrderedDict()
            if self.keepLines is True:
                c['page-break-inside'] = 'avoid'
            if self.pageBreakBefore is True:
                c['page-break-before'] = 'always'
            for edge in ('left', 'top', 'right', 'bottom'):
                val = getattr(self, 'border_%s_width' % edge)
                if val is not inherit:
                    c['border-left-width'] = '%.3gpt' % val
                for x in ('style', 'color'):
                    val = getattr(self, 'border_%s_%s' % (edge, x))
                    if val is not inherit:
                        c['border-%s-%s' % (edge, x)] = val
                val = getattr(self, 'padding_%s' % edge)
                if val is not inherit:
                    c['padding-%s' % edge] = '%.3gpt' % val
                val = getattr(self, 'margin_%s' % edge)
                if val is not inherit:
                    c['margin-%s' % edge] = val

            for x in ('text_indent', 'text_align', 'line_height', 'background_color'):
                val = getattr(self, x)
                if val is not inherit:
                    c[x.replace('_', '-')] = val
        return self._css

    # TODO: keepNext must be done at markup level

src/calibre/ebooks/docx/char_styles.py (new file, 230 lines)
@ -0,0 +1,230 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

from collections import OrderedDict
from calibre.ebooks.docx.block_styles import ( # noqa
    inherit, simple_color, LINE_STYLES, simple_float, binary_property, read_shd)
from calibre.ebooks.docx.names import XPath, get

# Read from XML {{{
def read_text_border(parent, dest):
    border_color = border_style = border_width = padding = inherit
    elems = XPath('./w:bdr')(parent)
    if elems:
        border_color = simple_color('auto')
        border_style = 'solid'
        border_width = 1
    for elem in elems:
        color = get(elem, 'w:color')
        if color is not None:
            border_color = simple_color(color)
        style = get(elem, 'w:val')
        if style is not None:
            border_style = LINE_STYLES.get(style, 'solid')
        space = get(elem, 'w:space')
        if space is not None:
            try:
                padding = float(space)
            except (ValueError, TypeError):
                pass
        sz = get(elem, 'w:sz')
        if sz is not None:
            # we don't care about art borders (they are only used for page borders)
            try:
                border_width = min(96, max(2, float(sz))) / 8
            except (ValueError, TypeError):
                pass

    setattr(dest, 'border_color', border_color)
    setattr(dest, 'border_style', border_style)
    setattr(dest, 'border_width', border_width)
    setattr(dest, 'padding', padding)

def read_color(parent, dest):
    ans = inherit
    for col in XPath('./w:color[@w:val]')(parent):
        val = get(col, 'w:val')
        if not val:
            continue
        ans = simple_color(val)
    setattr(dest, 'color', ans)

def read_highlight(parent, dest):
    ans = inherit
    for col in XPath('./w:highlight[@w:val]')(parent):
        val = get(col, 'w:val')
        if not val:
            continue
        if not val or val == 'none':
            val = 'transparent'
        ans = val
    setattr(dest, 'highlight', ans)

def read_lang(parent, dest):
    ans = inherit
    for col in XPath('./w:lang[@w:val]')(parent):
        val = get(col, 'w:val')
        if not val:
            continue
        try:
            code = int(val, 16)
        except (ValueError, TypeError):
            ans = val
        else:
            from calibre.ebooks.docx.lcid import lcid
            val = lcid.get(code, None)
            if val:
                ans = val
    setattr(dest, 'lang', ans)

def read_letter_spacing(parent, dest):
    ans = inherit
    for col in XPath('./w:spacing[@w:val]')(parent):
        val = simple_float(get(col, 'w:val'), 0.05)
        if val is not None:
            ans = val
    setattr(dest, 'letter_spacing', ans)

def read_sz(parent, dest):
    ans = inherit
    for col in XPath('./w:sz[@w:val]')(parent):
        val = simple_float(get(col, 'w:val'), 0.5)
        if val is not None:
            ans = val
    setattr(dest, 'font_size', ans)

def read_underline(parent, dest):
    ans = inherit
    for col in XPath('./w:u[@w:val]')(parent):
        val = get(col, 'w:val')
        if val:
            ans = 'underline'
    setattr(dest, 'text_decoration', ans)

def read_vert_align(parent, dest):
    ans = inherit
    for col in XPath('./w:vertAlign[@w:val]')(parent):
        val = get(col, 'w:val')
        if val and val in {'baseline', 'subscript', 'superscript'}:
            ans = val
    setattr(dest, 'vert_align', ans)
# }}}

class RunStyle(object):

    all_properties = {
        'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint',
        'rtl', 'shadow', 'smallCaps', 'strike', 'vanish',

        'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color',
        'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang',
    }

    toggle_properties = {
        'b', 'bCs', 'caps', 'emboss', 'i', 'iCs', 'imprint', 'shadow', 'smallCaps', 'strike', 'dstrike', 'vanish',
    }

    def __init__(self, rPr=None):
        self.linked_style = None
        if rPr is None:
            for p in self.all_properties:
                setattr(self, p, inherit)
        else:
            for p in (
                'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow',
                'smallCaps', 'strike', 'vanish',
            ):
                setattr(self, p, binary_property(rPr, p))

            for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang'):
                f = globals()['read_%s' % x]
                f(rPr, self)

            for s in XPath('./w:rStyle[@w:val]')(rPr):
                self.linked_style = get(s, 'w:val')

        self._css = None

    def update(self, other):
        for prop in self.all_properties:
            nval = getattr(other, prop)
            if nval is not inherit:
                setattr(self, prop, nval)
        if other.linked_style is not None:
            self.linked_style = other.linked_style

    def resolve_based_on(self, parent):
        for p in self.all_properties:
            val = getattr(self, p)
            if val is inherit:
                setattr(self, p, getattr(parent, p))

    @property
    def css(self):
|
||||
if self._css is None:
|
||||
c = self._css = OrderedDict()
|
||||
td = set()
|
||||
if self.text_decoration is not inherit:
|
||||
td.add(self.text_decoration)
|
||||
if self.strike:
|
||||
td.add('line-through')
|
||||
if self.dstrike:
|
||||
td.add('line-through')
|
||||
if td:
|
||||
c['text-decoration'] = ' '.join(td)
|
||||
if self.caps is True:
|
||||
c['text-transform'] = 'uppercase'
|
||||
if self.i is True:
|
||||
c['font-style'] = 'italic'
|
||||
if self.shadow:
|
||||
c['text-shadow'] = '2px 2px'
|
||||
if self.smallCaps is True:
|
||||
c['font-variant'] = 'small-caps'
|
||||
if self.vanish is True:
|
||||
c['display'] = 'none'
|
||||
|
||||
for x in ('color', 'style', 'width'):
|
||||
val = getattr(self, 'border_'+x)
|
||||
if x == 'width' and val is not inherit:
|
||||
val = '%.3gpt' % val
|
||||
if val is not inherit:
|
||||
c['border-%s' % x] = val
|
||||
if self.padding is not inherit:
|
||||
c['padding'] = '%.3gpt' % self.padding
|
||||
|
||||
for x in ('color', 'background_color'):
|
||||
val = getattr(self, x)
|
||||
if val is not inherit:
|
||||
c[x.replace('_', '-')] = val
|
||||
|
||||
for x in ('letter_spacing', 'font_size'):
|
||||
val = getattr(self, x)
|
||||
if val is not inherit:
|
||||
c[x.replace('_', '-')] = '%.3gpt' % val
|
||||
|
||||
if self.highlight is not inherit and self.highlight != 'transparent':
|
||||
c['background-color'] = self.highlight
|
||||
|
||||
if self.b:
|
||||
c['font-weight'] = 'bold'
|
||||
return self._css
|
||||
|
||||
def same_border(self, other):
|
||||
for x in (self, other):
|
||||
has_border = False
|
||||
for y in ('color', 'style', 'width'):
|
||||
if ('border-%s' % y) in x.css:
|
||||
has_border = True
|
||||
break
|
||||
if not has_border:
|
||||
return False
|
||||
|
||||
s = tuple(self.css.get('border-%s' % y, None) for y in ('color', 'style', 'width'))
|
||||
o = tuple(other.css.get('border-%s' % y, None) for y in ('color', 'style', 'width'))
|
||||
return s == o
|
||||
|
231  src/calibre/ebooks/docx/container.py  Normal file
@@ -0,0 +1,231 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

import os, sys, shutil

from lxml import etree

from calibre import walk, guess_type
from calibre.ebooks.metadata import string_to_authors
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.docx import InvalidDOCX
from calibre.ebooks.docx.names import DOCUMENT, DOCPROPS, XPath, APPPROPS
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.localization import canonicalize_lang
from calibre.utils.logging import default_log
from calibre.utils.zipfile import ZipFile
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER

def fromstring(raw, parser=RECOVER_PARSER):
    return etree.fromstring(raw, parser=parser)

# Read metadata {{{
def read_doc_props(raw, mi):
    root = fromstring(raw)
    titles = XPath('//dc:title')(root)
    if titles:
        title = titles[0].text
        if title and title.strip():
            mi.title = title.strip()
    tags = []
    for subject in XPath('//dc:subject')(root):
        if subject.text and subject.text.strip():
            tags.append(subject.text.strip().replace(',', '_'))
    for keywords in XPath('//cp:keywords')(root):
        if keywords.text and keywords.text.strip():
            for x in keywords.text.split():
                tags.extend(y.strip() for y in x.split(','))
    if tags:
        mi.tags = tags
    authors = XPath('//dc:creator')(root)
    aut = []
    for author in authors:
        if author.text and author.text.strip():
            aut.extend(string_to_authors(author.text))
    if aut:
        mi.authors = aut

    desc = XPath('//dc:description')(root)
    if desc:
        raw = etree.tostring(desc[0], method='text', encoding=unicode)
        mi.comments = raw

    langs = []
    for lang in XPath('//dc:language')(root):
        if lang.text and lang.text.strip():
            l = canonicalize_lang(lang.text)
            if l:
                langs.append(l)
    if langs:
        mi.languages = langs

def read_app_props(raw, mi):
    root = fromstring(raw)
    company = root.xpath('//*[local-name()="Company"]')
    if company and company[0].text and company[0].text.strip():
        mi.publisher = company[0].text.strip()
# }}}

class DOCX(object):

    def __init__(self, path_or_stream, log=None, extract=True):
        stream = path_or_stream if hasattr(path_or_stream, 'read') else open(path_or_stream, 'rb')
        self.name = getattr(stream, 'name', None) or '<stream>'
        self.log = log or default_log
        if extract:
            self.extract(stream)
        else:
            self.init_zipfile(stream)
        self.read_content_types()
        self.read_package_relationships()

    def init_zipfile(self, stream):
        self.zipf = ZipFile(stream)
        self.names = frozenset(self.zipf.namelist())

    def extract(self, stream):
        self.tdir = PersistentTemporaryDirectory('docx_container')
        try:
            zf = ZipFile(stream)
            zf.extractall(self.tdir)
        except:
            self.log.exception('DOCX appears to be an invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)

        self.names = {}
        for f in walk(self.tdir):
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f

    def exists(self, name):
        return name in self.names

    def read(self, name):
        if hasattr(self, 'zipf'):
            return self.zipf.open(name).read()
        path = self.names[name]
        with open(path, 'rb') as f:
            return f.read()

    def read_content_types(self):
        try:
            raw = self.read('[Content_Types].xml')
        except KeyError:
            raise InvalidDOCX('The docx file %s has no [Content_Types].xml' % self.name)
        root = fromstring(raw)
        self.content_types = {}
        self.default_content_types = {}
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Default" and @Extension and @ContentType]'):
            self.default_content_types[item.get('Extension').lower()] = item.get('ContentType')
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Override" and @PartName and @ContentType]'):
            name = item.get('PartName').lstrip('/')
            self.content_types[name] = item.get('ContentType')

    def content_type(self, name):
        if name in self.content_types:
            return self.content_types[name]
        ext = name.rpartition('.')[-1].lower()
        if ext in self.default_content_types:
            return self.default_content_types[ext]
        return guess_type(name)[0]

    def read_package_relationships(self):
        try:
            raw = self.read('_rels/.rels')
        except KeyError:
            raise InvalidDOCX('The docx file %s has no _rels/.rels' % self.name)
        root = fromstring(raw)
        self.relationships = {}
        self.relationships_rmap = {}
        for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
            target = item.get('Target').lstrip('/')
            typ = item.get('Type')
            self.relationships[typ] = target
            self.relationships_rmap[target] = typ

    @property
    def document_name(self):
        name = self.relationships.get(DOCUMENT, None)
        if name is None:
            names = tuple(n for n in self.names if n == 'document.xml' or n.endswith('/document.xml'))
            if not names:
                raise InvalidDOCX('The docx file %s has no main document' % self.name)
            name = names[0]
        return name

    @property
    def document(self):
        return fromstring(self.read(self.document_name))

    @property
    def document_relationships(self):
        name = self.document_name
        base = '/'.join(name.split('/')[:-1])
        by_id, by_type = {}, {}
        parts = name.split('/')
        name = '/'.join(parts[:-1] + ['_rels', parts[-1] + '.rels'])
        try:
            raw = self.read(name)
        except KeyError:
            pass
        else:
            root = fromstring(raw)
            for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
                target = '/'.join((base, item.get('Target').lstrip('/')))
                typ = item.get('Type')
                Id = item.get('Id')
                by_id[Id] = by_type[typ] = target

        return by_id, by_type

    @property
    def metadata(self):
        mi = Metadata(_('Unknown'))
        name = self.relationships.get(DOCPROPS, None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/core.xml')
            if names:
                name = names[0]
        if name:
            try:
                raw = self.read(name)
            except KeyError:
                pass
            else:
                read_doc_props(raw, mi)

        name = self.relationships.get(APPPROPS, None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/app.xml')
            if names:
                name = names[0]
        if name:
            try:
                raw = self.read(name)
            except KeyError:
                pass
            else:
                read_app_props(raw, mi)

        return mi

    def close(self):
        if hasattr(self, 'zipf'):
            self.zipf.close()
        else:
            try:
                shutil.rmtree(self.tdir)
            except EnvironmentError:
                pass

if __name__ == '__main__':
    d = DOCX(sys.argv[-1], extract=False)
    print(d.metadata)
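A minimal usage sketch for the container above (the file name is hypothetical):

    from calibre.ebooks.docx.container import DOCX

    d = DOCX('report.docx', extract=False)  # extract=False reads parts via ZipFile
    print(d.metadata)        # Metadata built from docProps/core.xml and app.xml
    print(d.document_name)   # usually 'word/document.xml'
    raw = d.read(d.document_name)  # raw bytes of the main document part
    d.close()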
233  src/calibre/ebooks/docx/lcid.py  Normal file
@@ -0,0 +1,233 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

lcid = {
    1078: 'af',  # Afrikaans - South Africa
    1052: 'sq',  # Albanian - Albania
    1118: 'am',  # Amharic - Ethiopia
    1025: 'ar',  # Arabic - Saudi Arabia
    5121: 'ar',  # Arabic - Algeria
    15361: 'ar',  # Arabic - Bahrain
    3073: 'ar',  # Arabic - Egypt
    2049: 'ar',  # Arabic - Iraq
    11265: 'ar',  # Arabic - Jordan
    13313: 'ar',  # Arabic - Kuwait
    12289: 'ar',  # Arabic - Lebanon
    4097: 'ar',  # Arabic - Libya
    6145: 'ar',  # Arabic - Morocco
    8193: 'ar',  # Arabic - Oman
    16385: 'ar',  # Arabic - Qatar
    10241: 'ar',  # Arabic - Syria
    7169: 'ar',  # Arabic - Tunisia
    14337: 'ar',  # Arabic - U.A.E.
    9217: 'ar',  # Arabic - Yemen
    1067: 'hy',  # Armenian - Armenia
    1101: 'as',  # Assamese
    2092: 'az',  # Azeri (Cyrillic)
    1068: 'az',  # Azeri (Latin)
    1069: 'eu',  # Basque
    1059: 'be',  # Belarusian
    1093: 'bn',  # Bengali (India)
    2117: 'bn',  # Bengali (Bangladesh)
    5146: 'bs',  # Bosnian (Bosnia/Herzegovina)
    1026: 'bg',  # Bulgarian
    1109: 'my',  # Burmese
    1027: 'ca',  # Catalan
    1116: 'chr',  # Cherokee - United States
    2052: 'zh',  # Chinese - People's Republic of China
    4100: 'zh',  # Chinese - Singapore
    1028: 'zh',  # Chinese - Taiwan
    3076: 'zh',  # Chinese - Hong Kong SAR
    5124: 'zh',  # Chinese - Macao SAR
    1050: 'hr',  # Croatian
    4122: 'hr',  # Croatian (Bosnia/Herzegovina)
    1029: 'cs',  # Czech
    1030: 'da',  # Danish
    1125: 'dv',  # Divehi
    1043: 'nl',  # Dutch - Netherlands
    2067: 'nl',  # Dutch - Belgium
    1126: 'bin',  # Edo
    1033: 'en',  # English - United States
    2057: 'en',  # English - United Kingdom
    3081: 'en',  # English - Australia
    10249: 'en',  # English - Belize
    4105: 'en',  # English - Canada
    9225: 'en',  # English - Caribbean
    15369: 'en',  # English - Hong Kong SAR
    16393: 'en',  # English - India
    14345: 'en',  # English - Indonesia
    6153: 'en',  # English - Ireland
    8201: 'en',  # English - Jamaica
    17417: 'en',  # English - Malaysia
    5129: 'en',  # English - New Zealand
    13321: 'en',  # English - Philippines
    18441: 'en',  # English - Singapore
    7177: 'en',  # English - South Africa
    11273: 'en',  # English - Trinidad
    12297: 'en',  # English - Zimbabwe
    1061: 'et',  # Estonian
    1080: 'fo',  # Faroese
    1065: None,  # TODO: Farsi
    1124: 'fil',  # Filipino
    1035: 'fi',  # Finnish
    1036: 'fr',  # French - France
    2060: 'fr',  # French - Belgium
    11276: 'fr',  # French - Cameroon
    3084: 'fr',  # French - Canada
    9228: 'fr',  # French - Democratic Rep. of Congo
    12300: 'fr',  # French - Cote d'Ivoire
    15372: 'fr',  # French - Haiti
    5132: 'fr',  # French - Luxembourg
    13324: 'fr',  # French - Mali
    6156: 'fr',  # French - Monaco
    14348: 'fr',  # French - Morocco
    58380: 'fr',  # French - North Africa
    8204: 'fr',  # French - Reunion
    10252: 'fr',  # French - Senegal
    4108: 'fr',  # French - Switzerland
    7180: 'fr',  # French - West Indies
    1122: 'fy',  # Frisian - Netherlands
    1127: None,  # TODO: Fulfulde - Nigeria
    1071: 'mk',  # FYRO Macedonian
    2108: 'ga',  # Gaelic (Ireland)
    1084: 'gd',  # Gaelic (Scotland)
    1110: 'gl',  # Galician
    1079: 'ka',  # Georgian
    1031: 'de',  # German - Germany
    3079: 'de',  # German - Austria
    5127: 'de',  # German - Liechtenstein
    4103: 'de',  # German - Luxembourg
    2055: 'de',  # German - Switzerland
    1032: 'el',  # Greek
    1140: 'gn',  # Guarani - Paraguay
    1095: 'gu',  # Gujarati
    1128: 'ha',  # Hausa - Nigeria
    1141: 'haw',  # Hawaiian - United States
    1037: 'he',  # Hebrew
    1081: 'hi',  # Hindi
    1038: 'hu',  # Hungarian
    1129: None,  # TODO: Ibibio - Nigeria
    1039: 'is',  # Icelandic
    1136: 'ig',  # Igbo - Nigeria
    1057: 'id',  # Indonesian
    1117: 'iu',  # Inuktitut
    1040: 'it',  # Italian - Italy
    2064: 'it',  # Italian - Switzerland
    1041: 'ja',  # Japanese
    1099: 'kn',  # Kannada
    1137: 'kr',  # Kanuri - Nigeria
    2144: 'ks',  # Kashmiri
    1120: 'ks',  # Kashmiri (Arabic)
    1087: 'kk',  # Kazakh
    1107: 'km',  # Khmer
    1111: 'kok',  # Konkani
    1042: 'ko',  # Korean
    1088: 'ky',  # Kyrgyz (Cyrillic)
    1108: 'lo',  # Lao
    1142: 'la',  # Latin
    1062: 'lv',  # Latvian
    1063: 'lt',  # Lithuanian
    1086: 'ms',  # Malay - Malaysia
    2110: 'ms',  # Malay - Brunei Darussalam
    1100: 'ml',  # Malayalam
    1082: 'mt',  # Maltese
    1112: 'mni',  # Manipuri
    1153: 'mi',  # Maori - New Zealand
    1102: 'mr',  # Marathi
    1104: 'mn',  # Mongolian (Cyrillic)
    2128: 'mn',  # Mongolian (Mongolian)
    1121: 'ne',  # Nepali
    2145: 'ne',  # Nepali - India
    1044: 'no',  # Norwegian (Bokmål)
    2068: 'no',  # Norwegian (Nynorsk)
    1096: 'or',  # Oriya
    1138: 'om',  # Oromo
    1145: 'pap',  # Papiamentu
    1123: 'ps',  # Pashto
    1045: 'pl',  # Polish
    1046: 'pt',  # Portuguese - Brazil
    2070: 'pt',  # Portuguese - Portugal
    1094: 'pa',  # Punjabi
    2118: 'pa',  # Punjabi (Pakistan)
    1131: 'qu',  # Quechua - Bolivia
    2155: 'qu',  # Quechua - Ecuador
    3179: 'qu',  # Quechua - Peru
    1047: 'rm',  # Rhaeto-Romanic
    1048: 'ro',  # Romanian
    2072: 'ro',  # Romanian - Moldava
    1049: 'ru',  # Russian
    2073: 'ru',  # Russian - Moldava
    1083: 'se',  # Sami (Lappish)
    1103: 'sa',  # Sanskrit
    1132: 'nso',  # Sepedi
    3098: 'sr',  # Serbian (Cyrillic)
    2074: 'sr',  # Serbian (Latin)
    1113: 'sd',  # Sindhi - India
    2137: 'sd',  # Sindhi - Pakistan
    1115: 'si',  # Sinhalese - Sri Lanka
    1051: 'sk',  # Slovak
    1060: 'sl',  # Slovenian
    1143: 'so',  # Somali
    1070: 'wen',  # Sorbian
    3082: 'es',  # Spanish - Spain (Modern Sort)
    1034: 'es',  # Spanish - Spain (Traditional Sort)
    11274: 'es',  # Spanish - Argentina
    16394: 'es',  # Spanish - Bolivia
    13322: 'es',  # Spanish - Chile
    9226: 'es',  # Spanish - Colombia
    5130: 'es',  # Spanish - Costa Rica
    7178: 'es',  # Spanish - Dominican Republic
    12298: 'es',  # Spanish - Ecuador
    17418: 'es',  # Spanish - El Salvador
    4106: 'es',  # Spanish - Guatemala
    18442: 'es',  # Spanish - Honduras
    58378: 'es',  # Spanish - Latin America
    2058: 'es',  # Spanish - Mexico
    19466: 'es',  # Spanish - Nicaragua
    6154: 'es',  # Spanish - Panama
    15370: 'es',  # Spanish - Paraguay
    10250: 'es',  # Spanish - Peru
    20490: 'es',  # Spanish - Puerto Rico
    21514: 'es',  # Spanish - United States
    14346: 'es',  # Spanish - Uruguay
    8202: 'es',  # Spanish - Venezuela
    1072: None,  # TODO: Sutu
    1089: 'sw',  # Swahili
    1053: 'sv',  # Swedish
    2077: 'sv',  # Swedish - Finland
    1114: 'syr',  # Syriac
    1064: 'tg',  # Tajik
    1119: None,  # TODO: Tamazight (Arabic)
    2143: None,  # TODO: Tamazight (Latin)
    1097: 'ta',  # Tamil
    1092: 'tt',  # Tatar
    1098: 'te',  # Telugu
    1054: 'th',  # Thai
    2129: 'bo',  # Tibetan - Bhutan
    1105: 'bo',  # Tibetan - People's Republic of China
    2163: 'ti',  # Tigrigna - Eritrea
    1139: 'ti',  # Tigrigna - Ethiopia
    1073: 'ts',  # Tsonga
    1074: 'tn',  # Tswana
    1055: 'tr',  # Turkish
    1090: 'tk',  # Turkmen
    1152: 'ug',  # Uighur - China
    1058: 'uk',  # Ukrainian
    1056: 'ur',  # Urdu
    2080: 'ur',  # Urdu - India
    2115: 'uz',  # Uzbek (Cyrillic)
    1091: 'uz',  # Uzbek (Latin)
    1075: 've',  # Venda
    1066: 'vi',  # Vietnamese
    1106: 'cy',  # Welsh
    1076: 'xh',  # Xhosa
    1144: 'ii',  # Yi
    1085: 'yi',  # Yiddish
    1130: 'yo',  # Yoruba
    1077: 'zu'  # Zulu
}
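The keys are the decimal values of the LCIDs that DOCX stores as hex strings in w:lang/@w:val; a sketch of the lookup that read_lang() in char_styles.py performs:

    from calibre.ebooks.docx.lcid import lcid

    val = '0409'  # hypothetical @w:val from a document
    print(lcid.get(int(val, 16)))  # 1033 -> 'en'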
65  src/calibre/ebooks/docx/names.py  Normal file
@@ -0,0 +1,65 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

from lxml.etree import XPath as X

DOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument'
DOCPROPS = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties'
APPPROPS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering'

namespaces = {
    'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
    'o': 'urn:schemas-microsoft-com:office:office',
    've': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
    # Text Content
    'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
    'w10': 'urn:schemas-microsoft-com:office:word',
    'wne': 'http://schemas.microsoft.com/office/word/2006/wordml',
    'xml': 'http://www.w3.org/XML/1998/namespace',
    # Drawing
    'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
    'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math',
    'mv': 'urn:schemas-microsoft-com:mac:vml',
    'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture',
    'v': 'urn:schemas-microsoft-com:vml',
    'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing',
    # Properties (core and extended)
    'cp': 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties',
    'dc': 'http://purl.org/dc/elements/1.1/',
    'ep': 'http://schemas.openxmlformats.org/officeDocument/2006/extended-properties',
    'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
    # Content Types
    'ct': 'http://schemas.openxmlformats.org/package/2006/content-types',
    # Package Relationships
    'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
    'pr': 'http://schemas.openxmlformats.org/package/2006/relationships',
    # Dublin Core document properties
    'dcmitype': 'http://purl.org/dc/dcmitype/',
    'dcterms': 'http://purl.org/dc/terms/'
}

def XPath(expr):
    return X(expr, namespaces=namespaces)

def is_tag(x, q):
    tag = getattr(x, 'tag', x)
    ns, name = q.partition(':')[0::2]
    return '{%s}%s' % (namespaces.get(ns, None), name) == tag

def barename(x):
    return x.rpartition('}')[-1]

def XML(x):
    return '{%s}%s' % (namespaces['xml'], x)

def get(x, attr, default=None):
    ns, name = attr.partition(':')[0::2]
    return x.attrib.get('{%s}%s' % (namespaces[ns], name), default)
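A quick sketch of these helpers in action (the XML snippet is made up):

    from lxml import etree
    from calibre.ebooks.docx.names import XPath, get, is_tag, barename

    W = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
    root = etree.fromstring(
        '<w:p xmlns:w="%s"><w:pStyle w:val="Heading1"/></w:p>' % W)
    elem = XPath('./w:pStyle')(root)[0]
    print(is_tag(elem, 'w:pStyle'))  # True
    print(get(elem, 'w:val'))        # Heading1
    print(barename(elem.tag))        # pStyle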
156  src/calibre/ebooks/docx/numbering.py  Normal file
@@ -0,0 +1,156 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

from calibre.ebooks.docx.block_styles import ParagraphStyle
from calibre.ebooks.docx.char_styles import RunStyle
from calibre.ebooks.docx.names import XPath, get

STYLE_MAP = {
    'aiueo': 'hiragana',
    'aiueoFullWidth': 'hiragana',
    'hebrew1': 'hebrew',
    'iroha': 'katakana-iroha',
    'irohaFullWidth': 'katakana-iroha',
    'lowerLetter': 'lower-alpha',
    'lowerRoman': 'lower-roman',
    'none': 'none',
    'upperLetter': 'upper-alpha',
    'upperRoman': 'upper-roman',
    'chineseCounting': 'cjk-ideographic',
    'decimalZero': 'decimal-leading-zero',
}

class Level(object):

    def __init__(self, lvl=None):
        self.restart = None
        self.start = 0
        self.fmt = 'decimal'
        self.para_link = None
        self.paragraph_style = self.character_style = None

        if lvl is not None:
            self.read_from_xml(lvl)

    def read_from_xml(self, lvl, override=False):
        for lr in XPath('./w:lvlRestart[@w:val]')(lvl):
            try:
                self.restart = int(get(lr, 'w:val'))
            except (TypeError, ValueError):
                pass

        for lr in XPath('./w:start[@w:val]')(lvl):
            try:
                self.start = int(get(lr, 'w:val'))
            except (TypeError, ValueError):
                pass

        lt = None
        for lr in XPath('./w:lvlText[@w:val]')(lvl):
            lt = get(lr, 'w:val')

        for lr in XPath('./w:numFmt[@w:val]')(lvl):
            val = get(lr, 'w:val')
            if val == 'bullet':
                self.fmt = {'\uf0a7': 'square', 'o': 'circle'}.get(lt, 'disc')
            else:
                self.fmt = STYLE_MAP.get(val, 'decimal')

        for lr in XPath('./w:pStyle[@w:val]')(lvl):
            self.para_link = get(lr, 'w:val')

        for pPr in XPath('./w:pPr')(lvl):
            ps = ParagraphStyle(pPr)
            if self.paragraph_style is None:
                self.paragraph_style = ps
            else:
                self.paragraph_style.update(ps)

        for rPr in XPath('./w:rPr')(lvl):
            ps = RunStyle(rPr)
            if self.character_style is None:
                self.character_style = ps
            else:
                self.character_style.update(ps)

    def copy(self):
        ans = Level()
        for x in ('restart', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style'):
            setattr(ans, x, getattr(self, x))
        return ans

class NumberingDefinition(object):

    def __init__(self, parent=None):
        self.levels = {}
        if parent is not None:
            for lvl in XPath('./w:lvl')(parent):
                try:
                    ilvl = int(get(lvl, 'w:ilvl', 0))
                except (TypeError, ValueError):
                    ilvl = 0
                self.levels[ilvl] = Level(lvl)

    def copy(self):
        ans = NumberingDefinition()
        for l, lvl in self.levels.iteritems():
            ans.levels[l] = lvl.copy()
        return ans

class Numbering(object):

    def __init__(self):
        self.definitions = {}
        self.instances = {}

    def __call__(self, root, styles):
        ' Read all numbering style definitions '
        lazy_load = {}
        for an in XPath('./w:abstractNum[@w:abstractNumId]')(root):
            an_id = get(an, 'w:abstractNumId')
            nsl = XPath('./w:numStyleLink[@w:val]')(an)
            if nsl:
                lazy_load[an_id] = get(nsl[0], 'w:val')
            else:
                nd = NumberingDefinition(an)
                self.definitions[an_id] = nd

        def create_instance(n, definition):
            nd = definition.copy()
            for lo in XPath('./w:lvlOverride')(n):
                ilvl = get(lo, 'w:ilvl')
                for lvl in XPath('./w:lvl')(lo)[:1]:
                    nilvl = get(lvl, 'w:ilvl')
                    ilvl = nilvl if ilvl is None else ilvl
                    try:
                        # levels are keyed by integer ilvl
                        ilvl = int(ilvl)
                    except (TypeError, ValueError):
                        ilvl = 0
                    alvl = nd.levels.get(ilvl, None)
                    if alvl is None:
                        alvl = Level()
                    alvl.read_from_xml(lvl, override=True)
                    nd.levels[ilvl] = alvl
            return nd

        next_pass = {}
        for n in XPath('./w:num[@w:numId]')(root):
            an_id = None
            num_id = get(n, 'w:numId')
            for an in XPath('./w:abstractNumId[@w:val]')(n):
                an_id = get(an, 'w:val')
            d = self.definitions.get(an_id, None)
            if d is None:
                next_pass[num_id] = (an_id, n)
                continue
            self.instances[num_id] = create_instance(n, d)

        numbering_links = styles.numbering_style_links
        for an_id, style_link in lazy_load.iteritems():
            num_id = numbering_links[style_link]
            self.definitions[an_id] = self.instances[num_id].copy()

        for num_id, (an_id, n) in next_pass.iteritems():
            d = self.definitions.get(an_id, None)
            if d is not None:
                self.instances[num_id] = create_instance(n, d)
284  src/calibre/ebooks/docx/styles.py  Normal file
@@ -0,0 +1,284 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

import textwrap
from collections import OrderedDict, Counter

from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit
from calibre.ebooks.docx.char_styles import RunStyle
from calibre.ebooks.docx.names import XPath, get


class Style(object):
    '''
    Class representing a <w:style> element. Can contain block, character, etc. styles.
    '''

    name_path = XPath('./w:name[@w:val]')
    based_on_path = XPath('./w:basedOn[@w:val]')

    def __init__(self, elem):
        self.resolved = False
        self.style_id = get(elem, 'w:styleId')
        self.style_type = get(elem, 'w:type')
        names = self.name_path(elem)
        self.name = get(names[-1], 'w:val') if names else None
        based_on = self.based_on_path(elem)
        self.based_on = get(based_on[0], 'w:val') if based_on else None
        if self.style_type == 'numbering':
            self.based_on = None
        self.is_default = get(elem, 'w:default') in {'1', 'on', 'true'}

        self.paragraph_style = self.character_style = None

        if self.style_type in {'paragraph', 'character'}:
            if self.style_type == 'paragraph':
                for pPr in XPath('./w:pPr')(elem):
                    ps = ParagraphStyle(pPr)
                    if self.paragraph_style is None:
                        self.paragraph_style = ps
                    else:
                        self.paragraph_style.update(ps)

            for rPr in XPath('./w:rPr')(elem):
                rs = RunStyle(rPr)
                if self.character_style is None:
                    self.character_style = rs
                else:
                    self.character_style.update(rs)

        if self.style_type == 'numbering':
            self.numbering_style_link = None
            for x in XPath('./w:pPr/w:numPr/w:numId[@w:val]')(elem):
                self.numbering_style_link = get(x, 'w:val')

    def resolve_based_on(self, parent):
        if parent.paragraph_style is not None:
            if self.paragraph_style is None:
                self.paragraph_style = ParagraphStyle()
            self.paragraph_style.resolve_based_on(parent.paragraph_style)
        if parent.character_style is not None:
            if self.character_style is None:
                self.character_style = RunStyle()
            self.character_style.resolve_based_on(parent.character_style)


class Styles(object):
    '''
    Collection of all styles defined in the document. Used to get the final
    styles applicable to elements in the document markup.
    '''

    def __init__(self):
        self.id_map = OrderedDict()
        self.para_cache = {}
        self.para_char_cache = {}
        self.run_cache = {}
        self.classes = {}
        self.counter = Counter()
        self.default_styles = {}
        self.numbering_style_links = {}

    def __iter__(self):
        for s in self.id_map.itervalues():
            yield s

    def __getitem__(self, key):
        return self.id_map[key]

    def __len__(self):
        return len(self.id_map)

    def get(self, key, default=None):
        return self.id_map.get(key, default)

    def __call__(self, root):
        for s in XPath('//w:style')(root):
            s = Style(s)
            if s.style_id:
                self.id_map[s.style_id] = s
            if s.is_default:
                self.default_styles[s.style_type] = s
            if s.style_type == 'numbering' and s.numbering_style_link:
                self.numbering_style_links[s.style_id] = s.numbering_style_link

        self.default_paragraph_style = self.default_character_style = None

        for dd in XPath('./w:docDefaults')(root):
            for pd in XPath('./w:pPrDefault')(dd):
                for pPr in XPath('./w:pPr')(pd):
                    ps = ParagraphStyle(pPr)
                    if self.default_paragraph_style is None:
                        self.default_paragraph_style = ps
                    else:
                        self.default_paragraph_style.update(ps)
            for pd in XPath('./w:rPrDefault')(dd):
                for rPr in XPath('./w:rPr')(pd):
                    rs = RunStyle(rPr)
                    if self.default_character_style is None:
                        self.default_character_style = rs
                    else:
                        self.default_character_style.update(rs)

        def resolve(s, p):
            if p is not None:
                if not p.resolved:
                    resolve(p, self.get(p.based_on))
                s.resolve_based_on(p)
            s.resolved = True

        for s in self:
            if not s.resolved:
                resolve(s, self.get(s.based_on))

    def para_val(self, parent_styles, direct_formatting, attr):
        val = getattr(direct_formatting, attr)
        if val is inherit:
            for ps in reversed(parent_styles):
                pval = getattr(ps, attr)
                if pval is not inherit:
                    val = pval
                    break
        return val

    def run_val(self, parent_styles, direct_formatting, attr):
        val = getattr(direct_formatting, attr)
        if val is not inherit:
            return val
        if attr in direct_formatting.toggle_properties:
            # Toggle properties are XORed down the style hierarchy rather
            # than simply inherited
            val = False
            for rs in parent_styles:
                pval = getattr(rs, attr)
                if pval is True:
                    val ^= True
            return val
        for rs in reversed(parent_styles):
            rval = getattr(rs, attr)
            if rval is not inherit:
                return rval
        return val

    def resolve_paragraph(self, p):
        ans = self.para_cache.get(p, None)
        if ans is None:
            ans = self.para_cache[p] = ParagraphStyle()
            ans.style_name = None
            direct_formatting = None
            for pPr in XPath('./w:pPr')(p):
                ps = ParagraphStyle(pPr)
                if direct_formatting is None:
                    direct_formatting = ps
                else:
                    direct_formatting.update(ps)

            if direct_formatting is None:
                direct_formatting = ParagraphStyle()
            parent_styles = []
            if self.default_paragraph_style is not None:
                parent_styles.append(self.default_paragraph_style)

            default_para = self.default_styles.get('paragraph', None)
            if direct_formatting.linked_style is not None:
                ls = self.get(direct_formatting.linked_style)
                if ls is not None:
                    ans.style_name = ls.name
                    ps = ls.paragraph_style
                    if ps is not None:
                        parent_styles.append(ps)
                    if ls.character_style is not None:
                        self.para_char_cache[p] = ls.character_style
            elif default_para is not None:
                if default_para.paragraph_style is not None:
                    parent_styles.append(default_para.paragraph_style)
                if default_para.character_style is not None:
                    self.para_char_cache[p] = default_para.character_style

            for attr in ans.all_properties:
                setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr))
        return ans

    def resolve_run(self, r):
        ans = self.run_cache.get(r, None)
        if ans is None:
            p = r.getparent()
            ans = self.run_cache[r] = RunStyle()
            direct_formatting = None
            for rPr in XPath('./w:rPr')(r):
                rs = RunStyle(rPr)
                if direct_formatting is None:
                    direct_formatting = rs
                else:
                    direct_formatting.update(rs)

            if direct_formatting is None:
                direct_formatting = RunStyle()

            parent_styles = []
            default_char = self.default_styles.get('character', None)
            if self.default_character_style is not None:
                parent_styles.append(self.default_character_style)
            pstyle = self.para_char_cache.get(p, None)
            if pstyle is not None:
                parent_styles.append(pstyle)
            if direct_formatting.linked_style is not None:
                ls = self.get(direct_formatting.linked_style)
                if ls is not None and ls.character_style is not None:
                    parent_styles.append(ls.character_style)
            elif default_char is not None and default_char.character_style is not None:
                parent_styles.append(default_char.character_style)

            for attr in ans.all_properties:
                setattr(ans, attr, self.run_val(parent_styles, direct_formatting, attr))

        return ans

    def resolve(self, obj):
        if obj.tag.endswith('}p'):
            return self.resolve_paragraph(obj)
        if obj.tag.endswith('}r'):
            return self.resolve_run(obj)

    def resolve_numbering(self, numbering):
        pass  # TODO: Implement this

    def register(self, css, prefix):
        h = hash(tuple(css.iteritems()))
        ans, _ = self.classes.get(h, (None, None))
        if ans is None:
            self.counter[prefix] += 1
            ans = '%s_%d' % (prefix, self.counter[prefix])
            self.classes[h] = (ans, css)
        return ans

    def generate_classes(self):
        for bs in self.para_cache.itervalues():
            css = bs.css
            if css:
                self.register(css, 'block')
        for bs in self.run_cache.itervalues():
            css = bs.css
            if css:
                self.register(css, 'text')

    def class_name(self, css):
        h = hash(tuple(css.iteritems()))
        return self.classes.get(h, (None, None))[0]

    def generate_css(self):
        prefix = textwrap.dedent(
            '''\
            p { margin: 0; padding: 0; text-indent: 1.5em }
            ''')

        ans = []
        for (cls, css) in sorted(self.classes.itervalues(), key=lambda x: x[0]):
            b = ('\t%s: %s;' % (k, v) for k, v in css.iteritems())
            b = '\n'.join(b)
            ans.append('.%s {\n%s\n}\n' % (cls, b.rstrip(';')))
        return prefix + '\n' + '\n'.join(ans)
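The register/class_name/generate_css trio above is the whole CSS pipeline; a sketch of how it composes (the property values are hypothetical):

    from collections import OrderedDict
    from calibre.ebooks.docx.styles import Styles

    styles = Styles()
    cls = styles.register(OrderedDict([('font-weight', 'bold')]), 'text')
    print(cls)  # text_1
    print(styles.generate_css())  # the p reset rule plus .text_1 { font-weight: bold }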
233  src/calibre/ebooks/docx/to_html.py  Normal file
@@ -0,0 +1,233 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

import sys, os, re

from lxml import html
from lxml.html.builder import (
    HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)

from calibre.ebooks.docx.container import DOCX, fromstring
from calibre.ebooks.docx.names import XPath, get, is_tag, barename, XML, STYLES, NUMBERING
from calibre.ebooks.docx.styles import Styles, inherit
from calibre.ebooks.docx.numbering import Numbering
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1

class Text:

    def __init__(self, elem, attr, buf):
        self.elem, self.attr, self.buf = elem, attr, buf

    def add_elem(self, elem):
        # Flush the accumulated text into the current element, then start
        # accumulating into the tail of the newly added element
        setattr(self.elem, self.attr, ''.join(self.buf))
        self.elem, self.attr, self.buf = elem, 'tail', []

class Convert(object):

    def __init__(self, path_or_stream, dest_dir=None, log=None):
        self.docx = DOCX(path_or_stream, log=log)
        self.log = self.docx.log
        self.dest_dir = dest_dir or os.getcwdu()
        self.mi = self.docx.metadata
        self.body = BODY()
        self.styles = Styles()
        self.object_map = {}
        self.html = HTML(
            HEAD(
                META(charset='utf-8'),
                TITLE(self.mi.title or _('Unknown')),
                LINK(rel='stylesheet', type='text/css', href='docx.css'),
            ),
            self.body
        )
        self.html.text = '\n\t'
        self.html[0].text = '\n\t\t'
        self.html[0].tail = '\n'
        for child in self.html[0]:
            child.tail = '\n\t\t'
        self.html[0][-1].tail = '\n\t'
        self.html[1].text = self.html[1].tail = '\n'
        lang = canonicalize_lang(self.mi.language)
        if lang and lang != 'und':
            lang = lang_as_iso639_1(lang)
            if lang:
                self.html.set('lang', lang)

    def __call__(self):
        doc = self.docx.document
        relationships_by_id, relationships_by_type = self.docx.document_relationships
        self.read_styles(relationships_by_type)
        for top_level in XPath('/w:document/w:body/*')(doc):
            if is_tag(top_level, 'w:p'):
                p = self.convert_p(top_level)
                self.body.append(p)
            elif is_tag(top_level, 'w:tbl'):
                pass  # TODO: tables
            elif is_tag(top_level, 'w:sectPr'):
                pass  # TODO: Last section properties
            else:
                self.log.debug('Unknown top-level tag: %s, ignoring' % barename(top_level.tag))
        if len(self.body) > 0:
            self.body.text = '\n\t'
            for child in self.body:
                child.tail = '\n\t'
            self.body[-1].tail = '\n'

        self.styles.generate_classes()
        for html_obj, obj in self.object_map.iteritems():
            style = self.styles.resolve(obj)
            if style is not None:
                css = style.css
                if css:
                    cls = self.styles.class_name(css)
                    if cls:
                        html_obj.set('class', cls)
        self.write()

    def read_styles(self, relationships_by_type):

        def get_name(rtype, defname):
            name = relationships_by_type.get(rtype, None)
            if name is None:
                cname = self.docx.document_name.split('/')
                cname[-1] = defname
                cname = '/'.join(cname)
                if self.docx.exists(cname):
                    name = cname
            return name

        nname = get_name(NUMBERING, 'numbering.xml')
        sname = get_name(STYLES, 'styles.xml')
        numbering = Numbering()

        if sname is not None:
            try:
                raw = self.docx.read(sname)
            except KeyError:
                self.log.warn('Styles %s do not exist' % sname)
            else:
                self.styles(fromstring(raw))

        if nname is not None:
            try:
                raw = self.docx.read(nname)
            except KeyError:
                self.log.warn('Numbering styles %s do not exist' % nname)
            else:
                numbering(fromstring(raw), self.styles)

        self.styles.resolve_numbering(numbering)

    def write(self):
        raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
        with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
            f.write(raw)
        css = self.styles.generate_css()
        if css:
            with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
                f.write(css.encode('utf-8'))

    def convert_p(self, p):
        dest = P()
        style = self.styles.resolve_paragraph(p)
        for run in XPath('descendant::w:r')(p):
            span = self.convert_run(run)
            dest.append(span)

        m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
        if m is not None:
            n = max(1, min(6, int(m.group(1))))
            dest.tag = 'h%d' % n

        if style.direction == 'rtl':
            dest.set('dir', 'rtl')

        # Consecutive runs that share the same border are wrapped in a single
        # bordered <span>, instead of each run getting its own border
        border_runs = []
        common_borders = []
        for span in dest:
            run = self.object_map[span]
            rstyle = self.styles.resolve_run(run)
            if not border_runs or border_runs[-1][1].same_border(rstyle):
                border_runs.append((span, rstyle))
            else:
                if len(border_runs) > 1:
                    common_borders.append(border_runs)
                border_runs = [(span, rstyle)]
        if len(border_runs) > 1:
            common_borders.append(border_runs)

        for border_run in common_borders:
            spans = []
            bs = {}
            for span, rstyle in border_run:
                c = rstyle.css
                spans.append(span)
                for x in ('width', 'color', 'style'):
                    val = c.pop('border-%s' % x, None)
                    if val is not None:
                        bs['border-%s' % x] = val
            if bs:
                cls = self.styles.register(bs, 'text_border')
                wrapper = self.wrap_elems(spans, SPAN())
                wrapper.set('class', cls)

        self.object_map[dest] = p
        return dest

    def wrap_elems(self, elems, wrapper):
        p = elems[0].getparent()
        idx = p.index(elems[0])
        p.insert(idx, wrapper)
        wrapper.tail = elems[-1].tail
        elems[-1].tail = None
        for elem in elems:
            p.remove(elem)
            wrapper.append(elem)
        return wrapper

    def convert_run(self, run):
        ans = SPAN()
        text = Text(ans, 'text', [])

        for child in run:
            if is_tag(child, 'w:t'):
                if not child.text:
                    continue
                space = child.get(XML('space'), None)
                if space == 'preserve':
                    text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                    ans.append(text.elem)
                else:
                    text.buf.append(child.text)
            elif is_tag(child, 'w:cr'):
                text.add_elem(BR())
                ans.append(text.elem)
            elif is_tag(child, 'w:br'):
                typ = get(child, 'w:type')
                if typ in {'column', 'page'}:
                    br = BR(style='page-break-after:always')
                else:
                    clear = get(child, 'w:clear')
                    if clear in {'all', 'left', 'right'}:
                        br = BR(style='clear:%s' % ('both' if clear == 'all' else clear))
                    else:
                        br = BR()
                text.add_elem(br)
                ans.append(text.elem)
        if text.buf:
            setattr(text.elem, text.attr, ''.join(text.buf))

        style = self.styles.resolve_run(run)
        if style.vert_align in {'superscript', 'subscript'}:
            ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
        if style.lang is not inherit:
            ans.set('lang', style.lang)
        self.object_map[ans] = run
        return ans

if __name__ == '__main__':
    from calibre.utils.logging import default_log
    default_log.filter_level = default_log.DEBUG
    Convert(sys.argv[-1], log=default_log)()
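End to end, the converter can be driven exactly like the __main__ block above; a sketch (the paths are hypothetical):

    from calibre.ebooks.docx.to_html import Convert

    Convert('report.docx', dest_dir='/tmp/docx_out')()  # writes index.html and docx.css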
@@ -136,7 +136,7 @@ class FB2MLizer(object):
             metadata['author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(author_last)
             metadata['author'] += '</author>'
         if not metadata['author']:
-            metadata['author'] = u'<author><first-name></first-name><last-name><last-name></author>'
+            metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>'
 
         metadata['keywords'] = u''
         tags = list(map(unicode, self.oeb_book.metadata.subject))
@@ -178,6 +178,8 @@ class Metadata(object):
         return key in object.__getattribute__(self, '_data')
 
     def deepcopy(self):
+        ''' Do not use this method unless you know what you are doing, if you want to create a simple clone of
+        this object, use :method:`deepcopy_metadata` instead. '''
         m = Metadata(None)
         m.__dict__ = copy.deepcopy(self.__dict__)
         object.__setattr__(m, '_data', copy.deepcopy(object.__getattribute__(self, '_data')))
@@ -14,16 +14,15 @@ class SafeFormat(TemplateFormatter):
 
     def __init__(self):
         TemplateFormatter.__init__(self)
-        from calibre.ebooks.metadata.book.base import field_metadata
-        self.field_metadata = field_metadata
 
     def get_value(self, orig_key, args, kwargs):
         if not orig_key:
             return ''
         key = orig_key = orig_key.lower()
-        if key != 'title_sort' and key not in TOP_LEVEL_IDENTIFIERS and \
-                key not in ALL_METADATA_FIELDS:
-            key = self.field_metadata.search_term_to_field_key(key)
+        if (key != 'title_sort' and key not in TOP_LEVEL_IDENTIFIERS and
+                key not in ALL_METADATA_FIELDS):
+            from calibre.ebooks.metadata.book.base import field_metadata
+            key = field_metadata.search_term_to_field_key(key)
         if key is None or (self.book and
                 key not in self.book.all_field_keys()):
             if hasattr(self.book, orig_key):
@@ -7,70 +7,21 @@ __license__ = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-from lxml import etree
+from calibre.ebooks.docx.container import DOCX
 
-from calibre.ebooks.metadata.book.base import Metadata
 from calibre.utils.zipfile import ZipFile
 from calibre.utils.magick.draw import identify_data
-from calibre.ebooks.oeb.base import DC11_NS
-from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
-
-NSMAP = {'dc':DC11_NS,
-        'cp':'http://schemas.openxmlformats.org/package/2006/metadata/core-properties'}
-
-def XPath(expr):
-    return etree.XPath(expr, namespaces=NSMAP)
-
-def _read_doc_props(raw, mi):
-    from calibre.ebooks.metadata import string_to_authors
-    root = etree.fromstring(raw, parser=RECOVER_PARSER)
-    titles = XPath('//dc:title')(root)
-    if titles:
-        title = titles[0].text
-        if title and title.strip():
-            mi.title = title.strip()
-    tags = []
-    for subject in XPath('//dc:subject')(root):
-        if subject.text and subject.text.strip():
-            tags.append(subject.text.strip().replace(',', '_'))
-    for keywords in XPath('//cp:keywords')(root):
-        if keywords.text and keywords.text.strip():
-            for x in keywords.text.split():
-                tags.extend(y.strip() for y in x.split(','))
-    if tags:
-        mi.tags = tags
-    authors = XPath('//dc:creator')(root)
-    aut = []
-    for author in authors:
-        if author.text and author.text.strip():
-            aut.extend(string_to_authors(author.text))
-    if aut:
-        mi.authors = aut
-
-    desc = XPath('//dc:description')(root)
-    if desc:
-        raw = etree.tostring(desc[0], method='text', encoding=unicode)
-        mi.comments = raw
-
-def _read_app_props(raw, mi):
-    root = etree.fromstring(raw, parser=RECOVER_PARSER)
-    company = root.xpath('//*[local-name()="Company"]')
-    if company and company[0].text and company[0].text.strip():
-        mi.publisher = company[0].text.strip()
 
 def get_metadata(stream):
+    c = DOCX(stream, extract=False)
+    mi = c.metadata
+    c.close()
+    stream.seek(0)
+    cdata = None
     with ZipFile(stream, 'r') as zf:
-
-        mi = Metadata(_('Unknown'))
-        cdata = None
-
         for zi in zf.infolist():
             ext = zi.filename.rpartition('.')[-1].lower()
-            if zi.filename.lower() == 'docprops/core.xml':
-                _read_doc_props(zf.read(zi), mi)
-            elif zi.filename.lower() == 'docprops/app.xml':
-                _read_app_props(zf.read(zi), mi)
-            elif cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
+            if cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
                 raw = zf.read(zi)
                 try:
                     width, height, fmt = identify_data(raw)
@ -13,12 +13,12 @@ from calibre.utils.date import parse_date
|
||||
from calibre.ebooks.mobi import MobiError
|
||||
from calibre.ebooks.metadata import MetaInformation, check_isbn
|
||||
from calibre.ebooks.mobi.langcodes import main_language, sub_language, mobi2iana
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
|
||||
 NULL_INDEX = 0xffffffff

 class EXTHHeader(object): # {{{

     def __init__(self, raw, codec, title):
         self.doctype = raw[:4]
@@ -62,7 +62,7 @@ class EXTHHeader(object): # {{{
             elif idx == 502:
                 # last update time
                 pass
             elif idx == 503: # Long title
                 # Amazon seems to regard this as the definitive book title
                 # rather than the title from the PDB header. In fact when
                 # sending MOBI files through Amazon's email service if the
@@ -72,7 +72,7 @@ class EXTHHeader(object): # {{{
                     title = self.decode(content)
                 except:
                     pass
             elif idx == 524: # Lang code
                 try:
                     lang = content.decode(codec)
                     lang = canonicalize_lang(lang)
@@ -83,22 +83,22 @@ class EXTHHeader(object): # {{{
             #else:
             #    print 'unknown record', idx, repr(content)
         if title:
-            self.mi.title = replace_entities(clean_ascii_chars(title))
+            self.mi.title = replace_entities(clean_xml_chars(clean_ascii_chars(title)))

     def process_metadata(self, idx, content, codec):
         if idx == 100:
             if self.mi.is_null('authors'):
                 self.mi.authors = []
-            au = self.decode(content).strip()
+            au = clean_xml_chars(self.decode(content).strip())
             self.mi.authors.append(au)
             if self.mi.is_null('author_sort') and re.match(r'\S+?\s*,\s+\S+', au.strip()):
                 self.mi.author_sort = au.strip()
         elif idx == 101:
-            self.mi.publisher = self.decode(content).strip()
+            self.mi.publisher = clean_xml_chars(self.decode(content).strip())
             if self.mi.publisher in {'Unknown', _('Unknown')}:
                 self.mi.publisher = None
         elif idx == 103:
-            self.mi.comments = self.decode(content).strip()
+            self.mi.comments = clean_xml_chars(self.decode(content).strip())
         elif idx == 104:
             raw = check_isbn(self.decode(content).strip().replace('-', ''))
             if raw:
@@ -106,7 +106,7 @@ class EXTHHeader(object): # {{{
         elif idx == 105:
             if not self.mi.tags:
                 self.mi.tags = []
-            self.mi.tags.extend([x.strip() for x in self.decode(content).split(';')])
+            self.mi.tags.extend([x.strip() for x in clean_xml_chars(self.decode(content)).split(';')])
             self.mi.tags = list(set(self.mi.tags))
         elif idx == 106:
             try:
@@ -114,8 +114,8 @@ class EXTHHeader(object): # {{{
             except:
                 pass
         elif idx == 108:
-            self.mi.book_producer = self.decode(content).strip()
+            self.mi.book_producer = clean_xml_chars(self.decode(content).strip())
         elif idx == 112: # dc:source set in some EBSP amazon samples
             try:
                 content = content.decode(codec).strip()
                 isig = 'urn:isbn:'
@@ -131,7 +131,7 @@ class EXTHHeader(object): # {{{
                     self.mi.application_id = self.mi.uuid = cid
             except:
                 pass
         elif idx == 113: # ASIN or other id
             try:
                 self.uuid = content.decode('ascii')
                 self.mi.set_identifier('mobi-asin', self.uuid)
@@ -242,7 +242,7 @@ class BookHeader(object):
             # if cnt is 1 or less, fdst section number can be garbage
             if self.fdstcnt <= 1:
                 self.fdstidx = NULL_INDEX
         else: # Null values
             self.skelidx = self.dividx = self.othidx = self.fdstidx = \
                     NULL_INDEX

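Every string decoded from an EXTH record above now passes through clean_xml_chars before it reaches the metadata object, so a single bad codepoint in a MOBI header can no longer make the book's metadata unserializable. A minimal sketch of what such a filter does, assuming the XML 1.0 character rules; the regex below is an illustrative stand-in for calibre.utils.cleantext.clean_xml_chars, not its exact implementation:

    import re

    # XML 1.0 forbids most C0 control characters and the \ufffe/\uffff
    # non-characters; tab, newline and carriage return remain legal.
    _xml_unsafe = re.compile(u'[\x00-\x08\x0b\x0c\x0e-\x1f\ufffe\uffff]')

    def clean_xml_chars(raw):
        # Drop the unsafe codepoints now rather than fail later when
        # the metadata is serialized to XML
        return _xml_unsafe.sub(u'', raw)

    print(repr(clean_xml_chars(u'Bad\x00Title')))  # u'BadTitle'
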
@@ -24,6 +24,7 @@ from calibre.ebooks.oeb.base import namespace, barename, XPath, xpath, \
                                     urlnormalize, BINARY_MIME, \
                                     OEBError, OEBBook, DirContainer
 from calibre.ebooks.oeb.writer import OEBWriter
+from calibre.utils.cleantext import clean_xml_chars
 from calibre.utils.localization import get_lang
 from calibre.ptempfile import TemporaryDirectory
 from calibre.constants import __appname__, __version__
@@ -106,7 +107,7 @@ class OEBReader(object):
         try:
             opf = etree.fromstring(data)
         except etree.XMLSyntaxError:
-            data = xml_replace_entities(data, encoding=None)
+            data = xml_replace_entities(clean_xml_chars(data), encoding=None)
             try:
                 opf = etree.fromstring(data)
                 self.logger.warn('OPF contains invalid HTML named entities')

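The OPF change applies the same clean-then-retry idea at parse time: parse optimistically, and only pay for cleanup when lxml rejects the document. A rough sketch of the control flow, assuming data is an already-decoded unicode string with no encoding declaration, and that clean_xml_chars and calibre's xml_replace_entities helper are in scope:

    from lxml import etree

    def parse_opf(data, logger):
        try:
            return etree.fromstring(data)
        except etree.XMLSyntaxError:
            # Strip XML-unsafe codepoints, convert named HTML entities
            # to characters, then try once more
            data = xml_replace_entities(clean_xml_chars(data), encoding=None)
            root = etree.fromstring(data)
            logger.warn('OPF contains invalid HTML named entities')
            return root
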
@@ -187,7 +187,7 @@ def _config(): # {{{
     c.add_opt('shortcuts_search_history', default=[],
         help='Search history for the keyboard preferences')
     c.add_opt('jobs_search_history', default=[],
-        help='Search history for the keyboard preferences')
+        help='Search history for the tweaks preferences')
     c.add_opt('tweaks_search_history', default=[],
         help='Search history for tweaks')
     c.add_opt('worker_limit', default=6,

@@ -116,11 +116,12 @@ class MovedDialog(QDialog): # {{{
         self.cd.setIcon(QIcon(I('document_open.png')))
         self.cd.clicked.connect(self.choose_dir)
         l.addWidget(self.cd, 2, 1, 1, 1)
-        self.bb = QDialogButtonBox(self)
+        self.bb = QDialogButtonBox(QDialogButtonBox.Abort)
         b = self.bb.addButton(_('Library moved'), self.bb.AcceptRole)
         b.setIcon(QIcon(I('ok.png')))
         b = self.bb.addButton(_('Forget library'), self.bb.RejectRole)
         b.setIcon(QIcon(I('edit-clear.png')))
+        b.clicked.connect(self.forget_library)
         self.bb.accepted.connect(self.accept)
         self.bb.rejected.connect(self.reject)
         l.addWidget(self.bb, 3, 0, 1, ncols)
@@ -132,9 +133,8 @@ class MovedDialog(QDialog): # {{{
         if d is not None:
             self.loc.setText(d)

-    def reject(self):
+    def forget_library(self):
         self.stats.remove(self.location)
         QDialog.reject(self)

     def accept(self):
         newloc = unicode(self.loc.text())

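The MovedDialog hunk is a behaviour fix as much as a cleanup: reject() used to both close the dialog and forget the library, so pressing Esc silently discarded it. Building the button box with a stock Abort button and giving 'Forget library' its own slot separates dismissing from forgetting. A stripped-down sketch of that wiring, with placeholder slots (PyQt4, as used in this file):

    from PyQt4.Qt import QDialog, QDialogButtonBox

    class MovedDialogSketch(QDialog):

        def __init__(self, parent=None):
            QDialog.__init__(self, parent)
            # The stock Abort button only triggers reject(), i.e. close
            self.bb = QDialogButtonBox(QDialogButtonBox.Abort)
            b = self.bb.addButton('Library moved', self.bb.AcceptRole)
            b = self.bb.addButton('Forget library', self.bb.RejectRole)
            # Forgetting is now an explicit click, not a side effect of
            # every rejection
            b.clicked.connect(self.forget_library)
            self.bb.accepted.connect(self.accept)
            self.bb.rejected.connect(self.reject)

        def forget_library(self):
            # remove the stale location from the stats, then close
            QDialog.reject(self)
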
@@ -240,9 +240,10 @@ class EditMetadataAction(InterfaceAction):
             opf, cov = id_map[book_id]
             cfile = mi.cover
             mi.cover, mi.cover_data = None, (None, None)
-            with open(opf, 'wb') as f:
-                f.write(metadata_to_opf(mi))
-            if cfile:
+            if opf is not None:
+                with open(opf, 'wb') as f:
+                    f.write(metadata_to_opf(mi))
+            if cfile and cov:
                 shutil.copyfile(cfile, cov)
                 os.remove(cfile)
             nid_map[book_id] = id_map[book_id]

@@ -38,6 +38,13 @@ class ShowQuickviewAction(InterfaceAction):
             Quickview(self.gui, self.gui.library_view, index)
         self.current_instance.show()

+    def change_quickview_column(self, idx):
+        self.show_quickview()
+        if self.current_instance:
+            if self.current_instance.is_closed:
+                return
+            self.current_instance.change_quickview_column.emit(idx)
+
     def library_changed(self, db):
         if self.current_instance and not self.current_instance.is_closed:
             self.current_instance.set_database(db)

@@ -28,7 +28,10 @@ class ConfigWidget(QWidget, Ui_ConfigWidget):

         all_formats = set(all_formats)
         self.calibre_known_formats = device.FORMATS
-        self.device_name = device.get_gui_name()
+        try:
+            self.device_name = device.get_gui_name()
+        except TypeError:
+            self.device_name = getattr(device, 'gui_name', None) or _('Device')
         if device.USER_CAN_ADD_NEW_FORMATS:
             all_formats = set(all_formats) | set(BOOK_EXTENSIONS)

@@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'

 from PyQt4.Qt import (Qt, QDialog, QAbstractItemView, QTableWidgetItem,
                       QListWidgetItem, QByteArray, QCoreApplication,
-                      QApplication)
+                      QApplication, pyqtSignal)

 from calibre.customize.ui import find_plugin
 from calibre.gui2 import gprefs
@@ -44,6 +44,8 @@ class TableItem(QTableWidgetItem):

 class Quickview(QDialog, Ui_Quickview):

+    change_quickview_column = pyqtSignal(object)
+
     def __init__(self, gui, view, row):
         QDialog.__init__(self, gui, flags=Qt.Window)
         Ui_Quickview.__init__(self)
@@ -105,6 +107,7 @@ class Quickview(QDialog, Ui_Quickview):
         self.refresh(row)

         self.view.clicked.connect(self.slave)
+        self.change_quickview_column.connect(self.slave)
         QCoreApplication.instance().aboutToQuit.connect(self.save_state)
         self.search_button.clicked.connect(self.do_search)
         view.model().new_bookdisplay_data.connect(self.book_was_changed)
@@ -164,6 +167,8 @@ class Quickview(QDialog, Ui_Quickview):

         if vals:
             self.no_valid_items = False
+            if self.db.field_metadata[key]['datatype'] == 'rating':
+                vals = unicode(vals/2)
             if not isinstance(vals, list):
                 vals = [vals]
             vals.sort(key=sort_key)

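The Quickview change is the standard Qt cross-object signal pattern: the dialog declares a class-level pyqtSignal, the column-header handler in the book list emits it (through the plugin action), and the dialog routes it to the same slave slot that already handles clicks. A minimal, self-contained version of the pattern, with hypothetical names:

    from __future__ import print_function
    from PyQt4.Qt import QObject, pyqtSignal

    class Emitter(QObject):
        # Class attribute; PyQt creates a bound signal per instance
        column_changed = pyqtSignal(object)

    class Listener(QObject):
        def on_change(self, idx):
            print('column changed to', idx)

    e, l = Emitter(), Listener()
    e.column_changed.connect(l.on_change)
    e.column_changed.emit(3)  # prints: column changed to 3
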
@@ -197,6 +197,16 @@ class BooksView(QTableView): # {{{
         elif action.startswith('align_'):
             alignment = action.partition('_')[-1]
             self._model.change_alignment(column, alignment)
+        elif action == 'quickview':
+            from calibre.customize.ui import find_plugin
+            qv = find_plugin('Show Quickview')
+            if qv:
+                rows = self.selectionModel().selectedRows()
+                if len(rows) > 0:
+                    current_row = rows[0].row()
+                    current_col = self.column_map.index(column)
+                    index = self.model().index(current_row, current_col)
+                    qv.actual_plugin_.change_quickview_column(index)

         self.save_state()

@@ -240,7 +250,14 @@ class BooksView(QTableView): # {{{
                 a.setCheckable(True)
                 a.setChecked(True)

+            if self._model.db.field_metadata[col]['is_category']:
+                act = self.column_header_context_menu.addAction(_('Quickview column %s') %
+                    name,
+                    partial(self.column_header_context_handler, action='quickview',
+                        column=col))
+                rows = self.selectionModel().selectedRows()
+                if len(rows) > 1:
+                    act.setEnabled(False)

         hidden_cols = [self.column_map[i] for i in
                        range(self.column_header.count()) if

@@ -172,7 +172,10 @@ class Tweaks(QAbstractListModel, SearchQueryParser): # {{{
             doc.append(line[1:].strip())
         doc = '\n'.join(doc)
         while True:
-            line = lines[pos]
+            try:
+                line = lines[pos]
+            except IndexError:
+                break
             if not line.strip():
                 break
             spidx1 = line.find(' ')

@@ -12,6 +12,7 @@ from PyQt4.Qt import (

 from calibre.gui2 import error_dialog, question_dialog
 from calibre.gui2.widgets import ComboBoxWithHelp
+from calibre.utils.config_base import tweaks
 from calibre.utils.icu import sort_key
 from calibre.utils.search_query_parser import ParseException
 from calibre.utils.search_query_parser import saved_searches
@@ -549,6 +550,9 @@ class SearchRestrictionMixin(object):
             restriction = ''
         self._apply_search_restriction(restriction, r)

+    def clear_additional_restriction(self):
+        self._apply_search_restriction('', '')
+
     def _apply_search_restriction(self, restriction, name):
         self.saved_search.clear()
         # The order below is important. Set the restriction, force a '' search
@@ -561,6 +565,10 @@ class SearchRestrictionMixin(object):
         self.set_number_of_books_shown()
         self.current_view().setFocus(Qt.OtherFocusReason)
         self.set_window_title()
+        v = self.current_view()
+        if not v.currentIndex().isValid():
+            v.set_current_row()
+        v.refresh_book_details()

     def set_number_of_books_shown(self):
         db = self.library_view.model().db
@@ -569,8 +577,9 @@ class SearchRestrictionMixin(object):
             rows = self.current_view().row_count()
             rbc = max(rows, db.data.get_search_restriction_book_count())
             t = _("({0} of {1})").format(rows, rbc)
-            self.search_count.setStyleSheet(
-                'QLabel { border-radius: 8px; background-color: yellow; }')
+            if tweaks['highlight_virtual_library_book_count']:
+                self.search_count.setStyleSheet(
+                    'QLabel { border-radius: 8px; background-color: yellow; }')
         else: # No restriction or not library view
             if not self.search.in_a_search():
                 t = _("(all books)")

@@ -1,14 +1,13 @@
 # -*- coding: utf-8 -*-

 from __future__ import (division, absolute_import, print_function)
-store_version = 1 # Needed for dynamic plugin loading
+store_version = 2 # Needed for dynamic plugin loading

 __license__ = 'GPL 3'
 __copyright__ = '2013, Tomasz Długosz <tomek3d@gmail.com>'
 __docformat__ = 'restructuredtext en'

 import urllib
-from base64 import b64encode
 from contextlib import closing

 from lxml import html
@@ -25,19 +24,21 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class KoobeStore(BasicStoreConfig, StorePlugin):

     def open(self, parent=None, detail_item=None, external=False):
-        aff_root = 'https://www.a4b-tracking.com/pl/stat-click-text-link/15/58/'
+        #aff_root = 'https://www.a4b-tracking.com/pl/stat-click-text-link/15/58/'
         url = 'http://www.koobe.pl/'

-        aff_url = aff_root + str(b64encode(url))
+        #aff_url = aff_root + str(b64encode(url))

         detail_url = None
         if detail_item:
-            detail_url = aff_root + str(b64encode(detail_item))
+            detail_url = detail_item #aff_root + str(b64encode(detail_item))

         if external or self.config.get('open_external', False):
-            open_url(QUrl(url_slash_cleaner(detail_url if detail_url else aff_url)))
+            #open_url(QUrl(url_slash_cleaner(detail_url if detail_url else aff_url)))
+            open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url)))
         else:
-            d = WebStoreDialog(self.gui, url, parent, detail_url if detail_url else aff_url)
+            #d = WebStoreDialog(self.gui, url, parent, detail_url if detail_url else aff_url)
+            d = WebStoreDialog(self.gui, url, parent, detail_url if detail_url else url)
             d.setWindowTitle(self.name)
             d.set_tags(self.config.get('tags', ''))
             d.exec_()

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 2 # Needed for dynamic plugin loading
+store_version = 3 # Needed for dynamic plugin loading

 __license__ = 'GPL 3'
 __copyright__ = '2011-2013, Tomasz Długosz <tomek3d@gmail.com>'
@@ -9,7 +9,6 @@ __docformat__ = 'restructuredtext en'

 import re
 import urllib
-from base64 import b64encode
 from contextlib import closing

 from lxml import html
@@ -26,19 +25,21 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
 class WoblinkStore(BasicStoreConfig, StorePlugin):

     def open(self, parent=None, detail_item=None, external=False):
-        aff_root = 'https://www.a4b-tracking.com/pl/stat-click-text-link/16/58/'
+        #aff_root = 'https://www.a4b-tracking.com/pl/stat-click-text-link/16/58/'
         url = 'http://woblink.com/publication'

-        aff_url = aff_root + str(b64encode(url))
+        #aff_url = aff_root + str(b64encode(url))
         detail_url = None

         if detail_item:
-            detail_url = aff_root + str(b64encode('http://woblink.com' + detail_item))
+            detail_url = 'http://woblink.com' + detail_item #aff_root + str(b64encode('http://woblink.com' + detail_item))

         if external or self.config.get('open_external', False):
-            open_url(QUrl(url_slash_cleaner(detail_url if detail_url else aff_url)))
+            #open_url(QUrl(url_slash_cleaner(detail_url if detail_url else aff_url)))
+            open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url)))
         else:
-            d = WebStoreDialog(self.gui, url, parent, detail_url if detail_url else aff_url)
+            #d = WebStoreDialog(self.gui, url, parent, detail_url if detail_url else aff_url)
+            d = WebStoreDialog(self.gui, url, parent, detail_url if detail_url else url)
             d.setWindowTitle(self.name)
             d.set_tags(self.config.get('tags', ''))
             d.exec_()

@@ -265,6 +265,27 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
                 action=self.esc_action)
         self.esc_action.triggered.connect(self.esc)

+        self.shift_esc_action = QAction(self)
+        self.addAction(self.shift_esc_action)
+        self.keyboard.register_shortcut('focus book list',
+                _('Focus the book list'), default_keys=('Shift+Esc',),
+                action=self.shift_esc_action)
+        self.shift_esc_action.triggered.connect(self.shift_esc)
+
+        self.ctrl_esc_action = QAction(self)
+        self.addAction(self.ctrl_esc_action)
+        self.keyboard.register_shortcut('clear virtual library',
+                _('Clear the virtual library'), default_keys=('Ctrl+Esc',),
+                action=self.ctrl_esc_action)
+        self.ctrl_esc_action.triggered.connect(self.ctrl_esc)
+
+        self.alt_esc_action = QAction(self)
+        self.addAction(self.alt_esc_action)
+        self.keyboard.register_shortcut('clear additional restriction',
+                _('Clear the additional restriction'), default_keys=('Alt+Esc',),
+                action=self.alt_esc_action)
+        self.alt_esc_action.triggered.connect(self.clear_additional_restriction)
+
         ####################### Start spare job server ########################
         QTimer.singleShot(1000, self.add_spare_server)

@@ -377,6 +398,13 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
     def esc(self, *args):
         self.clear_button.click()

+    def shift_esc(self):
+        self.current_view().setFocus(Qt.OtherFocusReason)
+
+    def ctrl_esc(self):
+        self.apply_virtual_library()
+        self.current_view().setFocus(Qt.OtherFocusReason)
+
     def start_smartdevice(self):
         message = None
         if self.device_manager.get_option('smartdevice', 'autostart'):

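All three new bindings follow the same three-step recipe as the existing Esc handling: create a QAction owned by the main window, add it to the window so the shortcut is active window-wide, then register it with the keyboard manager so the binding appears (and can be remapped) under Preferences->Keyboard. Schematically, using register_shortcut exactly as in the hunk above; the helper itself is hypothetical:

    from PyQt4.Qt import QAction

    def add_esc_variant(window, unique_name, text, key, slot):
        ac = QAction(window)
        window.addAction(ac)  # without this the shortcut never fires
        window.keyboard.register_shortcut(unique_name, text,
                default_keys=(key,), action=ac)
        ac.triggered.connect(slot)
        return ac

    # e.g. add_esc_variant(main, 'clear virtual library',
    #         _('Clear the virtual library'), 'Ctrl+Esc', main.ctrl_esc)
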
@@ -139,12 +139,20 @@ class ConfigDialog(QDialog, Ui_Dialog):
         self.load_options(opts)
         self.init_load_themes()

+        self.clear_search_history_button.clicked.connect(self.clear_search_history)
+
+    def clear_search_history(self):
+        from calibre.gui2 import config
+        config['viewer_search_history'] = []
+
     def save_theme(self):
         themename, ok = QInputDialog.getText(self, _('Theme name'),
                 _('Choose a name for this theme'))
-        if not ok: return
+        if not ok:
+            return
         themename = unicode(themename).strip()
-        if not themename: return
+        if not themename:
+            return
         c = config('')
         c.add_opt('theme_name_xxx', default=themename)
         self.save_options(c)
@@ -247,7 +255,8 @@ class ConfigDialog(QDialog, Ui_Dialog):
     def update_sample_colors(self):
         for x in ('text', 'background'):
             val = getattr(self, 'current_%s_color'%x)
-            if not val: val = 'inherit' if x == 'text' else 'transparent'
+            if not val:
+                val = 'inherit' if x == 'text' else 'transparent'
             ss = 'QLabel { %s: %s }'%('background-color' if x == 'background'
                 else 'color', val)
             getattr(self, '%s_color_sample'%x).setStyleSheet(ss)

@@ -68,7 +68,7 @@ QToolBox::tab:hover {
        <x>0</x>
        <y>0</y>
        <width>811</width>
-       <height>352</height>
+       <height>380</height>
       </rect>
      </property>
      <attribute name="label">
@@ -240,8 +240,8 @@ QToolBox::tab:hover {
       <rect>
        <x>0</x>
        <y>0</y>
-       <width>397</width>
-       <height>232</height>
+       <width>811</width>
+       <height>380</height>
       </rect>
      </property>
      <attribute name="label">
@@ -370,8 +370,8 @@ QToolBox::tab:hover {
       <rect>
        <x>0</x>
        <y>0</y>
-       <width>352</width>
-       <height>176</height>
+       <width>811</width>
+       <height>380</height>
       </rect>
      </property>
      <attribute name="label">
@@ -446,8 +446,8 @@ QToolBox::tab:hover {
       <rect>
        <x>0</x>
        <y>0</y>
-       <width>351</width>
-       <height>76</height>
+       <width>811</width>
+       <height>380</height>
       </rect>
      </property>
      <attribute name="label">
@@ -525,8 +525,8 @@ QToolBox::tab:hover {
       <rect>
        <x>0</x>
        <y>0</y>
-       <width>410</width>
-       <height>120</height>
+       <width>811</width>
+       <height>380</height>
       </rect>
      </property>
      <attribute name="label">
@@ -596,8 +596,8 @@ QToolBox::tab:hover {
       <rect>
        <x>0</x>
        <y>0</y>
-       <width>352</width>
-       <height>151</height>
+       <width>811</width>
+       <height>380</height>
       </rect>
      </property>
      <attribute name="label">
@@ -628,27 +628,34 @@ QToolBox::tab:hover {
         </property>
        </widget>
       </item>
-      <item row="3" column="0" colspan="2">
+      <item row="5" column="0">
+       <widget class="QPushButton" name="clear_search_history_button">
+        <property name="text">
+         <string>Clear search history</string>
+        </property>
+       </widget>
+      </item>
+      <item row="2" column="0">
+       <widget class="QCheckBox" name="opt_show_controls">
+        <property name="text">
+         <string>Show &amp;controls in the viewer window</string>
+        </property>
+       </widget>
+      </item>
+      <item row="3" column="0">
        <widget class="QCheckBox" name="opt_remember_window_size">
         <property name="text">
          <string>Remember last used &amp;window size and layout</string>
         </property>
        </widget>
       </item>
-      <item row="4" column="0" colspan="2">
+      <item row="4" column="0">
        <widget class="QCheckBox" name="opt_remember_current_page">
         <property name="text">
          <string>Remember the &amp;current page when quitting</string>
         </property>
        </widget>
       </item>
-      <item row="2" column="0" colspan="2">
-       <widget class="QCheckBox" name="opt_show_controls">
-        <property name="text">
-         <string>Show &amp;controls in the viewer window</string>
-        </property>
-       </widget>
-      </item>
      </layout>
     </widget>
    </widget>

@@ -940,6 +940,9 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
     def do_config(self):
         self.view.config(self)
         self.load_theme_menu()
+        from calibre.gui2 import config
+        if not config['viewer_search_history']:
+            self.search.clear_history()

     def bookmark(self, *args):
         num = 1

@@ -113,7 +113,7 @@ class KindleDX(Kindle):
     id = 'kindledx'

 class KindleFire(KindleDX):
-    name = 'Kindle Fire'
+    name = 'Kindle Fire and Fire HD'
     id = 'kindle_fire'
     output_profile = 'kindle_fire'
     supports_color = True
@@ -431,7 +431,8 @@ class KindlePage(QWizardPage, KindleUI):
                 default = ac[2]
                 if x.strip().endswith('@kindle.com'):
                     accs.append((x, default))
-                    if default: has_default = True
+                    if default:
+                        has_default = True
         if has_default:
             accs = [x for x in accs if x[1]]
         if accs:
@@ -450,7 +451,8 @@ class KindlePage(QWizardPage, KindleUI):
         if (len(parts) >= 2 and parts[0] and self.send_email_widget.set_email_settings(True)):
             conf = smtp_prefs()
             accounts = conf.parse().accounts
-            if not accounts: accounts = {}
+            if not accounts:
+                accounts = {}
             for y in accounts.values():
                 y[2] = False
             accounts[x] = ['AZW, MOBI, TPZ, PRC, AZW1', True, True]
@@ -484,9 +486,9 @@ class StanzaPage(QWizardPage, StanzaUI):
             c = server_config()
             c.set('port', p)

-
     def set_port(self, *args):
-        if not self.content_server.isChecked(): return
+        if not self.content_server.isChecked():
+            return
         import socket
         s = socket.socket()
         with closing(s):
@@ -518,8 +520,7 @@ class DevicePage(QWizardPage, DeviceUI):
         self.manufacturer_view.setModel(self.man_model)
         previous = dynamic.get('welcome_wizard_device', False)
         if previous:
-            previous = [x for x in get_devices() if \
-                    x.id == previous]
+            previous = [x for x in get_devices() if x.id == previous]
             if not previous:
                 previous = [Device]
             previous = previous[0]
@@ -841,7 +842,6 @@ class FinishPage(QWizardPage, FinishUI):
             pass

-

 class Wizard(QWizard):

     BUTTON_TEXTS = {
@@ -859,7 +859,6 @@ class Wizard(QWizard):
             _('&Finish')
             _('Commit')
-

     def __init__(self, parent):
         QWizard.__init__(self, parent)
         self.setWindowTitle(__appname__+' '+_('welcome wizard'))

@@ -61,7 +61,6 @@ class ContentServer(object):
                  8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}
         return lm.replace('month', month[updated.month])

-
     def sort(self, items, field, order):
         field = self.db.data.sanitize_sort_field_name(field)
         if field not in self.db.field_metadata.sortable_field_keys():
@@ -77,7 +76,7 @@ class ContentServer(object):
         try:
             id = int(id)
         except ValueError:
-            id = id.rpartition('_')[-1].partition('.')[0]
+            id = id.rpartition('.')[0].rpartition('_')[-1]
             match = re.search(r'\d+', id)
             if not match:
                 raise cherrypy.HTTPError(404, 'id:%s not an integer'%id)

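The one-line reordering in the last hunk is easy to misread, so a worked example helps. The id the content server extracts comes from a filename-style string ending in _<id>.<extension>, and an extension can itself contain an underscore (an ORIGINAL_EPUB download, e.g., ends in .original_epub). The old parse order split on '_' before stripping the extension, so the extension's own underscore won; 'book_123.original_epub' below is a hypothetical name of that shape:

    fname = 'book_123.original_epub'

    # Old order: take what follows the last '_', then cut at the first
    # '.'; the underscore inside the extension wins and the id is lost
    old = fname.rpartition('_')[-1].partition('.')[0]   # -> 'epub'

    # New order: drop the extension first, then take what follows the
    # last remaining '_'
    new = fname.rpartition('.')[0].rpartition('_')[-1]  # -> '123'
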
[22 file diffs suppressed because they are too large. Some files were not shown because too many files have changed in this diff.]