GRiker 2012-12-06 04:55:14 -07:00
commit 69f8b36eae
121 changed files with 53394 additions and 48586 deletions

View File

@@ -20,6 +20,7 @@ class Aksiyon (BasicNewsRecipe):
auto_cleanup = True
cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
masthead_url = 'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
ignore_duplicate_articles = { 'title', 'url' }
remove_empty_feeds= True
feeds = [
( u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'),
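For reference, the two options added above are stock BasicNewsRecipe attributes: ignore_duplicate_articles drops articles whose title or URL has already been seen, and remove_empty_feeds prunes feeds left empty after filtering. A minimal sketch of a recipe using them the same way (the class name, feed title and feed URL below are placeholders, not from this commit):

from calibre.web.feeds.recipes import BasicNewsRecipe

class ExampleRecipe(BasicNewsRecipe):
    title = 'Example'
    auto_cleanup = True
    # skip articles already collected under the same title or URL
    ignore_duplicate_articles = {'title', 'url'}
    # drop feeds that end up with no articles
    remove_empty_feeds = True
    feeds = [(u'News', u'http://example.com/rss')]  # placeholder feed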

View File

@@ -21,10 +21,11 @@ class Engadget(BasicNewsRecipe):
use_embedded_content = False
remove_javascript = True
remove_empty_feeds = True
auto_cleanup = True
keep_only_tags = [dict(name='div', attrs={'class':['post_content permalink ','post_content permalink alt-post-full']})]
remove_tags = [dict(name='div', attrs={'class':['filed_under','post_footer']})]
remove_tags_after = [dict(name='div', attrs={'class':['post_footer']})]
#keep_only_tags = [dict(name='div', attrs={'class':['post_content permalink ','post_content permalink alt-post-full']})]
#remove_tags = [dict(name='div', attrs={'class':['filed_under','post_footer']})]
#remove_tags_after = [dict(name='div', attrs={'class':['post_footer']})]
feeds = [(u'Posts', u'http://www.engadget.com/rss.xml')]
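The Engadget change above retires the hand-maintained selectors in favour of auto_cleanup, calibre's readability-style heuristic extraction. A hedged sketch of the two approaches side by side (the selector shown abbreviates the one commented out above):

from calibre.web.feeds.recipes import BasicNewsRecipe

class EngadgetSketch(BasicNewsRecipe):
    # heuristic extraction: survives site markup changes, less precise
    auto_cleanup = True
    # explicit selectors: precise, but break when class names change
    # keep_only_tags = [dict(name='div', attrs={'class': 'post_content permalink '})]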

View File

@@ -6,22 +6,41 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
nytimes.com
'''
import re, string, time
from calibre import entity_to_unicode, strftime
from calibre import strftime
from datetime import timedelta, date
from time import sleep
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
class NYTimes(BasicNewsRecipe):
recursions=1 # set this to zero to omit Related articles lists
# set getTechBlogs to True to include the technology blogs
# set tech_oldest_article to control article age
# set tech_max_articles_per_feed to control article count
getTechBlogs = True
remove_empty_feeds = True
tech_oldest_article = 14
tech_max_articles_per_feed = 25
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
headlinesOnly = True
# set webEdition to True for the Web edition of the newspaper. Set oldest_article to the
# number of days old an article can be for inclusion. If oldest_article = 0 all articles
# will be included. Note: oldest_article is ignored if webEdition = False
# set webEdition to True for the Web edition of the newspaper. Set oldest_web_article to the
# number of days old an article can be for inclusion. If oldest_web_article = None all articles
# will be included. Note: oldest_web_article is ignored if webEdition = False
webEdition = False
oldest_article = 7
oldest_web_article = 7
# download higher resolution images than the small thumbnails typically included in the article
# the downside of having large, beautiful images is that the file size is much larger, on the order of 7MB per paper
useHighResImages = True
# replace paid Kindle Version: the name will be changed to "The New York Times" to cause
# previous paid versions of the New York Times to be sent to the back issues folder on the Kindle
replaceKindleVersion = False
# includeSections: List of sections to include. If empty, all sections found will be included.
# Otherwise, only the sections named will be included. For example,
@@ -82,57 +101,68 @@ class NYTimes(BasicNewsRecipe):
('Education',u'education'),
('Multimedia',u'multimedia'),
(u'Obituaries',u'obituaries'),
(u'Sunday Magazine',u'magazine'),
(u'Week in Review',u'weekinreview')]
(u'Sunday Magazine',u'magazine')
]
tech_feeds = [
(u'Tech - Pogues Posts', u'http://pogue.blogs.nytimes.com/feed/'),
(u'Tech - Bits', u'http://bits.blogs.nytimes.com/feed/'),
(u'Tech - Gadgetwise', u'http://gadgetwise.blogs.nytimes.com/feed/'),
(u'Tech - Open', u'http://open.blogs.nytimes.com/feed/')
]
if headlinesOnly:
title='New York Times Headlines'
description = 'Headlines from the New York Times. Needs a subscription from http://www.nytimes.com'
needs_subscription = 'optional'
description = 'Headlines from the New York Times'
needs_subscription = False
elif webEdition:
title='New York Times (Web)'
description = 'New York Times on the Web'
needs_subscription = True
needs_subscription = False
elif replaceKindleVersion:
title='The New York Times'
description = 'Today\'s New York Times'
needs_subscription = False
else:
title='New York Times'
description = 'Today\'s New York Times'
needs_subscription = True
needs_subscription = False
month_list = ['january','february','march','april','may','june','july','august','september','october','november','december']
def decode_us_date(self,datestr):
udate = datestr.strip().lower().split()
def decode_url_date(self,url):
urlitems = url.split('/')
try:
m = self.month_list.index(udate[0])+1
d = date(int(urlitems[3]),int(urlitems[4]),int(urlitems[5]))
except:
return date.today()
d = int(udate[1])
y = int(udate[2])
try:
d = date(y,m,d)
d = date(int(urlitems[4]),int(urlitems[5]),int(urlitems[6]))
except:
d = date.today
return None
return d
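decode_url_date above exploits the fact that nytimes.com article URLs embed the publication date as /YYYY/MM/DD/ path segments; the two try blocks cover the date starting at the first or second path segment after the host. A standalone approximation of the same parse (the example URL is hypothetical):

from datetime import date

def decode_url_date(url):
    # NYT URLs carry the date as /YYYY/MM/DD/; try both offsets
    # handled by the method above
    items = url.split('/')
    for start in (3, 4):
        try:
            return date(int(items[start]), int(items[start + 1]), int(items[start + 2]))
        except (IndexError, ValueError):
            continue
    return None

print(decode_url_date('http://www.nytimes.com/2012/12/06/world/example.html'))
# -> 2012-12-06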
earliest_date = date.today() - timedelta(days=oldest_article)
if oldest_web_article is None:
earliest_date = date.today()
else:
earliest_date = date.today() - timedelta(days=oldest_web_article)
oldest_article = 365 # by default, a long time ago
__author__ = 'GRiker/Kovid Goyal/Nick Redding'
language = 'en'
requires_version = (0, 7, 5)
encoding = 'utf-8'
timefmt = ''
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
simultaneous_downloads = 1
cover_margins = (18,18,'grey99')
remove_tags_before = dict(id='article')
remove_tags_after = dict(id='article')
remove_tags = [dict(attrs={'class':[
remove_tags = [
dict(attrs={'class':[
'articleFooter',
'articleTools',
'columnGroup doubleRule',
'columnGroup singleRule',
'columnGroup last',
'columnGroup last',
@@ -140,7 +170,6 @@ class NYTimes(BasicNewsRecipe):
'dottedLine',
'entry-meta',
'entry-response module',
'icon enlargeThis',
'leftNavTabs',
'metaFootnote',
'module box nav',
@@ -150,10 +179,43 @@ class NYTimes(BasicNewsRecipe):
'relatedSearchesModule',
'side_tool',
'singleAd',
'entry entry-utility', #added for DealBook
'entry-tags', #added for DealBook
'footer promos clearfix', #added for DealBook
'footer links clearfix', #added for DealBook
'tabsContainer', #added for other blog downloads
'column lastColumn', #added for other blog downloads
'pageHeaderWithLabel', #added for other gadgetwise downloads
'column two', #added for other blog downloads
'column two last', #added for other blog downloads
'column three', #added for other blog downloads
'column three last', #added for other blog downloads
'column four',#added for other blog downloads
'column four last',#added for other blog downloads
'column last', #added for other blog downloads
'entry entry-related',
'subNavigation tabContent active', #caucus blog navigation
'mediaOverlay slideshow',
'wideThumb',
'video', #added 02-11-2011
'videoHeader',#added 02-11-2011
'articleInlineVideoHolder', #added 02-11-2011
'assetCompanionAd',
re.compile('^subNavigation'),
re.compile('^leaderboard'),
re.compile('^module'),
re.compile('commentCount')
]}),
dict(name='div', attrs={'class':re.compile('toolsList')}), # bits
dict(name='div', attrs={'class':re.compile('postNavigation')}), # bits
dict(name='div', attrs={'class':'tweet'}),
dict(name='span', attrs={'class':'commentCount meta'}),
dict(name='div', attrs={'id':'header'}),
dict(name='div', attrs={'id':re.compile('commentsContainer')}), # bits, pogue, gadgetwise, open
dict(name='ul', attrs={'class':re.compile('entry-tools')}), # pogue, gadgetwise
dict(name='div', attrs={'class':re.compile('nocontent')}), # pogue, gadgetwise
dict(name='div', attrs={'id':re.compile('respond')}), # open
dict(name='div', attrs={'class':re.compile('entry-tags')}), # pogue
dict(id=[
'adxLeaderboard',
'adxSponLink',
@@ -183,22 +245,29 @@ class NYTimes(BasicNewsRecipe):
'side_index',
'side_tool',
'toolsRight',
'skybox', #added for DealBook
'TopAd', #added for DealBook
'related-content', #added for DealBook
]),
dict(name=['script', 'noscript', 'style','form','hr'])]
no_stylesheets = True
extra_css = '''
.articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
.credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
.dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.timestamp { text-align: left; font-size: small; }
.caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.credit { font-weight: normal; text-align: right; font-size: 50%; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.byline { text-align: left; font-size: 50%; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
.dateline { text-align: left; font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.kicker { font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.timestamp { font-weight: normal; text-align: left; font-size: 50%; }
.caption { font-size: 50%; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
a:link {text-decoration: none; }
.date{font-size: 50%; }
.update{font-size: 50%; }
.articleBody { }
.authorId {text-align: left; }
.authorId {text-align: left; font-size: 50%; }
.image {text-align: center;}
.source {text-align: left; }'''
.aside {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;}
.asidenote {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;font-weight:bold;}
.source {text-align: left; font-size: x-small; }'''
articles = {}
@@ -237,7 +306,7 @@ class NYTimes(BasicNewsRecipe):
def exclude_url(self,url):
if not url.startswith("http"):
return True
if not url.endswith(".html"):
if not url.endswith(".html") and 'dealbook.nytimes.com' not in url: #added for DealBook
return True
if 'nytimes.com' not in url:
return True
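The DealBook exception above lets blog-style URLs without an .html suffix through the filter. A standalone approximation, covering only the checks visible in this hunk (the sample URLs are hypothetical):

def exclude_url(url):
    if not url.startswith('http'):
        return True
    if not url.endswith('.html') and 'dealbook.nytimes.com' not in url:
        return True
    if 'nytimes.com' not in url:
        return True
    return False

for u in ('http://www.nytimes.com/2012/12/06/world/example.html',
          'http://dealbook.nytimes.com/2012/12/05/example-post/',
          'http://example.com/story.html'):
    print(u + ' -> ' + ('excluded' if exclude_url(u) else 'kept'))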
@@ -280,88 +349,91 @@ class NYTimes(BasicNewsRecipe):
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://www.nytimes.com/auth/login')
br.form = br.forms().next()
br['userid'] = self.username
br['password'] = self.password
raw = br.submit().read()
if 'Please try again' in raw:
raise Exception('Your username and password are incorrect')
return br
def skip_ad_pages(self, soup):
# Skip ad pages served before actual article
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all'
self.log.warn("Skipping ad to article at '%s'" % url)
return self.index_to_soup(url, raw=True)
## This doesn't work (and probably never did). It either gets another serve of the advertisement,
## or if it gets the article then get_soup (from which it is invoked) traps trying to do xml decoding.
##
## def skip_ad_pages(self, soup):
## # Skip ad pages served before actual article
## skip_tag = soup.find(True, {'name':'skip'})
## if skip_tag is not None:
## self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
## url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
## url += '?pagewanted=all'
## self.log.warn("Skipping ad to article at '%s'" % url)
## return self.index_to_soup(url, raw=True)
cover_tag = 'NY_NYT'
def get_cover_url(self):
cover = None
st = time.localtime()
year = str(st.tm_year)
month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday
cover = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/nytfrontpage/scan.jpg'
from datetime import timedelta, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
try:
br.open(cover)
except:
while daysback<7:
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.cover_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
daysback = daysback+1
continue
break
if daysback==7:
self.log("\nCover unavailable")
cover = None
return cover
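The rewritten get_cover_url drops the nytfrontpage scan in favour of Newseum's daily front-page image, probing back one day at a time for up to a week when today's scan is missing. The same probing logic as a compact standalone sketch (using urllib2 directly instead of the recipe's browser, for illustration only):

import urllib2
from datetime import date, timedelta

def newseum_cover(cover_tag='NY_NYT', max_days_back=7):
    for days_back in range(max_days_back):
        d = date.today() - timedelta(days=days_back)
        url = ('http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'
               + str(d.day) + '/lg/' + cover_tag + '.jpg')
        try:
            urllib2.urlopen(url).close()
            return url  # first day whose image exists
        except Exception:
            continue
    return None  # cover unavailable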
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
def short_title(self):
return self.title
def index_to_soup(self, url_or_raw, raw=False):
'''
OVERRIDE of class method
deals with various page encodings between index and articles
'''
def get_the_soup(docEncoding, url_or_raw, raw=False) :
def article_to_soup(self, url_or_raw, raw=False):
from contextlib import closing
import copy
from calibre.ebooks.chardet import xml_to_unicode
if re.match(r'\w+://', url_or_raw):
br = self.clone_browser(self.browser)
f = br.open_novisit(url_or_raw)
open_func = getattr(br, 'open_novisit', br.open)
with closing(open_func(url_or_raw)) as f:
_raw = f.read()
f.close()
if not _raw:
raise RuntimeError('Could not fetch index from %s'%url_or_raw)
else:
_raw = url_or_raw
if raw:
return _raw
if not isinstance(_raw, unicode) and self.encoding:
_raw = _raw.decode(docEncoding, 'replace')
massage = list(BeautifulSoup.MARKUP_MASSAGE)
massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
return BeautifulSoup(_raw, markupMassage=massage)
if callable(self.encoding):
_raw = self.encoding(_raw)
else:
_raw = _raw.decode(self.encoding, 'replace')
# Entry point
soup = get_the_soup( self.encoding, url_or_raw )
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
if docEncoding == '' :
docEncoding = self.encoding
nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
nmassage.extend(self.preprocess_regexps)
nmassage += [(re.compile(r'<!DOCTYPE .+?>', re.DOTALL), lambda m: '')]
# Some websites have buggy doctype declarations that mess up beautifulsoup
# Remove comments as they can leave detritus when extracting tags leaves
# multiple nested comments
nmassage.append((re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''))
usrc = xml_to_unicode(_raw, self.verbose, strip_encoding_pats=True)[0]
usrc = self.preprocess_raw_html(usrc, url_or_raw)
return BeautifulSoup(usrc, markupMassage=nmassage)
if self.verbose > 2:
self.log( " document encoding: '%s'" % docEncoding)
if docEncoding != self.encoding :
soup = get_the_soup(docEncoding, url_or_raw)
return soup
def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
# Replace '&' with '&'
massaged = re.sub("&","&", massaged)
massaged = re.sub("&#038;","&", massaged)
massaged = re.sub("&amp;","&", massaged)
return self.fixChars(massaged)
else:
return description
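Note that the first substitution kept above is a no-op as rendered (pattern and replacement are both '&', apparently the residue of an earlier entity-decoding of the source file); the two added lines do the real work, collapsing the numeric and named ampersand entities. A standalone illustration with a made-up description string:

import re

desc = 'Barnes &amp; Noble &#038; friends'  # hypothetical NCX description
for pattern in ('&#038;', '&amp;'):
    desc = re.sub(pattern, '&', desc)
print(desc)  # -> Barnes & Noble & friends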
@@ -383,6 +455,16 @@ class NYTimes(BasicNewsRecipe):
if self.filterDuplicates:
if url in self.url_list:
return
if self.webEdition:
date_tag = self.decode_url_date(url)
if date_tag is not None:
if self.oldest_web_article is not None:
if date_tag < self.earliest_date:
self.log("Skipping article %s" % url)
return
else:
self.log("Skipping article %s" % url)
return
self.url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip()
description = ''
@@ -407,6 +489,31 @@ class NYTimes(BasicNewsRecipe):
description=description, author=author,
content=''))
def get_tech_feeds(self,ans):
if self.getTechBlogs:
tech_articles = {}
key_list = []
save_oldest_article = self.oldest_article
save_max_articles_per_feed = self.max_articles_per_feed
self.oldest_article = self.tech_oldest_article
self.max_articles_per_feed = self.tech_max_articles_per_feed
self.feeds = self.tech_feeds
tech = self.parse_feeds()
self.oldest_article = save_oldest_article
self.max_articles_per_feed = save_max_articles_per_feed
self.feeds = None
for f in tech:
key_list.append(f.title)
tech_articles[f.title] = []
for a in f.articles:
tech_articles[f.title].append(
dict(title=a.title, url=a.url, date=a.date,
description=a.summary, author=a.author,
content=a.content))
tech_ans = [(k, tech_articles[k]) for k in key_list if tech_articles.has_key(k)]
for x in tech_ans:
ans.append(x)
return ans
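get_tech_feeds reuses the ordinary RSS machinery in parse_feeds by temporarily swapping in the tech limits and feed list, then restoring the originals. The same save/swap/restore idea expressed as a context manager, purely as a sketch (swapped_attrs is a hypothetical helper, not part of calibre):

from contextlib import contextmanager

@contextmanager
def swapped_attrs(obj, **temp):
    # temporarily override attributes on obj, restoring them on exit
    saved = dict((k, getattr(obj, k)) for k in temp)
    for k, v in temp.items():
        setattr(obj, k, v)
    try:
        yield obj
    finally:
        for k, v in saved.items():
            setattr(obj, k, v)

# usage inside the recipe would then look like:
# with swapped_attrs(self, oldest_article=self.tech_oldest_article,
#                    max_articles_per_feed=self.tech_max_articles_per_feed,
#                    feeds=self.tech_feeds):
#     tech = self.parse_feeds()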
def parse_web_edition(self):
@@ -418,31 +525,41 @@ class NYTimes(BasicNewsRecipe):
if sec_title in self.excludeSections:
print "SECTION EXCLUDED: ",sec_title
continue
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
try:
soup = self.index_to_soup('http://www.nytimes.com/pages/'+index_url+'/index.html')
except:
continue
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
self.key = sec_title
# Find each article
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['story', 'story headline'] :
attrs={'class':['section-headline', 'ledeStory', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['story', 'story headline', 'storyHeader'] :
self.handle_article(div)
elif div['class'] == 'ledeStory':
divsub = div.find('div','storyHeader')
if divsub is not None:
self.handle_article(divsub)
ulrefer = div.find('ul','refer')
if ulrefer is not None:
for lidiv in ulrefer.findAll('li'):
self.handle_article(lidiv)
elif div['class'] == 'headlinesOnly multiline flush':
for lidiv in div.findAll('li'):
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
return self.filter_ans(self.get_tech_feeds(self.ans))
def parse_todays_index(self):
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
skipping = False
# Find each article
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['section-headline','sectionHeader']:
self.key = string.capwords(self.feed_title(div))
self.key = self.key.replace('Op-ed','Op-Ed')
@@ -466,7 +583,7 @@ class NYTimes(BasicNewsRecipe):
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
return self.filter_ans(self.get_tech_feeds(self.ans))
def parse_headline_index(self):
@@ -514,7 +631,7 @@ class NYTimes(BasicNewsRecipe):
for h3_item in search_div.findAll('h3'):
byline = h3_item.h6
if byline is not None:
author = self.tag_to_string(byline,usa_alt=False)
author = self.tag_to_string(byline,use_alt=False)
else:
author = ''
a = h3_item.find('a', href=True)
@@ -540,7 +657,7 @@ class NYTimes(BasicNewsRecipe):
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
return self.filter_ans(self.get_tech_feeds(self.ans))
def parse_index(self):
if self.headlinesOnly:
@@ -550,32 +667,190 @@ class NYTimes(BasicNewsRecipe):
else:
return self.parse_todays_index()
def strip_anchors(self,soup):
def strip_anchors(self,soup,kill_all=False):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
if kill_all or (self.recursions==0):
a.replaceWith(self.tag_to_string(a,False))
else:
if a.has_key('href'):
if a['href'].startswith('http://www.nytimes'):
if not a['href'].endswith('pagewanted=all'):
url = re.sub(r'\?.*', '', a['href'])
if self.exclude_url(url):
a.replaceWith(self.tag_to_string(a,False))
else:
a['href'] = url+'?pagewanted=all'
elif not (a['href'].startswith('http://pogue') or \
a['href'].startswith('http://bits') or \
a['href'].startswith('http://travel') or \
a['href'].startswith('http://business') or \
a['href'].startswith('http://tech') or \
a['href'].startswith('http://health') or \
a['href'].startswith('http://dealbook') or \
a['href'].startswith('http://open')):
a.replaceWith(self.tag_to_string(a,False))
return soup
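The rewritten strip_anchors keeps nytimes.com links only while recursions are enabled, normalising each one to its single-page form by stripping any query string and appending pagewanted=all. That rewrite in isolation (the href value is hypothetical):

import re

href = 'http://www.nytimes.com/2012/12/06/business/example.html?hp&_r=0'
url = re.sub(r'\?.*', '', href)  # drop any existing query string
print(url + '?pagewanted=all')
# -> http://www.nytimes.com/2012/12/06/business/example.html?pagewanted=all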
def handle_tags(self,soup):
try:
print("HANDLE TAGS: TITLE = "+self.tag_to_string(soup.title))
except:
print("HANDLE TAGS: NO TITLE")
if soup is None:
print("ERROR: handle_tags received NoneType")
return None
## print("HANDLING AD FORWARD:")
## print(soup)
if self.keep_only_tags:
body = Tag(soup, 'body')
try:
if isinstance(self.keep_only_tags, dict):
self.keep_only_tags = [self.keep_only_tags]
for spec in self.keep_only_tags:
for tag in soup.find('body').findAll(**spec):
body.insert(len(body.contents), tag)
soup.find('body').replaceWith(body)
except AttributeError: # soup has no body element
pass
def remove_beyond(tag, next):
while tag is not None and getattr(tag, 'name', None) != 'body':
after = getattr(tag, next)
while after is not None:
ns = getattr(tag, next)
after.extract()
after = ns
tag = tag.parent
if self.remove_tags_after is not None:
rt = [self.remove_tags_after] if isinstance(self.remove_tags_after, dict) else self.remove_tags_after
for spec in rt:
tag = soup.find(**spec)
remove_beyond(tag, 'nextSibling')
if self.remove_tags_before is not None:
tag = soup.find(**self.remove_tags_before)
remove_beyond(tag, 'previousSibling')
for kwds in self.remove_tags:
for tag in soup.findAll(**kwds):
tag.extract()
return soup
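handle_tags re-implements the recipe engine's keep_only_tags / remove_tags_before / remove_tags_after handling so it can be applied to soups fetched outside the normal pipeline by article_to_soup. Its remove_beyond helper deletes every sibling in one direction, then climbs to the parent and repeats. A toy illustration of one level of that walk, assuming calibre's bundled BeautifulSoup 3:

from calibre.ebooks.BeautifulSoup import BeautifulSoup

soup = BeautifulSoup('<body><div id="article">keep</div><div>trailing ads</div></body>')
tag = soup.find('div', attrs={'id': 'article'})
# equivalent to remove_beyond(tag, 'nextSibling') for a single level
after = tag.nextSibling
while after is not None:
    ns = after.nextSibling
    after.extract()
    after = ns
print(soup.body)  # -> <body><div id="article">keep</div></body>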
def preprocess_html(self, soup):
print("PREPROCESS TITLE="+self.tag_to_string(soup.title))
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all'
self.log.warn("Skipping ad to article at '%s'" % url)
sleep(5)
soup = self.handle_tags(self.article_to_soup(url))
if self.webEdition & (self.oldest_article>0):
date_tag = soup.find(True,attrs={'class': ['dateline','date']})
if date_tag:
date_str = self.tag_to_string(date_tag,use_alt=False)
date_str = date_str.replace('Published:','')
date_items = date_str.split(',')
try:
datestring = date_items[0]+' '+date_items[1]
article_date = self.decode_us_date(datestring)
except:
article_date = date.today()
if article_date < self.earliest_date:
self.log("Skipping article dated %s" % date_str)
return None
# check if the article is from one of the tech blogs
blog=soup.find('div',attrs={'id':['pogue','bits','gadgetwise','open']})
if blog is not None:
old_body = soup.find('body')
new_body=Tag(soup,'body')
new_body.append(soup.find('div',attrs={'id':'content'}))
new_body.find('div',attrs={'id':'content'})['id']='blogcontent' # identify for postprocess_html
old_body.replaceWith(new_body)
for divr in soup.findAll('div',attrs={'class':re.compile('w190 right')}):
if divr.find(text=re.compile('Sign up')):
divr.extract()
divr = soup.find('div',attrs={'id':re.compile('related-content')})
if divr is not None:
# handle related articles
rlist = []
ul = divr.find('ul')
if ul is not None:
for li in ul.findAll('li'):
atag = li.find('a')
if atag is not None:
if atag['href'].startswith('http://pogue') or atag['href'].startswith('http://bits') or \
atag['href'].startswith('http://open'):
atag.find(text=True).replaceWith(self.massageNCXText(self.tag_to_string(atag,False)))
rlist.append(atag)
divr.extract()
if rlist != []:
asidediv = Tag(soup,'div',[('class','aside')])
if soup.find('hr') is None:
asidediv.append(Tag(soup,'hr'))
h4 = Tag(soup,'h4',[('class','asidenote')])
h4.insert(0,"Related Posts")
asidediv.append(h4)
ul = Tag(soup,'ul')
for r in rlist:
li = Tag(soup,'li',[('class','aside')])
r['class'] = 'aside'
li.append(r)
ul.append(li)
asidediv.append(ul)
asidediv.append(Tag(soup,'hr'))
smain = soup.find('body')
smain.append(asidediv)
for atag in soup.findAll('a'):
img = atag.find('img')
if img is not None:
atag.replaceWith(img)
elif not atag.has_key('href'):
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
elif not (atag['href'].startswith('http://www.nytimes') or atag['href'].startswith('http://pogue') or \
atag['href'].startswith('http://bits') or atag['href'].startswith('http://open')):
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
hdr = soup.find('address')
if hdr is not None:
hdr.name='span'
for span_credit in soup.findAll('span','credit'):
sp = Tag(soup,'span')
span_credit.replaceWith(sp)
sp.append(Tag(soup,'br'))
sp.append(span_credit)
sp.append(Tag(soup,'br'))
else: # nytimes article
related = [] # these will be the related articles
first_outer = None # first related outer tag
first_related = None # first related tag
for outerdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}):
for rdiv in soup.findAll('div','columnGroup doubleRule'):
if rdiv.find('h3') is not None:
if self.tag_to_string(rdiv.h3,False).startswith('Related'):
rdiv.h3.find(text=True).replaceWith("Related articles")
rdiv.h3['class'] = 'asidenote'
for litag in rdiv.findAll('li'):
if litag.find('a') is not None:
if litag.find('a')['href'].startswith('http://www.nytimes.com'):
url = re.sub(r'\?.*', '', litag.find('a')['href'])
litag.find('a')['href'] = url+'?pagewanted=all'
litag.extract()
related.append(litag)
if first_related is None:
first_related = rdiv
first_outer = outerdiv
else:
litag.extract()
if related != []:
for r in related:
if r.h6: # don't want the anchor inside a h6 tag
r.h6.replaceWith(r.h6.a)
first_related.ul.append(r)
first_related.insert(0,Tag(soup,'hr'))
first_related.append(Tag(soup,'hr'))
first_related['class'] = 'aside'
first_outer.replaceWith(first_related) # replace the outer tag with the related tag
for rdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}):
rdiv.extract()
kicker_tag = soup.find(attrs={'class':'kicker'})
if kicker_tag: # remove Op-Ed author head shots
@@ -584,9 +859,77 @@ class NYTimes(BasicNewsRecipe):
img_div = soup.find('div','inlineImage module')
if img_div:
img_div.extract()
return self.strip_anchors(soup)
def postprocess_html(self,soup, True):
if self.useHighResImages:
try:
#open up all the "Enlarge this Image" pop-ups and download the full resolution jpegs
enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'})
if enlargeThisList:
for popupref in enlargeThisList:
popupreflink = popupref.find('a')
if popupreflink:
reflinkstring = str(popupreflink['href'])
refstart = reflinkstring.find("javascript:pop_me_up2('") + len("javascript:pop_me_up2('")
refend = reflinkstring.find(".html", refstart) + len(".html")
reflinkstring = reflinkstring[refstart:refend]
popuppage = self.browser.open(reflinkstring)
popuphtml = popuppage.read()
popuppage.close()
if popuphtml:
st = time.localtime()
year = str(st.tm_year)
month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday
imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/')
highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4]
popupSoup = BeautifulSoup(popuphtml)
highResTag = popupSoup.find('img', {'src':highResImageLink})
if highResTag:
try:
newWidth = highResTag['width']
newHeight = highResTag['height']
imageTag = popupref.parent.find("img")
except:
self.log("Error: finding width and height of img")
popupref.extract()
if imageTag:
try:
imageTag['src'] = highResImageLink
imageTag['width'] = newWidth
imageTag['height'] = newHeight
except:
self.log("Error setting the src width and height parameters")
except Exception:
self.log("Error pulling high resolution images")
try:
#in case pulling images failed, delete the enlarge this text
enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'})
if enlargeThisList:
for popupref in enlargeThisList:
popupref.extract()
except:
self.log("Error removing Enlarge this text")
return self.strip_anchors(soup,False)
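The high-resolution pass above opens each 'Enlarge this Image' pop-up by slicing the target page URL out of the javascript: href. That slice in isolation (the href value below is hypothetical):

reflinkstring = ("javascript:pop_me_up2('http://www.nytimes.com/imagepages/"
                 "2012/12/06/business/example.html','900_600','width=720,height=600')")
refstart = reflinkstring.find("javascript:pop_me_up2('") + len("javascript:pop_me_up2('")
refend = reflinkstring.find('.html', refstart) + len('.html')
print(reflinkstring[refstart:refend])
# -> http://www.nytimes.com/imagepages/2012/12/06/business/example.html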
def postprocess_html(self,soup,first_fetch):
if not first_fetch: # remove Related links
for aside in soup.findAll('div','aside'):
aside.extract()
soup = self.strip_anchors(soup,True)
if soup.find('div',attrs={'id':'blogcontent'}) is None:
if first_fetch:
aside = soup.find('div','aside')
if aside is not None: # move the related list to the end of the article
art = soup.find('div',attrs={'id':'article'})
if art is None:
art = soup.find('div',attrs={'class':'article'})
if art is not None:
art.append(aside)
try:
if self.one_picture_per_article:
# Remove all images after first
@@ -642,6 +985,7 @@ class NYTimes(BasicNewsRecipe):
try:
# Change <nyt_headline> to <h2>
h1 = soup.find('h1')
blogheadline = str(h1) #added for dealbook
if h1:
headline = h1.find("nyt_headline")
if headline:
@@ -649,13 +993,19 @@ class NYTimes(BasicNewsRecipe):
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
h1.replaceWith(tag)
elif blogheadline.find('entry-title'):#added for dealbook
tag = Tag(soup, "h2")#added for dealbook
tag['class'] = "headline"#added for dealbook
tag.insert(0, self.fixChars(h1.contents[0]))#added for dealbook
h1.replaceWith(tag)#added for dealbook
else:
# Blog entry - replace headline, remove <hr> tags
# Blog entry - replace headline, remove <hr> tags - BCC I think this is no longer functional 1-18-2011
headline = soup.find('title')
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
tag.insert(0, self.fixChars(headline.renderContents()))
soup.insert(0, tag)
hrs = soup.findAll('hr')
for hr in hrs:
@@ -663,6 +1013,29 @@ class NYTimes(BasicNewsRecipe):
except:
self.log("ERROR: Problem in Change <nyt_headline> to <h2>")
try:
#if this is from a blog (dealbook), fix the byline format
bylineauthor = soup.find('address',attrs={'class':'byline author vcard'})
if bylineauthor:
tag = Tag(soup, "h6")
tag['class'] = "byline"
tag.insert(0, self.fixChars(bylineauthor.renderContents()))
bylineauthor.replaceWith(tag)
except:
self.log("ERROR: fixing byline author format")
try:
#if this is a blog (dealbook) fix the credit style for the pictures
blogcredit = soup.find('div',attrs={'class':'credit'})
if blogcredit:
tag = Tag(soup, "h6")
tag['class'] = "credit"
tag.insert(0, self.fixChars(blogcredit.renderContents()))
blogcredit.replaceWith(tag)
except:
self.log("ERROR: fixing credit format")
try:
# Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1")
@@ -685,6 +1058,13 @@ class NYTimes(BasicNewsRecipe):
subhead.replaceWith(bTag)
except:
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
try:
#remove the <strong> update tag
blogupdated = soup.find('span', {'class':'update'})
if blogupdated:
blogupdated.replaceWith("")
except:
self.log("ERROR: Removing strong tag")
try:
divTag = soup.find('div',attrs={'id':'articleBody'})
@@ -708,16 +1088,16 @@ class NYTimes(BasicNewsRecipe):
return soup
def populate_article_metadata(self, article, soup, first):
if first and hasattr(self, 'add_toc_thumbnail'):
if not first:
return
idxdiv = soup.find('div',attrs={'class':'articleSpanImage'})
if idxdiv is not None:
if idxdiv.img:
self.add_toc_thumbnail(article, idxdiv.img['src'])
self.add_toc_thumbnail(article, re.sub(r'links\\link\d+\\','',idxdiv.img['src']))
else:
img = soup.find('img')
img = soup.find('body').find('img')
if img is not None:
self.add_toc_thumbnail(article, img['src'])
self.add_toc_thumbnail(article, re.sub(r'links\\link\d+\\','',img['src']))
shortparagraph = ""
try:
if len(article.text_summary.strip()) == 0:
@@ -731,13 +1111,22 @@ class NYTimes(BasicNewsRecipe):
#account for blank paragraphs and short paragraphs by appending them to longer ones
if len(refparagraph) > 0:
if len(refparagraph) > 70: #approximately one line of text
article.summary = article.text_summary = shortparagraph + refparagraph
newpara = shortparagraph + refparagraph
newparaDateline,newparaEm,newparaDesc = newpara.partition('&mdash;')
if newparaEm == '':
newparaDateline,newparaEm,newparaDesc = newpara.partition('—')
if newparaEm == '':
newparaDesc = newparaDateline
article.summary = article.text_summary = newparaDesc.strip()
return
else:
shortparagraph = refparagraph + " "
if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"):
shortparagraph = shortparagraph + "- "
else:
article.summary = article.text_summary = self.massageNCXText(article.text_summary)
except:
self.log("Error creating article descriptions")
return
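The new summary logic strips a leading dateline (e.g. 'WASHINGTON &mdash; ...') from the first substantial paragraph by partitioning on the em-dash, trying the entity form first and the literal character second, and keeping the whole paragraph when neither is present. In isolation, with a made-up paragraph:

para = 'WASHINGTON &mdash; Lawmakers reached a tentative deal on Thursday.'
dateline, sep, desc = para.partition('&mdash;')
# the recipe retries with a literal em-dash character, then keeps the
# whole paragraph if neither separator is present
if sep == '':
    desc = dateline
print(desc.strip())  # -> Lawmakers reached a tentative deal on Thursday.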

View File

@@ -6,31 +6,42 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
nytimes.com
'''
import re, string, time
from calibre import entity_to_unicode, strftime
from calibre import strftime
from datetime import timedelta, date
from time import sleep
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
class NYTimes(BasicNewsRecipe):
recursions=1 # set this to zero to omit Related articles lists
# set getTechBlogs to True to include the technology blogs
# set tech_oldest_article to control article age
# set tech_max_articles_per_feed to control article count
getTechBlogs = True
remove_empty_feeds = True
tech_oldest_article = 14
tech_max_articles_per_feed = 25
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
headlinesOnly = False
# set webEdition to True for the Web edition of the newspaper. Set oldest_article to the
# number of days old an article can be for inclusion. If oldest_article = 0 all articles
# will be included. Note: oldest_article is ignored if webEdition = False
# set webEdition to True for the Web edition of the newspaper. Set oldest_web_article to the
# number of days old an article can be for inclusion. If oldest_web_article = None all articles
# will be included. Note: oldest_web_article is ignored if webEdition = False
webEdition = False
oldest_article = 7
# replace paid Kindle Version: the name will be changed to "The New York Times" to cause
# previous paid versions of the New York Times to be sent to the back issues folder on the Kindle
replaceKindleVersion = False
oldest_web_article = 7
# download higher resolution images than the small thumbnails typically included in the article
# the downside of having large, beautiful images is that the file size is much larger, on the order of 7MB per paper
useHighResImages = True
# replace paid Kindle Version: the name will be changed to "The New York Times" to cause
# previous paid versions of the New York Times to be sent to the back issues folder on the Kindle
replaceKindleVersion = False
# includeSections: List of sections to include. If empty, all sections found will be included.
# Otherwise, only the sections named will be included. For example,
#
@@ -90,60 +101,68 @@ class NYTimes(BasicNewsRecipe):
('Education',u'education'),
('Multimedia',u'multimedia'),
(u'Obituaries',u'obituaries'),
(u'Sunday Magazine',u'magazine'),
(u'Week in Review',u'weekinreview')]
(u'Sunday Magazine',u'magazine')
]
tech_feeds = [
(u'Tech - Pogues Posts', u'http://pogue.blogs.nytimes.com/feed/'),
(u'Tech - Bits', u'http://bits.blogs.nytimes.com/feed/'),
(u'Tech - Gadgetwise', u'http://gadgetwise.blogs.nytimes.com/feed/'),
(u'Tech - Open', u'http://open.blogs.nytimes.com/feed/')
]
if headlinesOnly:
title='New York Times Headlines'
description = 'Headlines from the New York Times'
needs_subscription = True
needs_subscription = False
elif webEdition:
title='New York Times (Web)'
description = 'New York Times on the Web'
needs_subscription = True
needs_subscription = False
elif replaceKindleVersion:
title='The New York Times'
description = 'Today\'s New York Times'
needs_subscription = True
needs_subscription = False
else:
title='New York Times'
description = 'Today\'s New York Times. Needs subscription from http://www.nytimes.com'
needs_subscription = True
description = 'Today\'s New York Times'
needs_subscription = False
month_list = ['january','february','march','april','may','june','july','august','september','october','november','december']
def decode_us_date(self,datestr):
udate = datestr.strip().lower().split()
def decode_url_date(self,url):
urlitems = url.split('/')
try:
m = self.month_list.index(udate[0])+1
d = date(int(urlitems[3]),int(urlitems[4]),int(urlitems[5]))
except:
return date.today()
d = int(udate[1])
y = int(udate[2])
try:
d = date(y,m,d)
d = date(int(urlitems[4]),int(urlitems[5]),int(urlitems[6]))
except:
d = date.today
return None
return d
earliest_date = date.today() - timedelta(days=oldest_article)
if oldest_web_article is None:
earliest_date = date.today()
else:
earliest_date = date.today() - timedelta(days=oldest_web_article)
oldest_article = 365 # by default, a long time ago
__author__ = 'GRiker/Kovid Goyal/Nick Redding/Ben Collier'
__author__ = 'GRiker/Kovid Goyal/Nick Redding'
language = 'en'
requires_version = (0, 7, 5)
encoding = 'utf-8'
timefmt = ''
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
simultaneous_downloads = 1
cover_margins = (18,18,'grey99')
remove_tags_before = dict(id='article')
remove_tags_after = dict(id='article')
remove_tags = [dict(attrs={'class':[
remove_tags = [
dict(attrs={'class':[
'articleFooter',
'articleTools',
'columnGroup doubleRule',
'columnGroup singleRule',
'columnGroup last',
'columnGroup last',
@@ -151,7 +170,6 @@ class NYTimes(BasicNewsRecipe):
'dottedLine',
'entry-meta',
'entry-response module',
#'icon enlargeThis', #removed to provide option for high res images
'leftNavTabs',
'metaFootnote',
'module box nav',
@@ -175,12 +193,9 @@ class NYTimes(BasicNewsRecipe):
'column four',#added for other blog downloads
'column four last',#added for other blog downloads
'column last', #added for other blog downloads
'timestamp published', #added for other blog downloads
'entry entry-related',
'subNavigation tabContent active', #caucus blog navigation
'columnGroup doubleRule',
'mediaOverlay slideshow',
'headlinesOnly multiline flush',
'wideThumb',
'video', #added 02-11-2011
'videoHeader',#added 02-11-2011
@@ -189,7 +204,18 @@ class NYTimes(BasicNewsRecipe):
re.compile('^subNavigation'),
re.compile('^leaderboard'),
re.compile('^module'),
re.compile('commentCount')
]}),
dict(name='div', attrs={'class':re.compile('toolsList')}), # bits
dict(name='div', attrs={'class':re.compile('postNavigation')}), # bits
dict(name='div', attrs={'class':'tweet'}),
dict(name='span', attrs={'class':'commentCount meta'}),
dict(name='div', attrs={'id':'header'}),
dict(name='div', attrs={'id':re.compile('commentsContainer')}), # bits, pogue, gadgetwise, open
dict(name='ul', attrs={'class':re.compile('entry-tools')}), # pogue, gadgetwise
dict(name='div', attrs={'class':re.compile('nocontent')}), # pogue, gadgetwise
dict(name='div', attrs={'id':re.compile('respond')}), # open
dict(name='div', attrs={'class':re.compile('entry-tags')}), # pogue
dict(id=[
'adxLeaderboard',
'adxSponLink',
@@ -227,17 +253,21 @@ class NYTimes(BasicNewsRecipe):
no_stylesheets = True
extra_css = '''
.articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
.credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
.dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.timestamp { text-align: left; font-size: small; }
.caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.credit { font-weight: normal; text-align: right; font-size: 50%; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.byline { text-align: left; font-size: 50%; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
.dateline { text-align: left; font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.kicker { font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.timestamp { font-weight: normal; text-align: left; font-size: 50%; }
.caption { font-size: 50%; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
a:link {text-decoration: none; }
.date{font-size: 50%; }
.update{font-size: 50%; }
.articleBody { }
.authorId {text-align: left; }
.authorId {text-align: left; font-size: 50%; }
.image {text-align: center;}
.source {text-align: left; }'''
.aside {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;}
.asidenote {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;font-weight:bold;}
.source {text-align: left; font-size: x-small; }'''
articles = {}
@@ -276,7 +306,7 @@ class NYTimes(BasicNewsRecipe):
def exclude_url(self,url):
if not url.startswith("http"):
return True
if not url.endswith(".html") and 'dealbook.nytimes.com' not in url and 'blogs.nytimes.com' not in url: #added for DealBook
if not url.endswith(".html") and 'dealbook.nytimes.com' not in url: #added for DealBook
return True
if 'nytimes.com' not in url:
return True
@@ -319,88 +349,91 @@ class NYTimes(BasicNewsRecipe):
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://www.nytimes.com/auth/login')
br.form = br.forms().next()
br['userid'] = self.username
br['password'] = self.password
raw = br.submit().read()
if 'Please try again' in raw:
raise Exception('Your username and password are incorrect')
return br
def skip_ad_pages(self, soup):
# Skip ad pages served before actual article
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all'
self.log.warn("Skipping ad to article at '%s'" % url)
return self.index_to_soup(url, raw=True)
## This doesn't work (and probably never did). It either gets another serve of the advertisement,
## or if it gets the article then get_soup (from which it is invoked) traps trying to do xml decoding.
##
## def skip_ad_pages(self, soup):
## # Skip ad pages served before actual article
## skip_tag = soup.find(True, {'name':'skip'})
## if skip_tag is not None:
## self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
## url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
## url += '?pagewanted=all'
## self.log.warn("Skipping ad to article at '%s'" % url)
## return self.index_to_soup(url, raw=True)
cover_tag = 'NY_NYT'
def get_cover_url(self):
cover = None
st = time.localtime()
year = str(st.tm_year)
month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday
cover = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/nytfrontpage/scan.jpg'
from datetime import timedelta, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
try:
br.open(cover)
except:
while daysback<7:
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.cover_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
daysback = daysback+1
continue
break
if daysback==7:
self.log("\nCover unavailable")
cover = None
return cover
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
def short_title(self):
return self.title
def index_to_soup(self, url_or_raw, raw=False):
'''
OVERRIDE of class method
deals with various page encodings between index and articles
'''
def get_the_soup(docEncoding, url_or_raw, raw=False) :
def article_to_soup(self, url_or_raw, raw=False):
from contextlib import closing
import copy
from calibre.ebooks.chardet import xml_to_unicode
if re.match(r'\w+://', url_or_raw):
br = self.clone_browser(self.browser)
f = br.open_novisit(url_or_raw)
open_func = getattr(br, 'open_novisit', br.open)
with closing(open_func(url_or_raw)) as f:
_raw = f.read()
f.close()
if not _raw:
raise RuntimeError('Could not fetch index from %s'%url_or_raw)
else:
_raw = url_or_raw
if raw:
return _raw
if not isinstance(_raw, unicode) and self.encoding:
_raw = _raw.decode(docEncoding, 'replace')
massage = list(BeautifulSoup.MARKUP_MASSAGE)
massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
return BeautifulSoup(_raw, markupMassage=massage)
if callable(self.encoding):
_raw = self.encoding(_raw)
else:
_raw = _raw.decode(self.encoding, 'replace')
# Entry point
soup = get_the_soup( self.encoding, url_or_raw )
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
if docEncoding == '' :
docEncoding = self.encoding
nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
nmassage.extend(self.preprocess_regexps)
nmassage += [(re.compile(r'<!DOCTYPE .+?>', re.DOTALL), lambda m: '')]
# Some websites have buggy doctype declarations that mess up beautifulsoup
# Remove comments as they can leave detritus when extracting tags leaves
# multiple nested comments
nmassage.append((re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''))
usrc = xml_to_unicode(_raw, self.verbose, strip_encoding_pats=True)[0]
usrc = self.preprocess_raw_html(usrc, url_or_raw)
return BeautifulSoup(usrc, markupMassage=nmassage)
if self.verbose > 2:
self.log( " document encoding: '%s'" % docEncoding)
if docEncoding != self.encoding :
soup = get_the_soup(docEncoding, url_or_raw)
return soup
def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
# Replace '&' with '&'
massaged = re.sub("&","&", massaged)
massaged = re.sub("&#038;","&", massaged)
massaged = re.sub("&amp;","&", massaged)
return self.fixChars(massaged)
else:
return description
@@ -422,6 +455,16 @@ class NYTimes(BasicNewsRecipe):
if self.filterDuplicates:
if url in self.url_list:
return
if self.webEdition:
date_tag = self.decode_url_date(url)
if date_tag is not None:
if self.oldest_web_article is not None:
if date_tag < self.earliest_date:
self.log("Skipping article %s" % url)
return
else:
self.log("Skipping article %s" % url)
return
self.url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip()
description = ''
@@ -446,6 +489,31 @@ class NYTimes(BasicNewsRecipe):
description=description, author=author,
content=''))
def get_tech_feeds(self,ans):
if self.getTechBlogs:
tech_articles = {}
key_list = []
save_oldest_article = self.oldest_article
save_max_articles_per_feed = self.max_articles_per_feed
self.oldest_article = self.tech_oldest_article
self.max_articles_per_feed = self.tech_max_articles_per_feed
self.feeds = self.tech_feeds
tech = self.parse_feeds()
self.oldest_article = save_oldest_article
self.max_articles_per_feed = save_max_articles_per_feed
self.feeds = None
for f in tech:
key_list.append(f.title)
tech_articles[f.title] = []
for a in f.articles:
tech_articles[f.title].append(
dict(title=a.title, url=a.url, date=a.date,
description=a.summary, author=a.author,
content=a.content))
tech_ans = [(k, tech_articles[k]) for k in key_list if tech_articles.has_key(k)]
for x in tech_ans:
ans.append(x)
return ans
def parse_web_edition(self):
@@ -457,31 +525,41 @@ class NYTimes(BasicNewsRecipe):
if sec_title in self.excludeSections:
print "SECTION EXCLUDED: ",sec_title
continue
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
try:
soup = self.index_to_soup('http://www.nytimes.com/pages/'+index_url+'/index.html')
except:
continue
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
self.key = sec_title
# Find each article
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['story', 'story headline'] :
attrs={'class':['section-headline', 'ledeStory', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['story', 'story headline', 'storyHeader'] :
self.handle_article(div)
elif div['class'] == 'ledeStory':
divsub = div.find('div','storyHeader')
if divsub is not None:
self.handle_article(divsub)
ulrefer = div.find('ul','refer')
if ulrefer is not None:
for lidiv in ulrefer.findAll('li'):
self.handle_article(lidiv)
elif div['class'] == 'headlinesOnly multiline flush':
for lidiv in div.findAll('li'):
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
return self.filter_ans(self.get_tech_feeds(self.ans))
def parse_todays_index(self):
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
skipping = False
# Find each article
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['section-headline','sectionHeader']:
self.key = string.capwords(self.feed_title(div))
self.key = self.key.replace('Op-ed','Op-Ed')
@@ -505,7 +583,7 @@ class NYTimes(BasicNewsRecipe):
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
return self.filter_ans(self.get_tech_feeds(self.ans))
def parse_headline_index(self):
@@ -553,7 +631,7 @@ class NYTimes(BasicNewsRecipe):
for h3_item in search_div.findAll('h3'):
byline = h3_item.h6
if byline is not None:
author = self.tag_to_string(byline,usa_alt=False)
author = self.tag_to_string(byline,use_alt=False)
else:
author = ''
a = h3_item.find('a', href=True)
@@ -579,7 +657,7 @@ class NYTimes(BasicNewsRecipe):
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
return self.filter_ans(self.get_tech_feeds(self.ans))
def parse_index(self):
if self.headlinesOnly:
@@ -589,40 +667,198 @@ class NYTimes(BasicNewsRecipe):
else:
return self.parse_todays_index()
def strip_anchors(self,soup):
def strip_anchors(self,soup,kill_all=False):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
if kill_all or (self.recursions==0):
a.replaceWith(self.tag_to_string(a,False))
else:
if a.has_key('href'):
if a['href'].startswith('http://www.nytimes'):
if not a['href'].endswith('pagewanted=all'):
url = re.sub(r'\?.*', '', a['href'])
if self.exclude_url(url):
a.replaceWith(self.tag_to_string(a,False))
else:
a['href'] = url+'?pagewanted=all'
elif not (a['href'].startswith('http://pogue') or \
a['href'].startswith('http://bits') or \
a['href'].startswith('http://travel') or \
a['href'].startswith('http://business') or \
a['href'].startswith('http://tech') or \
a['href'].startswith('http://health') or \
a['href'].startswith('http://dealbook') or \
a['href'].startswith('http://open')):
a.replaceWith(self.tag_to_string(a,False))
return soup
def handle_tags(self,soup):
try:
print("HANDLE TAGS: TITLE = "+self.tag_to_string(soup.title))
except:
print("HANDLE TAGS: NO TITLE")
if soup is None:
print("ERROR: handle_tags received NoneType")
return None
## print("HANDLING AD FORWARD:")
## print(soup)
if self.keep_only_tags:
body = Tag(soup, 'body')
try:
if isinstance(self.keep_only_tags, dict):
self.keep_only_tags = [self.keep_only_tags]
for spec in self.keep_only_tags:
for tag in soup.find('body').findAll(**spec):
body.insert(len(body.contents), tag)
soup.find('body').replaceWith(body)
except AttributeError: # soup has no body element
pass
def remove_beyond(tag, next):
while tag is not None and getattr(tag, 'name', None) != 'body':
after = getattr(tag, next)
while after is not None:
ns = getattr(tag, next)
after.extract()
after = ns
tag = tag.parent
if self.remove_tags_after is not None:
rt = [self.remove_tags_after] if isinstance(self.remove_tags_after, dict) else self.remove_tags_after
for spec in rt:
tag = soup.find(**spec)
remove_beyond(tag, 'nextSibling')
if self.remove_tags_before is not None:
tag = soup.find(**self.remove_tags_before)
remove_beyond(tag, 'previousSibling')
for kwds in self.remove_tags:
for tag in soup.findAll(**kwds):
tag.extract()
return soup
def preprocess_html(self, soup):
if self.webEdition & (self.oldest_article>0):
date_tag = soup.find(True,attrs={'class': ['dateline','date']})
if date_tag:
date_str = self.tag_to_string(date_tag,use_alt=False)
date_str = date_str.replace('Published:','')
date_items = date_str.split(',')
try:
datestring = date_items[0]+' '+date_items[1]
article_date = self.decode_us_date(datestring)
except:
article_date = date.today()
if article_date < self.earliest_date:
self.log("Skipping article dated %s" % date_str)
return None
print("PREPROCESS TITLE="+self.tag_to_string(soup.title))
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all'
self.log.warn("Skipping ad to article at '%s'" % url)
sleep(5)
soup = self.handle_tags(self.article_to_soup(url))
#all articles are from today, no need to print the date on every page
try:
if not self.webEdition:
date_tag = soup.find(True,attrs={'class': ['dateline','date']})
if date_tag:
date_tag.extract()
except:
self.log("Error removing the published date")
# check if the article is from one of the tech blogs
blog=soup.find('div',attrs={'id':['pogue','bits','gadgetwise','open']})
if blog is not None:
old_body = soup.find('body')
new_body=Tag(soup,'body')
new_body.append(soup.find('div',attrs={'id':'content'}))
new_body.find('div',attrs={'id':'content'})['id']='blogcontent' # identify for postprocess_html
old_body.replaceWith(new_body)
for divr in soup.findAll('div',attrs={'class':re.compile('w190 right')}):
if divr.find(text=re.compile('Sign up')):
divr.extract()
divr = soup.find('div',attrs={'id':re.compile('related-content')})
if divr is not None:
# handle related articles
rlist = []
ul = divr.find('ul')
if ul is not None:
for li in ul.findAll('li'):
atag = li.find('a')
if atag is not None:
if atag['href'].startswith('http://pogue') or atag['href'].startswith('http://bits') or \
atag['href'].startswith('http://open'):
atag.find(text=True).replaceWith(self.massageNCXText(self.tag_to_string(atag,False)))
rlist.append(atag)
divr.extract()
if rlist != []:
asidediv = Tag(soup,'div',[('class','aside')])
if soup.find('hr') is None:
asidediv.append(Tag(soup,'hr'))
h4 = Tag(soup,'h4',[('class','asidenote')])
h4.insert(0,"Related Posts")
asidediv.append(h4)
ul = Tag(soup,'ul')
for r in rlist:
li = Tag(soup,'li',[('class','aside')])
r['class'] = 'aside'
li.append(r)
ul.append(li)
asidediv.append(ul)
asidediv.append(Tag(soup,'hr'))
smain = soup.find('body')
smain.append(asidediv)
for atag in soup.findAll('a'):
img = atag.find('img')
if img is not None:
atag.replaceWith(img)
elif not atag.has_key('href'):
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
elif not (atag['href'].startswith('http://www.nytimes') or atag['href'].startswith('http://pogue') or \
atag['href'].startswith('http://bits') or atag['href'].startswith('http://open')):
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
hdr = soup.find('address')
if hdr is not None:
hdr.name='span'
for span_credit in soup.findAll('span','credit'):
sp = Tag(soup,'span')
span_credit.replaceWith(sp)
sp.append(Tag(soup,'br'))
sp.append(span_credit)
sp.append(Tag(soup,'br'))
else: # nytimes article
related = [] # these will be the related articles
first_outer = None # first related outer tag
first_related = None # first related tag
for outerdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}):
for rdiv in soup.findAll('div','columnGroup doubleRule'):
if rdiv.find('h3') is not None:
if self.tag_to_string(rdiv.h3,False).startswith('Related'):
rdiv.h3.find(text=True).replaceWith("Related articles")
rdiv.h3['class'] = 'asidenote'
for litag in rdiv.findAll('li'):
if litag.find('a') is not None:
if litag.find('a')['href'].startswith('http://www.nytimes.com'):
url = re.sub(r'\?.*', '', litag.find('a')['href'])
litag.find('a')['href'] = url+'?pagewanted=all'
litag.extract()
related.append(litag)
if first_related is None:
first_related = rdiv
first_outer = outerdiv
else:
litag.extract()
if related != []:
for r in related:
if r.h6: # don't want the anchor inside a h6 tag
r.h6.replaceWith(r.h6.a)
first_related.ul.append(r)
first_related.insert(0,Tag(soup,'hr'))
first_related.append(Tag(soup,'hr'))
first_related['class'] = 'aside'
first_outer.replaceWith(first_related) # replace the outer tag with the related tag
for rdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}):
rdiv.extract()
kicker_tag = soup.find(attrs={'class':'kicker'})
if kicker_tag: # remove Op_Ed author head shots
tagline = self.tag_to_string(kicker_tag)
if tagline=='Op-Ed Columnist':
img_div = soup.find('div','inlineImage module')
if img_div:
img_div.extract()
if self.useHighResImages:
try:
@@ -667,26 +903,6 @@ class NYTimes(BasicNewsRecipe):
except Exception:
self.log("Error pulling high resolution images")
try:
#remove "Related content" bar
runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ','articleInline runaroundLeft lastArticleInline']})
if runAroundsFound:
for runAround in runAroundsFound:
#find all section headers
hlines = runAround.findAll(True ,{'class':['sectionHeader','sectionHeader flushBottom']})
if hlines:
for hline in hlines:
hline.extract()
#find all section headers
hlines = runAround.findAll('h6')
if hlines:
for hline in hlines:
hline.extract()
except:
self.log("Error removing related content bar")
try:
#in case pulling images failed, delete the enlarge this text
enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'})
@@ -696,9 +912,24 @@ class NYTimes(BasicNewsRecipe):
except:
self.log("Error removing Enlarge this text")
return self.strip_anchors(soup)
def postprocess_html(self,soup, first_fetch):
return self.strip_anchors(soup,False)
def postprocess_html(self,soup,first_fetch):
if not first_fetch: # remove Related links
for aside in soup.findAll('div','aside'):
aside.extract()
soup = self.strip_anchors(soup,True)
if soup.find('div',attrs={'id':'blogcontent'}) is None:
if first_fetch:
aside = soup.find('div','aside')
if aside is not None: # move the related list to the end of the article
art = soup.find('div',attrs={'id':'article'})
if art is None:
art = soup.find('div',attrs={'class':'article'})
if art is not None:
art.append(aside)
try:
if self.one_picture_per_article:
# Remove all images after first
@@ -855,23 +1086,22 @@ class NYTimes(BasicNewsRecipe):
self.log("ERROR: Problem in Add class=authorId to <div> so we can format with CSS")
return soup
def populate_article_metadata(self, article, soup, first):
if first and hasattr(self, 'add_toc_thumbnail'):
if not first:
return
idxdiv = soup.find('div',attrs={'class':'articleSpanImage'})
if idxdiv is not None:
if idxdiv.img:
self.add_toc_thumbnail(article, idxdiv.img['src'])
self.add_toc_thumbnail(article, re.sub(r'links\\link\d+\\','',idxdiv.img['src']))
else:
img = soup.find('img')
img = soup.find('body').find('img')
if img is not None:
self.add_toc_thumbnail(article, img['src'])
self.add_toc_thumbnail(article, re.sub(r'links\\link\d+\\','',img['src']))
shortparagraph = ""
try:
if len(article.text_summary.strip()) == 0:
articlebodies = soup.findAll('div',attrs={'class':'articleBody'})
if not articlebodies: #added to account for blog formats
articlebodies = soup.findAll('div', attrs={'class':'entry-content'}) #added to account for blog formats
if articlebodies:
for articlebody in articlebodies:
if articlebody:
@@ -880,15 +1110,23 @@ class NYTimes(BasicNewsRecipe):
refparagraph = self.massageNCXText(self.tag_to_string(p,use_alt=False)).strip()
#account for blank paragraphs and short paragraphs by appending them to longer ones
if len(refparagraph) > 0:
if len(refparagraph) > 140: #approximately two lines of text
article.summary = article.text_summary = shortparagraph + refparagraph
if len(refparagraph) > 70: #approximately one line of text
newpara = shortparagraph + refparagraph
newparaDateline,newparaEm,newparaDesc = newpara.partition('&mdash;')
if newparaEm == '':
newparaDateline,newparaEm,newparaDesc = newpara.partition('—')
if newparaEm == '':
newparaDesc = newparaDateline
article.summary = article.text_summary = newparaDesc.strip()
return
else:
shortparagraph = refparagraph + " "
if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"):
shortparagraph = shortparagraph + "- "
else:
article.summary = article.text_summary = self.massageNCXText(article.text_summary)
except:
self.log("Error creating article descriptions")
return
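
The summary logic above strips a leading dateline by partitioning on an em-dash, first as the '&mdash;' entity and then as the literal character. A minimal standalone sketch of that step:

    # Partition on the dash and keep only the text after it, falling back
    # to the whole paragraph when no dash is present.
    def strip_dateline(paragraph):
        dateline, sep, body = paragraph.partition('&mdash;')
        if sep == '':
            dateline, sep, body = paragraph.partition(u'\u2014')  # literal em-dash
        return body.strip() if sep else dateline.strip()

    print(strip_dateline(u'WASHINGTON \u2014 The measure passed easily.'))
    # -> The measure passed easily.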

View File

@@ -8,19 +8,19 @@ Fetch sueddeutsche.de
from calibre.web.feeds.news import BasicNewsRecipe
class Sueddeutsche(BasicNewsRecipe):
title = u'Süddeutsche.de' # 2012-01-26 AGe Correct Title
description = 'News from Germany, Access to online content' # 2012-01-26 AGe
__author__ = 'Oliver Niesner and Armin Geller' #Update AGe 2012-01-26
publisher = u'Süddeutsche Zeitung' # 2012-01-26 AGe add
category = 'news, politics, Germany' # 2012-01-26 AGe add
timefmt = ' [%a, %d %b %Y]' # 2012-01-26 AGe add %a
title = u'Süddeutsche.de'
description = 'News from Germany, Access to online content'
__author__ = 'Oliver Niesner and Armin Geller' #Update AGe 2012-12-05
publisher = u'Süddeutsche Zeitung'
category = 'news, politics, Germany'
timefmt = ' [%a, %d %b %Y]'
oldest_article = 7
max_articles_per_feed = 100
language = 'de'
encoding = 'utf-8'
publication_type = 'newspaper' # 2012-01-26 add
publication_type = 'newspaper'
cover_source = 'http://www.sueddeutsche.de/verlag' # 2012-01-26 AGe add from Darko Miletic paid content source
masthead_url = 'http://www.sueddeutsche.de/static_assets/build/img/sdesiteheader/logo_homepage.441d531c.png' # 2012-01-26 AGe add
masthead_url = 'http://www.sueddeutsche.de/static_assets/img/sdesiteheader/logo_standard.a152b0df.png' # 2012-12-05 AGe add
use_embedded_content = False
no_stylesheets = True
@@ -40,9 +40,9 @@ class Sueddeutsche(BasicNewsRecipe):
(u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'),
(u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'),
(u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'),
(u'Bildung', u'http://rss.sueddeutsche.de/rss/bildung'), #2012-01-26 AGe New
(u'Gesundheit', u'http://rss.sueddeutsche.de/rss/gesundheit'), #2012-01-26 AGe New
(u'Stil', u'http://rss.sueddeutsche.de/rss/stil'), #2012-01-26 AGe New
(u'Bildung', u'http://rss.sueddeutsche.de/rss/bildung'),
(u'Gesundheit', u'http://rss.sueddeutsche.de/rss/gesundheit'),
(u'Stil', u'http://rss.sueddeutsche.de/rss/stil'),
(u'München & Region', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMünchen&Region%24?output=rss'),
(u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'),
(u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'),

View File

@@ -2,8 +2,8 @@
__license__ = 'GPL v3'
__copyright__ = '4 February 2011, desUBIKado'
__author__ = 'desUBIKado'
__version__ = 'v0.08'
__date__ = '30, June 2012'
__version__ = 'v0.09'
__date__ = '02, December 2012'
'''
http://www.weblogssl.com/
'''
@@ -37,6 +37,7 @@ class weblogssl(BasicNewsRecipe):
,(u'Xataka Mexico', u'http://feeds.weblogssl.com/xatakamx')
,(u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil')
,(u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid')
,(u'Xataka Windows', u'http://feeds.weblogssl.com/xatakawindows')
,(u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto')
,(u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon')
,(u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia')
@@ -80,19 +81,31 @@ class weblogssl(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'id':'infoblock'}),
dict(name='div', attrs={'class':'post'}),
dict(name='div', attrs={'id':'blog-comments'})
dict(name='div', attrs={'id':'blog-comments'}),
dict(name='div', attrs={'class':'container'}) #m.xataka.com
]
remove_tags = [dict(name='div', attrs={'id':'comment-nav'})]
remove_tags = [dict(name='div', attrs={'id':'comment-nav'}),
dict(name='menu', attrs={'class':'social-sharing'}), #m.xataka.com
dict(name='section' , attrs={'class':'comments'}), #m.xataka.com
dict(name='div' , attrs={'class':'article-comments'}), #m.xataka.com
dict(name='nav' , attrs={'class':'article-taxonomy'}) #m.xataka.com
]
remove_tags_after = dict(name='section' , attrs={'class':'comments'})
def print_version(self, url):
return url.replace('http://www.', 'http://m.')
preprocess_regexps = [
# To put a blank line between one comment and the next
(re.compile(r'<li id="c', re.DOTALL|re.IGNORECASE), lambda match: '<br><br><li id="c')
(re.compile(r'<li id="c', re.DOTALL|re.IGNORECASE), lambda match: '<br><br><li id="c'),
# To show the images in m.xataka.com articles
(re.compile(r'<noscript>', re.DOTALL|re.IGNORECASE), lambda m: ''),
(re.compile(r'</noscript>', re.DOTALL|re.IGNORECASE), lambda m: '')
]
# To replace the embedded YouTube video with an image
def preprocess_html(self, soup):
@@ -108,14 +121,16 @@ class weblogssl(BasicNewsRecipe):
# To recover the original article URL from the "feedsportal" one
# The following code is courtesy of user "bosplans" at www.mobileread.com
# http://www.mobileread.com/forums/sho...d.php?t=130297
# http://www.mobileread.com/forums/showthread.php?t=130297
def get_article_url(self, article):
link = article.get('link', None)
if link is None:
return article
# if link.split('/')[-4]=="xataka2":
# return article.get('feedburner_origlink', article.get('link', article.get('guid')))
if link.split('/')[-4]=="xataka2":
return article.get('feedburner_origlink', article.get('link', article.get('guid')))
return article.get('guid', None)
if link.split('/')[-1]=="story01.htm":
link=link.split('/')[-2]
a=['0B','0C','0D','0E','0F','0G','0N' ,'0L0S','0A']
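
The hunk above is cut off mid-snippet. For orientation, this is roughly how feedsportal-mangled links are decoded in comparable calibre recipes; the substitution table below is an assumption drawn from sibling recipes, not part of this diff:

    # Hedged sketch: feedsportal encodes URL characters as '0X' pairs in the
    # next-to-last path component of story01.htm links.
    def decode_feedsportal(link):
        if link.split('/')[-1] == 'story01.htm':
            slug = link.split('/')[-2]
            enc = ['0B', '0C', '0D', '0E', '0F', '0G', '0N', '0L0S', '0A']
            dec = ['.', '/', '?', '-', '=', '&', '.com', 'www.', '0']
            for a, b in zip(enc, dec):
                slug = slug.replace(a, b)
            return 'http://' + slug
        return link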

View File

@@ -9,15 +9,15 @@ class Zaman (BasicNewsRecipe):
__author__ = u'thomass'
oldest_article = 2
max_articles_per_feed =50
# no_stylesheets = True
no_stylesheets = True
#delay = 1
#use_embedded_content = False
encoding = 'ISO 8859-9'
publisher = 'Zaman'
use_embedded_content = False
encoding = 'utf-8'
publisher = 'Feza Gazetecilik'
category = 'news, haberler,TR,gazete'
language = 'tr'
publication_type = 'newspaper '
extra_css = '.buyukbaslik{font-weight: bold; font-size: 18px;color:#0000FF}'#body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
extra_css = 'h1{text-transform: capitalize; font-weight: bold; font-size: 22px;color:#0000FF} p{text-align:justify} ' #.introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
conversion_options = {
'tags' : category
,'language' : language
@@ -26,25 +26,26 @@ class Zaman (BasicNewsRecipe):
}
cover_img_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-snc4/188140_81722291869_2111820_n.jpg'
masthead_url = 'http://medya.zaman.com.tr/extentions/zaman.com.tr/img/section/logo-section.png'
ignore_duplicate_articles = { 'title', 'url' }
auto_cleanup = False
remove_empty_feeds= True
#keep_only_tags = [dict(name='div', attrs={'id':[ 'news-detail-content']}), dict(name='td', attrs={'class':['columnist-detail','columnist_head']}) ]
remove_tags = [ dict(name='img', attrs={'src':['http://medya.zaman.com.tr/zamantryeni/pics/zamanonline.gif']})]#,dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),dict(name='table', attrs={'id':['yaziYorumTablosu']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif','http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']})
#keep_only_tags = [dict(name='div', attrs={'id':[ 'contentposition19']})]#,dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'news-detail-content']}), dict(name='td', attrs={'class':['columnist-detail','columnist_head']}), ]
remove_tags = [ dict(name='img', attrs={'src':['http://cmsmedya.zaman.com.tr/images/logo/logo.bmp']}),dict(name='hr', attrs={'class':['interactive-hr']})]# remove_tags = [ dict(name='div', attrs={'class':[ 'detayUyari']}),dict(name='div', attrs={'class':[ 'detayYorum']}),dict(name='div', attrs={'class':[ 'addthis_toolbox addthis_default_style ']}),dict(name='div', attrs={'id':[ 'tumYazi']})]#,dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/zamantryeni/pics/zamanonline.gif']}),dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),dict(name='table', attrs={'id':['yaziYorumTablosu']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif','http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']}),dict(name='div', attrs={'id':[ 'news-detail-gallery']}),dict(name='div', attrs={'id':[ 'news-detail-title-bottom-part']}),dict(name='div', attrs={'id':[ 'news-detail-news-paging-main']})]#
#remove_attributes = ['width','height']
remove_empty_feeds= True
feeds = [
( u'Anasayfa', u'http://www.zaman.com.tr/anasayfa.rss'),
( u'Son Dakika', u'http://www.zaman.com.tr/sondakika.rss'),
#( u'En çok Okunanlar', u'http://www.zaman.com.tr/max_all.rss'),
#( u'Manşet', u'http://www.zaman.com.tr/manset.rss'),
( u'Gündem', u'http://www.zaman.com.tr/gundem.rss'),
( u'Manşet', u'http://www.zaman.com.tr/manset.rss'),
( u'Yazarlar', u'http://www.zaman.com.tr/yazarlar.rss'),
( u'Politika', u'http://www.zaman.com.tr/politika.rss'),
( u'Ekonomi', u'http://www.zaman.com.tr/ekonomi.rss'),
( u'Dış Haberler', u'http://www.zaman.com.tr/dishaberler.rss'),
( u'Son Dakika', u'http://www.zaman.com.tr/sondakika.rss'),
( u'Gündem', u'http://www.zaman.com.tr/gundem.rss'),
( u'Yorumlar', u'http://www.zaman.com.tr/yorumlar.rss'),
( u'Röportaj', u'http://www.zaman.com.tr/roportaj.rss'),
( u'Dizi Yazı', u'http://www.zaman.com.tr/dizi.rss'),
@@ -59,8 +60,9 @@ class Zaman (BasicNewsRecipe):
( u'Cuma Eki', u'http://www.zaman.com.tr/cuma.rss'),
( u'Cumaertesi Eki', u'http://www.zaman.com.tr/cumaertesi.rss'),
( u'Pazar Eki', u'http://www.zaman.com.tr/pazar.rss'),
( u'En çok Okunanlar', u'http://www.zaman.com.tr/max_all.rss'),
( u'Anasayfa', u'http://www.zaman.com.tr/anasayfa.rss'),
]
def print_version(self, url):
return url.replace('http://www.zaman.com.tr/haber.do?haberno=', 'http://www.zaman.com.tr/yazdir.do?haberno=')
return url.replace('http://www.zaman.com.tr/newsDetail_getNewsById.action?newsId=', 'http://www.zaman.com.tr/newsDetail_openPrintPage.action?newsId=')

View File

@@ -215,6 +215,8 @@ class Command(object):
sys.stdout.flush()
def installer_name(ext, is64bit=False):
if is64bit and ext == 'msi':
return 'dist/%s-64bit-%s.msi'%(__appname__, __version__)
if ext in ('exe', 'msi'):
return 'dist/%s-%s.%s'%(__appname__, __version__, ext)
if ext == 'dmg':

View File

@@ -11,12 +11,11 @@ from distutils.spawn import find_executable
from PyQt4 import pyqtconfig
from setup import isosx, iswindows, islinux
from setup import isosx, iswindows, islinux, is64bit
OSX_SDK = '/Developer/SDKs/MacOSX10.5.sdk'
os.environ['MACOSX_DEPLOYMENT_TARGET'] = '10.5'
is64bit = sys.maxsize > 2**32
NMAKE = RC = msvc = MT = win_inc = win_lib = win_ddk = win_ddk_lib_dirs = None
if iswindows:

View File

@@ -20,7 +20,7 @@ __all__ = [
'upload_user_manual', 'upload_demo', 'reupload',
'linux32', 'linux64', 'linux', 'linux_freeze',
'osx32_freeze', 'osx', 'rsync', 'push',
'win32_freeze', 'win32', 'win',
'win32_freeze', 'win32', 'win64', 'win',
'stage1', 'stage2', 'stage3', 'stage4', 'stage5', 'publish'
]
@@ -91,9 +91,10 @@ osx = OSX()
from setup.installer.osx.app.main import OSX32_Freeze
osx32_freeze = OSX32_Freeze()
from setup.installer.windows import Win, Win32
from setup.installer.windows import Win, Win32, Win64
win = Win()
win32 = Win32()
win64 = Win64()
from setup.installer.windows.freeze import Win32Freeze
win32_freeze = Win32Freeze()

View File

@@ -1,12 +1,12 @@
/*
* Memory DLL loading code
* Version 0.0.2 with additions from Thomas Heller
* Version 0.0.3
*
* Copyright (c) 2004-2005 by Joachim Bauch / mail@joachim-bauch.de
* Copyright (c) 2004-2012 by Joachim Bauch / mail@joachim-bauch.de
* http://www.joachim-bauch.de
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
@@ -19,156 +19,48 @@
*
* The Initial Developer of the Original Code is Joachim Bauch.
*
* Portions created by Joachim Bauch are Copyright (C) 2004-2005
* Portions created by Joachim Bauch are Copyright (C) 2004-2012
* Joachim Bauch. All Rights Reserved.
*
* Portions Copyright (C) 2005 Thomas Heller.
*
*/
#ifndef __GNUC__
// disable warnings about pointer <-> DWORD conversions
#pragma warning( disable : 4311 4312 )
#endif
#ifdef _WIN64
#define POINTER_TYPE ULONGLONG
#else
#define POINTER_TYPE DWORD
#endif
#include <Windows.h>
#include <winnt.h>
#if DEBUG_OUTPUT
#ifdef DEBUG_OUTPUT
#include <stdio.h>
#endif
#ifndef IMAGE_SIZEOF_BASE_RELOCATION
// Vista SDKs no longer define IMAGE_SIZEOF_BASE_RELOCATION!?
# define IMAGE_SIZEOF_BASE_RELOCATION (sizeof(IMAGE_BASE_RELOCATION))
#define IMAGE_SIZEOF_BASE_RELOCATION (sizeof(IMAGE_BASE_RELOCATION))
#endif
#include "MemoryModule.h"
/*
XXX We need to protect at least walking the 'loaded' linked list with a lock!
*/
/******************************************************************/
FINDPROC findproc;
void *findproc_data = NULL;
struct NAME_TABLE {
char *name;
DWORD ordinal;
};
typedef struct tagMEMORYMODULE {
typedef struct {
PIMAGE_NT_HEADERS headers;
unsigned char *codeBase;
HMODULE *modules;
int numModules;
int initialized;
struct NAME_TABLE *name_table;
char *name;
int refcount;
struct tagMEMORYMODULE *next, *prev;
} MEMORYMODULE, *PMEMORYMODULE;
typedef BOOL (WINAPI *DllEntryProc)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved);
#define GET_HEADER_DICTIONARY(module, idx) &(module)->headers->OptionalHeader.DataDirectory[idx]
MEMORYMODULE *loaded; /* linked list of loaded memory modules */
/* private - insert a loaded library in a linked list */
static void _Register(char *name, MEMORYMODULE *module)
{
module->next = loaded;
if (loaded)
loaded->prev = module;
module->prev = NULL;
loaded = module;
}
/* private - remove a loaded library from a linked list */
static void _Unregister(MEMORYMODULE *module)
{
free(module->name);
if (module->prev)
module->prev->next = module->next;
if (module->next)
module->next->prev = module->prev;
if (module == loaded)
loaded = module->next;
}
/* public - replacement for GetModuleHandle() */
HMODULE MyGetModuleHandle(LPCTSTR lpModuleName)
{
MEMORYMODULE *p = loaded;
while (p) {
// If already loaded, only increment the reference count
if (0 == stricmp(lpModuleName, p->name)) {
return (HMODULE)p;
}
p = p->next;
}
return GetModuleHandle(lpModuleName);
}
/* public - replacement for LoadLibrary, but searches FIRST for memory
libraries, then for normal libraries. So, it will load libraries AS memory
module if they are found by findproc().
*/
HMODULE MyLoadLibrary(char *lpFileName)
{
MEMORYMODULE *p = loaded;
HMODULE hMod;
while (p) {
// If already loaded, only increment the reference count
if (0 == stricmp(lpFileName, p->name)) {
p->refcount++;
return (HMODULE)p;
}
p = p->next;
}
if (findproc && findproc_data) {
void *pdata = findproc(lpFileName, findproc_data);
if (pdata) {
hMod = MemoryLoadLibrary(lpFileName, pdata);
free(p);
return hMod;
}
}
hMod = LoadLibrary(lpFileName);
return hMod;
}
/* public - replacement for GetProcAddress() */
FARPROC MyGetProcAddress(HMODULE hModule, LPCSTR lpProcName)
{
MEMORYMODULE *p = loaded;
while (p) {
if ((HMODULE)p == hModule)
return MemoryGetProcAddress(p, lpProcName);
p = p->next;
}
return GetProcAddress(hModule, lpProcName);
}
/* public - replacement for FreeLibrary() */
BOOL MyFreeLibrary(HMODULE hModule)
{
MEMORYMODULE *p = loaded;
while (p) {
if ((HMODULE)p == hModule) {
if (--p->refcount == 0) {
_Unregister(p);
MemoryFreeLibrary(p);
}
return TRUE;
}
p = p->next;
}
return FreeLibrary(hModule);
}
#if DEBUG_OUTPUT
#ifdef DEBUG_OUTPUT
static void
OutputLastError(const char *msg)
{
@@ -184,20 +76,6 @@ OutputLastError(const char *msg)
}
#endif
/*
static int dprintf(char *fmt, ...)
{
char Buffer[4096];
va_list marker;
int result;
va_start(marker, fmt);
result = vsprintf(Buffer, fmt, marker);
OutputDebugString(Buffer);
return result;
}
*/
static void
CopySections(const unsigned char *data, PIMAGE_NT_HEADERS old_headers, PMEMORYMODULE module)
{
@@ -205,15 +83,12 @@ CopySections(const unsigned char *data, PIMAGE_NT_HEADERS old_headers, PMEMORYMO
unsigned char *codeBase = module->codeBase;
unsigned char *dest;
PIMAGE_SECTION_HEADER section = IMAGE_FIRST_SECTION(module->headers);
for (i=0; i<module->headers->FileHeader.NumberOfSections; i++, section++)
{
if (section->SizeOfRawData == 0)
{
for (i=0; i<module->headers->FileHeader.NumberOfSections; i++, section++) {
if (section->SizeOfRawData == 0) {
// section doesn't contain data in the dll itself, but may define
// uninitialized data
size = old_headers->OptionalHeader.SectionAlignment;
if (size > 0)
{
if (size > 0) {
dest = (unsigned char *)VirtualAlloc(codeBase + section->VirtualAddress,
size,
MEM_COMMIT,
@@ -255,66 +130,72 @@ FinalizeSections(PMEMORYMODULE module)
{
int i;
PIMAGE_SECTION_HEADER section = IMAGE_FIRST_SECTION(module->headers);
#ifdef _WIN64
POINTER_TYPE imageOffset = (module->headers->OptionalHeader.ImageBase & 0xffffffff00000000);
#else
#define imageOffset 0
#endif
// loop through all sections and change access flags
for (i=0; i<module->headers->FileHeader.NumberOfSections; i++, section++)
{
for (i=0; i<module->headers->FileHeader.NumberOfSections; i++, section++) {
DWORD protect, oldProtect, size;
int executable = (section->Characteristics & IMAGE_SCN_MEM_EXECUTE) != 0;
int readable = (section->Characteristics & IMAGE_SCN_MEM_READ) != 0;
int writeable = (section->Characteristics & IMAGE_SCN_MEM_WRITE) != 0;
if (section->Characteristics & IMAGE_SCN_MEM_DISCARDABLE)
{
if (section->Characteristics & IMAGE_SCN_MEM_DISCARDABLE) {
// section is not needed any more and can safely be freed
VirtualFree((LPVOID)section->Misc.PhysicalAddress, section->SizeOfRawData, MEM_DECOMMIT);
VirtualFree((LPVOID)((POINTER_TYPE)section->Misc.PhysicalAddress | imageOffset), section->SizeOfRawData, MEM_DECOMMIT);
continue;
}
// determine protection flags based on characteristics
protect = ProtectionFlags[executable][readable][writeable];
if (section->Characteristics & IMAGE_SCN_MEM_NOT_CACHED)
if (section->Characteristics & IMAGE_SCN_MEM_NOT_CACHED) {
protect |= PAGE_NOCACHE;
}
// determine size of region
size = section->SizeOfRawData;
if (size == 0)
{
if (section->Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)
if (size == 0) {
if (section->Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA) {
size = module->headers->OptionalHeader.SizeOfInitializedData;
else if (section->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA)
} else if (section->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA) {
size = module->headers->OptionalHeader.SizeOfUninitializedData;
}
}
if (size > 0)
{
if (size > 0) {
// change memory access flags
if (VirtualProtect((LPVOID)section->Misc.PhysicalAddress, section->SizeOfRawData, protect, &oldProtect) == 0)
#if DEBUG_OUTPUT
if (VirtualProtect((LPVOID)((POINTER_TYPE)section->Misc.PhysicalAddress | imageOffset), size, protect, &oldProtect) == 0)
#ifdef DEBUG_OUTPUT
OutputLastError("Error protecting memory page")
#endif
;
}
}
#ifndef _WIN64
#undef imageOffset
#endif
}
static void
PerformBaseRelocation(PMEMORYMODULE module, DWORD delta)
PerformBaseRelocation(PMEMORYMODULE module, SIZE_T delta)
{
DWORD i;
unsigned char *codeBase = module->codeBase;
PIMAGE_DATA_DIRECTORY directory = GET_HEADER_DICTIONARY(module, IMAGE_DIRECTORY_ENTRY_BASERELOC);
if (directory->Size > 0)
{
PIMAGE_BASE_RELOCATION relocation = (PIMAGE_BASE_RELOCATION)(codeBase + directory->VirtualAddress);
for (; relocation->VirtualAddress > 0; )
{
unsigned char *dest = (unsigned char *)(codeBase + relocation->VirtualAddress);
if (directory->Size > 0) {
PIMAGE_BASE_RELOCATION relocation = (PIMAGE_BASE_RELOCATION) (codeBase + directory->VirtualAddress);
for (; relocation->VirtualAddress > 0; ) {
unsigned char *dest = codeBase + relocation->VirtualAddress;
unsigned short *relInfo = (unsigned short *)((unsigned char *)relocation + IMAGE_SIZEOF_BASE_RELOCATION);
for (i=0; i<((relocation->SizeOfBlock-IMAGE_SIZEOF_BASE_RELOCATION) / 2); i++, relInfo++)
{
for (i=0; i<((relocation->SizeOfBlock-IMAGE_SIZEOF_BASE_RELOCATION) / 2); i++, relInfo++) {
DWORD *patchAddrHL;
#ifdef _WIN64
ULONGLONG *patchAddr64;
#endif
int type, offset;
// the upper 4 bits define the type of relocation
@@ -330,10 +211,17 @@ PerformBaseRelocation(PMEMORYMODULE module, DWORD delta)
case IMAGE_REL_BASED_HIGHLOW:
// change complete 32 bit address
patchAddrHL = (DWORD *)(dest + offset);
*patchAddrHL += delta;
patchAddrHL = (DWORD *) (dest + offset);
*patchAddrHL += (DWORD)delta;
break;
#ifdef _WIN64
case IMAGE_REL_BASED_DIR64:
patchAddr64 = (ULONGLONG *) (dest + offset);
*patchAddr64 += delta;
break;
#endif
default:
//printf("Unknown relocation: %d\n", type);
break;
@@ -341,7 +229,7 @@ PerformBaseRelocation(PMEMORYMODULE module, DWORD delta)
}
// advance to next relocation block
relocation = (PIMAGE_BASE_RELOCATION)(((DWORD)relocation) + relocation->SizeOfBlock);
relocation = (PIMAGE_BASE_RELOCATION) (((char *) relocation) + relocation->SizeOfBlock);
}
}
}
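
In the relocation loop above, each IMAGE_REL_BASED_HIGHLOW entry adds the load delta (actual base address minus the preferred ImageBase) to a 32-bit address embedded in the image; IMAGE_REL_BASED_DIR64 does the same for 64-bit addresses. A toy illustration of the 32-bit fixup, in Python for clarity:

    import struct

    def patch_highlow(image, offset, delta):
        # read the little-endian 32-bit address, add the delta, write it back
        (addr,) = struct.unpack_from('<I', image, offset)
        struct.pack_into('<I', image, offset, (addr + delta) & 0xFFFFFFFF)

    img = bytearray(struct.pack('<I', 0x10001000))  # address baked in at link time
    patch_highlow(img, 0, 0x20000)                  # module rebased by +0x20000
    assert struct.unpack_from('<I', img)[0] == 0x10021000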
@@ -353,18 +241,13 @@ BuildImportTable(PMEMORYMODULE module)
unsigned char *codeBase = module->codeBase;
PIMAGE_DATA_DIRECTORY directory = GET_HEADER_DICTIONARY(module, IMAGE_DIRECTORY_ENTRY_IMPORT);
if (directory->Size > 0)
{
PIMAGE_IMPORT_DESCRIPTOR importDesc = (PIMAGE_IMPORT_DESCRIPTOR)(codeBase + directory->VirtualAddress);
for (; !IsBadReadPtr(importDesc, sizeof(IMAGE_IMPORT_DESCRIPTOR)) && importDesc->Name; importDesc++)
{
DWORD *thunkRef, *funcRef;
HMODULE handle;
handle = MyLoadLibrary(codeBase + importDesc->Name);
if (handle == INVALID_HANDLE_VALUE)
{
//LastError should already be set
if (directory->Size > 0) {
PIMAGE_IMPORT_DESCRIPTOR importDesc = (PIMAGE_IMPORT_DESCRIPTOR) (codeBase + directory->VirtualAddress);
for (; !IsBadReadPtr(importDesc, sizeof(IMAGE_IMPORT_DESCRIPTOR)) && importDesc->Name; importDesc++) {
POINTER_TYPE *thunkRef;
FARPROC *funcRef;
HMODULE handle = LoadLibrary((LPCSTR) (codeBase + importDesc->Name));
if (handle == NULL) {
#if DEBUG_OUTPUT
OutputLastError("Can't load library");
#endif
@@ -373,81 +256,54 @@ BuildImportTable(PMEMORYMODULE module)
}
module->modules = (HMODULE *)realloc(module->modules, (module->numModules+1)*(sizeof(HMODULE)));
if (module->modules == NULL)
{
SetLastError(ERROR_NOT_ENOUGH_MEMORY);
if (module->modules == NULL) {
result = 0;
break;
}
module->modules[module->numModules++] = handle;
if (importDesc->OriginalFirstThunk)
{
thunkRef = (DWORD *)(codeBase + importDesc->OriginalFirstThunk);
funcRef = (DWORD *)(codeBase + importDesc->FirstThunk);
if (importDesc->OriginalFirstThunk) {
thunkRef = (POINTER_TYPE *) (codeBase + importDesc->OriginalFirstThunk);
funcRef = (FARPROC *) (codeBase + importDesc->FirstThunk);
} else {
// no hint table
thunkRef = (DWORD *)(codeBase + importDesc->FirstThunk);
funcRef = (DWORD *)(codeBase + importDesc->FirstThunk);
thunkRef = (POINTER_TYPE *) (codeBase + importDesc->FirstThunk);
funcRef = (FARPROC *) (codeBase + importDesc->FirstThunk);
}
for (; *thunkRef; thunkRef++, funcRef++)
{
if IMAGE_SNAP_BY_ORDINAL(*thunkRef) {
*funcRef = (DWORD)MyGetProcAddress(handle, (LPCSTR)IMAGE_ORDINAL(*thunkRef));
for (; *thunkRef; thunkRef++, funcRef++) {
if (IMAGE_SNAP_BY_ORDINAL(*thunkRef)) {
*funcRef = (FARPROC)GetProcAddress(handle, (LPCSTR)IMAGE_ORDINAL(*thunkRef));
} else {
PIMAGE_IMPORT_BY_NAME thunkData = (PIMAGE_IMPORT_BY_NAME)(codeBase + *thunkRef);
*funcRef = (DWORD)MyGetProcAddress(handle, (LPCSTR)&thunkData->Name);
PIMAGE_IMPORT_BY_NAME thunkData = (PIMAGE_IMPORT_BY_NAME) (codeBase + (*thunkRef));
*funcRef = (FARPROC)GetProcAddress(handle, (LPCSTR)&thunkData->Name);
}
if (*funcRef == 0)
{
SetLastError(ERROR_PROC_NOT_FOUND);
if (*funcRef == 0) {
result = 0;
break;
}
}
if (!result)
if (!result) {
break;
}
}
}
return result;
}
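
The import loop above walks each descriptor's thunk array: a thunk with the high bit set is an import by ordinal (low 16 bits), otherwise it is an RVA to a hint/name entry. A toy model of that branch (32-bit constants shown):

    # IMAGE_ORDINAL_FLAG32 / IMAGE_ORDINAL as used by IMAGE_SNAP_BY_ORDINAL
    IMAGE_ORDINAL_FLAG32 = 0x80000000

    def resolve_thunk(thunk, by_ordinal, by_name, name_at_rva):
        if thunk & IMAGE_ORDINAL_FLAG32:       # import by ordinal
            return by_ordinal(thunk & 0xFFFF)
        return by_name(name_at_rva(thunk))     # import by name (hint/name entry)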
/*
MemoryLoadLibrary - load a library AS MEMORY MODULE, or return
existing MEMORY MODULE with increased refcount.
This allows to load a library AGAIN as memory module which is
already loaded as HMODULE!
*/
HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
HMEMORYMODULE MemoryLoadLibrary(const void *data)
{
PMEMORYMODULE result;
PIMAGE_DOS_HEADER dos_header;
PIMAGE_NT_HEADERS old_header;
unsigned char *code, *headers;
DWORD locationDelta;
SIZE_T locationDelta;
DllEntryProc DllEntry;
BOOL successfull;
MEMORYMODULE *p = loaded;
while (p) {
// If already loaded, only increment the reference count
if (0 == stricmp(name, p->name)) {
p->refcount++;
return (HMODULE)p;
}
p = p->next;
}
/* Do NOT check for GetModuleHandle here! */
dos_header = (PIMAGE_DOS_HEADER)data;
if (dos_header->e_magic != IMAGE_DOS_SIGNATURE)
{
SetLastError(ERROR_BAD_FORMAT);
if (dos_header->e_magic != IMAGE_DOS_SIGNATURE) {
#if DEBUG_OUTPUT
OutputDebugString("Not a valid executable file.\n");
#endif
@@ -455,9 +311,7 @@ HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
}
old_header = (PIMAGE_NT_HEADERS)&((const unsigned char *)(data))[dos_header->e_lfanew];
if (old_header->Signature != IMAGE_NT_SIGNATURE)
{
SetLastError(ERROR_BAD_FORMAT);
if (old_header->Signature != IMAGE_NT_SIGNATURE) {
#if DEBUG_OUTPUT
OutputDebugString("No PE header found.\n");
#endif
@@ -470,31 +324,25 @@ HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
MEM_RESERVE,
PAGE_READWRITE);
if (code == NULL)
if (code == NULL) {
// try to allocate memory at arbitrary position
code = (unsigned char *)VirtualAlloc(NULL,
old_header->OptionalHeader.SizeOfImage,
MEM_RESERVE,
PAGE_READWRITE);
if (code == NULL)
{
SetLastError(ERROR_NOT_ENOUGH_MEMORY);
if (code == NULL) {
#if DEBUG_OUTPUT
OutputLastError("Can't reserve memory");
#endif
return NULL;
}
}
result = (PMEMORYMODULE)HeapAlloc(GetProcessHeap(), 0, sizeof(MEMORYMODULE));
result->codeBase = code;
result->numModules = 0;
result->modules = NULL;
result->initialized = 0;
result->next = result->prev = NULL;
result->refcount = 1;
result->name = strdup(name);
result->name_table = NULL;
// XXX: is it correct to commit the complete memory region at once?
// calling DllEntry raises an exception if we don't...
@@ -514,31 +362,30 @@ HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
result->headers = (PIMAGE_NT_HEADERS)&((const unsigned char *)(headers))[dos_header->e_lfanew];
// update position
result->headers->OptionalHeader.ImageBase = (DWORD)code;
result->headers->OptionalHeader.ImageBase = (POINTER_TYPE)code;
// copy sections from DLL file block to new memory location
CopySections(data, old_header, result);
// adjust base address of imported data
locationDelta = (DWORD)(code - old_header->OptionalHeader.ImageBase);
if (locationDelta != 0)
locationDelta = (SIZE_T)(code - old_header->OptionalHeader.ImageBase);
if (locationDelta != 0) {
PerformBaseRelocation(result, locationDelta);
}
// load required dlls and adjust function table of imports
if (!BuildImportTable(result))
if (!BuildImportTable(result)) {
goto error;
}
// mark memory pages depending on section headers and release
// sections that are marked as "discardable"
FinalizeSections(result);
// get entry point of loaded library
if (result->headers->OptionalHeader.AddressOfEntryPoint != 0)
{
DllEntry = (DllEntryProc)(code + result->headers->OptionalHeader.AddressOfEntryPoint);
if (DllEntry == 0)
{
SetLastError(ERROR_BAD_FORMAT); /* XXX ? */
if (result->headers->OptionalHeader.AddressOfEntryPoint != 0) {
DllEntry = (DllEntryProc) (code + result->headers->OptionalHeader.AddressOfEntryPoint);
if (DllEntry == 0) {
#if DEBUG_OUTPUT
OutputDebugString("Library has no entry point.\n");
#endif
@@ -547,8 +394,7 @@ HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
// notify library about attaching to process
successfull = (*DllEntry)((HINSTANCE)code, DLL_PROCESS_ATTACH, 0);
if (!successfull)
{
if (!successfull) {
#if DEBUG_OUTPUT
OutputDebugString("Can't attach library.\n");
#endif
@@ -557,99 +403,55 @@ HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
result->initialized = 1;
}
_Register(name, result);
return (HMEMORYMODULE)result;
error:
// cleanup
free(result->name);
MemoryFreeLibrary(result);
return NULL;
}
int _compare(const struct NAME_TABLE *p1, const struct NAME_TABLE *p2)
{
return stricmp(p1->name, p2->name);
}
int _find(const char **name, const struct NAME_TABLE *p)
{
return stricmp(*name, p->name);
}
struct NAME_TABLE *GetNameTable(PMEMORYMODULE module)
{
unsigned char *codeBase;
PIMAGE_EXPORT_DIRECTORY exports;
PIMAGE_DATA_DIRECTORY directory;
DWORD i, *nameRef;
WORD *ordinal;
struct NAME_TABLE *p, *ptab;
if (module->name_table)
return module->name_table;
codeBase = module->codeBase;
directory = GET_HEADER_DICTIONARY(module, IMAGE_DIRECTORY_ENTRY_EXPORT);
exports = (PIMAGE_EXPORT_DIRECTORY)(codeBase + directory->VirtualAddress);
nameRef = (DWORD *)(codeBase + exports->AddressOfNames);
ordinal = (WORD *)(codeBase + exports->AddressOfNameOrdinals);
p = ((PMEMORYMODULE)module)->name_table = (struct NAME_TABLE *)malloc(sizeof(struct NAME_TABLE)
* exports->NumberOfNames);
if (p == NULL)
return NULL;
ptab = p;
for (i=0; i<exports->NumberOfNames; ++i) {
p->name = (char *)(codeBase + *nameRef++);
p->ordinal = *ordinal++;
++p;
}
qsort(ptab, exports->NumberOfNames, sizeof(struct NAME_TABLE), _compare);
return ptab;
}
FARPROC MemoryGetProcAddress(HMEMORYMODULE module, const char *name)
{
unsigned char *codeBase = ((PMEMORYMODULE)module)->codeBase;
int idx=-1;
DWORD i, *nameRef;
WORD *ordinal;
PIMAGE_EXPORT_DIRECTORY exports;
PIMAGE_DATA_DIRECTORY directory = GET_HEADER_DICTIONARY((PMEMORYMODULE)module, IMAGE_DIRECTORY_ENTRY_EXPORT);
if (directory->Size == 0)
if (directory->Size == 0) {
// no export table found
return NULL;
}
exports = (PIMAGE_EXPORT_DIRECTORY)(codeBase + directory->VirtualAddress);
if (exports->NumberOfNames == 0 || exports->NumberOfFunctions == 0)
exports = (PIMAGE_EXPORT_DIRECTORY) (codeBase + directory->VirtualAddress);
if (exports->NumberOfNames == 0 || exports->NumberOfFunctions == 0) {
// DLL doesn't export anything
return NULL;
}
if (HIWORD(name)) {
struct NAME_TABLE *ptab;
struct NAME_TABLE *found;
ptab = GetNameTable((PMEMORYMODULE)module);
if (ptab == NULL)
// some failure
return NULL;
found = bsearch(&name, ptab, exports->NumberOfNames, sizeof(struct NAME_TABLE), _find);
if (found == NULL)
// search function name in list of exported names
nameRef = (DWORD *) (codeBase + exports->AddressOfNames);
ordinal = (WORD *) (codeBase + exports->AddressOfNameOrdinals);
for (i=0; i<exports->NumberOfNames; i++, nameRef++, ordinal++) {
if (_stricmp(name, (const char *) (codeBase + (*nameRef))) == 0) {
idx = *ordinal;
break;
}
}
if (idx == -1) {
// exported symbol not found
return NULL;
idx = found->ordinal;
}
else
idx = LOWORD(name) - exports->Base;
if ((DWORD)idx > exports->NumberOfFunctions)
if ((DWORD)idx > exports->NumberOfFunctions) {
// name <-> ordinal number don't match
return NULL;
}
// AddressOfFunctions contains the RVAs to the "real" functions
return (FARPROC)(codeBase + *(DWORD *)(codeBase + exports->AddressOfFunctions + (idx*4)));
return (FARPROC) (codeBase + (*(DWORD *) (codeBase + exports->AddressOfFunctions + (idx*4))));
}
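
The rewritten lookup above replaces the old sorted name table with a linear, case-insensitive scan: AddressOfNames[i] pairs with AddressOfNameOrdinals[i], and that ordinal indexes AddressOfFunctions. A toy model with plain lists:

    def memory_get_proc_address(names, name_ordinals, functions, wanted):
        for name, ordinal in zip(names, name_ordinals):
            if name.lower() == wanted.lower():   # mirrors the _stricmp above
                if ordinal > len(functions):     # name <-> ordinal mismatch
                    return None
                return functions[ordinal]
        return None                              # exported symbol not found

    print(memory_get_proc_address(['Foo', 'Bar'], [1, 0], [0x1000, 0x2000], 'bar'))
    # -> 4096 (i.e. 0x1000)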
void MemoryFreeLibrary(HMEMORYMODULE mod)
@@ -657,32 +459,29 @@ void MemoryFreeLibrary(HMEMORYMODULE mod)
int i;
PMEMORYMODULE module = (PMEMORYMODULE)mod;
if (module != NULL)
{
if (module->initialized != 0)
{
if (module != NULL) {
if (module->initialized != 0) {
// notify library about detaching from process
DllEntryProc DllEntry = (DllEntryProc)(module->codeBase + module->headers->OptionalHeader.AddressOfEntryPoint);
DllEntryProc DllEntry = (DllEntryProc) (module->codeBase + module->headers->OptionalHeader.AddressOfEntryPoint);
(*DllEntry)((HINSTANCE)module->codeBase, DLL_PROCESS_DETACH, 0);
module->initialized = 0;
}
if (module->modules != NULL)
{
if (module->modules != NULL) {
// free previously opened libraries
for (i=0; i<module->numModules; i++)
if (module->modules[i] != INVALID_HANDLE_VALUE)
MyFreeLibrary(module->modules[i]);
for (i=0; i<module->numModules; i++) {
if (module->modules[i] != INVALID_HANDLE_VALUE) {
FreeLibrary(module->modules[i]);
}
}
free(module->modules);
}
if (module->codeBase != NULL)
if (module->codeBase != NULL) {
// release memory of library
VirtualFree(module->codeBase, 0, MEM_RELEASE);
if (module->name_table != NULL)
free(module->name_table);
}
HeapFree(GetProcessHeap(), 0, module);
}

View File

@@ -1,12 +1,12 @@
/*
* Memory DLL loading code
* Version 0.0.2
* Version 0.0.3
*
* Copyright (c) 2004-2005 by Joachim Bauch / mail@joachim-bauch.de
* Copyright (c) 2004-2012 by Joachim Bauch / mail@joachim-bauch.de
* http://www.joachim-bauch.de
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
@@ -19,7 +19,7 @@
*
* The Initial Developer of the Original Code is Joachim Bauch.
*
* Portions created by Joachim Bauch are Copyright (C) 2004-2005
* Portions created by Joachim Bauch are Copyright (C) 2004-2012
* Joachim Bauch. All Rights Reserved.
*
*/
@@ -35,22 +35,12 @@ typedef void *HMEMORYMODULE;
extern "C" {
#endif
typedef void *(*FINDPROC)();
extern FINDPROC findproc;
extern void *findproc_data;
HMEMORYMODULE MemoryLoadLibrary(char *, const void *);
HMEMORYMODULE MemoryLoadLibrary(const void *);
FARPROC MemoryGetProcAddress(HMEMORYMODULE, const char *);
void MemoryFreeLibrary(HMEMORYMODULE);
BOOL MyFreeLibrary(HMODULE hModule);
HMODULE MyLoadLibrary(char *lpFileName);
FARPROC MyGetProcAddress(HMODULE hModule, LPCSTR lpProcName);
HMODULE MyGetModuleHandle(LPCTSTR lpModuleName);
#ifdef __cplusplus
}
#endif

View File

@@ -8,53 +8,66 @@ __docformat__ = 'restructuredtext en'
import os, shutil, subprocess
from setup import Command, __appname__, __version__
from setup import Command, __appname__, __version__, installer_name
from setup.installer import VMInstaller
class Win(Command):
description = 'Build windows binary installers'
sub_commands = ['win32']
sub_commands = ['win64', 'win32']
def run(self, opts):
pass
class Win32(VMInstaller):
description = 'Build 32bit windows binary installer'
INSTALLER_EXT = 'exe'
VM_NAME = 'xp_build'
VM = '/vmware/bin/%s'%VM_NAME
VM_CHECK = 'calibre_windows_xp_home'
class WinBase(VMInstaller):
FREEZE_COMMAND = 'win32_freeze'
FREEZE_TEMPLATE = 'python -OO setup.py {freeze_command} --no-ice'
INSTALLER_EXT = 'msi'
SHUTDOWN_CMD = ['shutdown.exe', '-s', '-f', '-t', '0']
def sign_msi(self):
print ('Signing installers ...')
subprocess.check_call(['ssh', self.VM_NAME, '~/sign.sh'], shell=False)
class Win32(WinBase):
description = 'Build 32bit windows binary installer'
VM_NAME = 'xp_build'
VM = '/vmware/bin/%s'%VM_NAME
VM_CHECK = 'calibre_windows_xp_home'
@property
def msi64(self):
return installer_name('msi', is64bit=True)
def do_dl(self, installer, errmsg):
subprocess.check_call(('scp',
'%s:build/%s/%s'%(self.VM_NAME, __appname__, installer), 'dist'))
if not os.path.exists(installer):
self.warn(errmsg)
raise SystemExit(1)
def download_installer(self):
installer = self.installer()
if os.path.exists('build/winfrozen'):
shutil.rmtree('build/winfrozen')
self.sign_msi()
subprocess.check_call(('scp',
'xp_build:build/%s/%s'%(__appname__, installer), 'dist'))
if not os.path.exists(installer):
self.warn('Failed to freeze')
raise SystemExit(1)
self.do_dl(installer, 'Failed to freeze')
installer = 'dist/%s-portable-installer-%s.exe'%(__appname__, __version__)
subprocess.check_call(('scp',
'xp_build:build/%s/%s'%(__appname__, installer), 'dist'))
if not os.path.exists(installer):
self.warn('Failed to get portable installer')
raise SystemExit(1)
self.do_dl(installer, 'Failed to get portable installer')
class Win64(WinBase):
description = 'Build 64bit windows binary installer'
VM_NAME = 'win64'
VM = '/vmware/bin/%s'%VM_NAME
VM_CHECK = 'win64'
IS_64_BIT = True
BUILD_PREFIX = WinBase.BUILD_PREFIX + [
'if [ -f "$HOME/.bash_profile" ] ; then',
' source "$HOME/.bash_profile"',
'fi',
]

View File

@@ -25,6 +25,7 @@ LZMA = r'Q:\easylzma\build\easylzma-0.0.8'
VERSION = re.sub('[a-z]\d+', '', __version__)
WINVER = VERSION+'.0'
machine = 'X64' if is64bit else 'X86'
DESCRIPTIONS = {
'calibre' : 'The main calibre program',
@@ -90,6 +91,7 @@ class Win32Freeze(Command, WixMixIn):
if not is64bit:
self.build_portable()
self.build_portable_installer()
self.sign_installers()
def remove_CRT_from_manifests(self):
'''
@@ -110,7 +112,7 @@ class Win32Freeze(Command, WixMixIn):
self.info('Removing CRT dependency from manifest of: %s'%bn)
# Blank out the bytes corresponding to the dependency specification
nraw = repl_pat.sub(lambda m: b' '*len(m.group()), raw)
if len(nraw) != len(raw):
if len(nraw) != len(raw) or nraw == raw:
raise Exception('Something went wrong with %s'%bn)
with open(dll, 'wb') as f:
f.write(nraw)
@@ -132,6 +134,23 @@ class Win32Freeze(Command, WixMixIn):
# used instead
shutil.copy2(f, tgt)
def fix_pyd_bootstraps_in(self, folder):
for dirpath, dirnames, filenames in os.walk(folder):
for f in filenames:
name, ext = os.path.splitext(f)
bpy = self.j(dirpath, name + '.py')
if ext == '.pyd' and os.path.exists(bpy):
with open(bpy, 'rb') as f:
raw = f.read().strip()
if (not raw.startswith('def __bootstrap__') or not
raw.endswith('__bootstrap__()')):
raise Exception('The file %r has non'
' bootstrap code'%self.j(dirpath, f))
for ext in ('.py', '.pyc', '.pyo'):
x = self.j(dirpath, name+ext)
if os.path.exists(x):
os.remove(x)
def freeze(self):
shutil.copy2(self.j(self.src_root, 'LICENSE'), self.base)
@@ -184,23 +203,12 @@ class Win32Freeze(Command, WixMixIn):
shutil.copytree(self.j(comext, 'shell'), self.j(sp_dir, 'win32com', 'shell'))
shutil.rmtree(comext)
# Fix PyCrypto, removing the bootstrap .py modules that load the .pyd
# modules, since they do not work when in a zip file
for crypto_dir in glob.glob(self.j(sp_dir, 'pycrypto-*', 'Crypto')):
for dirpath, dirnames, filenames in os.walk(crypto_dir):
for f in filenames:
name, ext = os.path.splitext(f)
if ext == '.pyd':
with open(self.j(dirpath, name+'.py')) as f:
raw = f.read().strip()
if (not raw.startswith('def __bootstrap__') or not
raw.endswith('__bootstrap__()')):
raise Exception('The PyCrypto file %r has non'
' bootstrap code'%self.j(dirpath, f))
for ext in ('.py', '.pyc', '.pyo'):
x = self.j(dirpath, name+ext)
if os.path.exists(x):
os.remove(x)
# Fix PyCrypto and Pillow, removing the bootstrap .py modules that load
# the .pyd modules, since they do not work when in a zip file
for folder in os.listdir(sp_dir):
folder = self.j(sp_dir, folder)
if os.path.isdir(folder):
self.fix_pyd_bootstraps_in(folder)
for pat in (r'PyQt4\uic\port_v3', ):
x = glob.glob(self.j(self.lib_dir, 'site-packages', pat))[0]
@@ -367,7 +375,7 @@ class Win32Freeze(Command, WixMixIn):
if not self.opts.keep_site:
os.remove(y)
def run_builder(self, cmd):
def run_builder(self, cmd, show_output=False):
p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
if p.wait() != 0:
@@ -376,6 +384,9 @@ class Win32Freeze(Command, WixMixIn):
self.info(p.stdout.read())
self.info(p.stderr.read())
sys.exit(1)
if show_output:
self.info(p.stdout.read())
self.info(p.stderr.read())
def build_portable_installer(self):
zf = self.a(self.j('dist', 'calibre-portable-%s.zip.lz'%VERSION))
@@ -401,7 +412,7 @@ class Win32Freeze(Command, WixMixIn):
exe = self.j('dist', 'calibre-portable-installer-%s.exe'%VERSION)
if self.newer(exe, [obj, xobj]):
self.info('Linking', exe)
cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:X86',
cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:'+machine,
'/LIBPATH:'+self.obj_dir, '/SUBSYSTEM:WINDOWS',
'/LIBPATH:'+(LZMA+r'\lib\Release'),
'/RELEASE', '/MANIFEST', '/MANIFESTUAC:level="asInvoker" uiAccess="false"',
@@ -458,7 +469,7 @@ class Win32Freeze(Command, WixMixIn):
exe = self.j(base, 'calibre-portable.exe')
if self.newer(exe, [obj]):
self.info('Linking', exe)
cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:X86',
cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:'+machine,
'/LIBPATH:'+self.obj_dir, '/SUBSYSTEM:WINDOWS',
'/RELEASE',
'/ENTRY:wWinMainCRTStartup',
@@ -478,6 +489,17 @@ class Win32Freeze(Command, WixMixIn):
subprocess.check_call([LZMA + r'\bin\elzma.exe', '-9', '--lzip', name])
def sign_installers(self):
self.info('Signing installers...')
files = glob.glob(self.j('dist', '*.msi')) + glob.glob(self.j('dist',
'*.exe'))
if not files:
raise ValueError('No installers found')
subprocess.check_call(['signtool.exe', 'sign', '/a', '/d',
'calibre - E-book management', '/du',
'http://calibre-ebook.com', '/t',
'http://timestamp.verisign.com/scripts/timstamp.dll'] + files)
def add_dir_to_zip(self, zf, path, prefix=''):
'''
Add a directory recursively to the zip file with an optional prefix.
@@ -499,9 +521,11 @@ class Win32Freeze(Command, WixMixIn):
finally:
os.chdir(cwd)
def build_launchers(self):
def build_launchers(self, debug=False):
if not os.path.exists(self.obj_dir):
os.makedirs(self.obj_dir)
dflags = (['/Zi'] if debug else [])
dlflags = (['/DEBUG'] if debug else ['/INCREMENTAL:NO'])
base = self.j(self.src_root, 'setup', 'installer', 'windows')
sources = [self.j(base, x) for x in ['util.c', 'MemoryModule.c']]
headers = [self.j(base, x) for x in ['util.h', 'MemoryModule.h']]
@@ -510,20 +534,20 @@ class Win32Freeze(Command, WixMixIn):
cflags += ['/DPYDLL="python%s.dll"'%self.py_ver, '/IC:/Python%s/include'%self.py_ver]
for src, obj in zip(sources, objects):
if not self.newer(obj, headers+[src]): continue
cmd = [msvc.cc] + cflags + ['/Fo'+obj, '/Tc'+src]
self.run_builder(cmd)
cmd = [msvc.cc] + cflags + dflags + ['/Fo'+obj, '/Tc'+src]
self.run_builder(cmd, show_output=True)
dll = self.j(self.obj_dir, 'calibre-launcher.dll')
ver = '.'.join(__version__.split('.')[:2])
if self.newer(dll, objects):
cmd = [msvc.linker, '/DLL', '/INCREMENTAL:NO', '/VERSION:'+ver,
'/OUT:'+dll, '/nologo', '/MACHINE:X86'] + objects + \
cmd = [msvc.linker, '/DLL', '/VERSION:'+ver, '/OUT:'+dll,
'/nologo', '/MACHINE:'+machine] + dlflags + objects + \
[self.embed_resources(dll),
'/LIBPATH:C:/Python%s/libs'%self.py_ver,
'python%s.lib'%self.py_ver,
'/delayload:python%s.dll'%self.py_ver]
self.info('Linking calibre-launcher.dll')
self.run_builder(cmd)
self.run_builder(cmd, show_output=True)
src = self.j(base, 'main.c')
shutil.copy2(dll, self.base)
@@ -541,16 +565,16 @@ class Win32Freeze(Command, WixMixIn):
dest = self.j(self.obj_dir, bname+'.obj')
if self.newer(dest, [src]+headers):
self.info('Compiling', bname)
cmd = [msvc.cc] + xflags + ['/Tc'+src, '/Fo'+dest]
cmd = [msvc.cc] + xflags + dflags + ['/Tc'+src, '/Fo'+dest]
self.run_builder(cmd)
exe = self.j(self.base, bname+'.exe')
lib = dll.replace('.dll', '.lib')
if self.newer(exe, [dest, lib, self.rc_template, __file__]):
self.info('Linking', bname)
cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:X86',
cmd = [msvc.linker] + ['/MACHINE:'+machine,
'/LIBPATH:'+self.obj_dir, '/SUBSYSTEM:'+subsys,
'/LIBPATH:C:/Python%s/libs'%self.py_ver, '/RELEASE',
'/OUT:'+exe, self.embed_resources(exe),
'/OUT:'+exe] + dlflags + [self.embed_resources(exe),
dest, lib]
self.run_builder(cmd)
@@ -563,12 +587,18 @@ class Win32Freeze(Command, WixMixIn):
for x in (self.plugins_dir, self.dll_dir):
for pyd in os.listdir(x):
if pyd.endswith('.pyd') and pyd not in {
'unrar.pyd', 'sqlite_custom.pyd', 'calibre_style.pyd'}:
# sqlite_custom has to be a file for
# sqlite_load_extension to work
# For some reason unrar.pyd crashes when processing
# password protected RAR files if loaded from inside
# pylib.zip
'sqlite_custom.pyd',
# calibre_style has to be loaded by Qt therefore it
# must be a file
'calibre_style.pyd',
# Because of https://github.com/fancycode/MemoryModule/issues/4
# any extensions that use C++ exceptions must be loaded
# from files
'unrar.pyd', 'wpd.pyd', 'podofo.pyd',
'progress_indicator.pyd',
}:
self.add_to_zipfile(zf, pyd, x)
os.remove(self.j(x, pyd))
@@ -581,6 +611,7 @@ class Win32Freeze(Command, WixMixIn):
sp = self.j(self.lib_dir, 'site-packages')
# Special handling for PIL and pywin32
handled = set(['PIL.pth', 'pywin32.pth', 'PIL', 'win32'])
if not is64bit:
self.add_to_zipfile(zf, 'PIL', sp)
base = self.j(sp, 'win32', 'lib')
for x in os.listdir(base):
@@ -593,16 +624,17 @@ class Win32Freeze(Command, WixMixIn):
self.add_to_zipfile(zf, x, base)
handled.add('easy-install.pth')
# We dont want the site.py from site-packages
handled.add('site.pyo')
for d in self.get_pth_dirs(self.j(sp, 'easy-install.pth')):
handled.add(self.b(d))
for x in os.listdir(d):
if x == 'EGG-INFO':
if x in {'EGG-INFO', 'site.py', 'site.pyc', 'site.pyo'}:
continue
self.add_to_zipfile(zf, x, d)
# The rest of site-packages
# We dont want the site.py from site-packages
handled.add('site.pyo')
for x in os.listdir(sp):
if x in handled or x.endswith('.egg-info'):
continue
@@ -622,8 +654,10 @@ class Win32Freeze(Command, WixMixIn):
line = line.strip()
if not line or line.startswith('#') or line.startswith('import'):
continue
candidate = self.j(base, line)
candidate = os.path.abspath(self.j(base, line))
if os.path.exists(candidate):
if not os.path.isdir(candidate):
raise ValueError('%s is not a directory'%candidate)
yield candidate
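
A quick demo of the filtering contract in get_pth_dirs() above: blank lines, comments and 'import ...' lines are skipped, and every surviving line is taken as a directory relative to the .pth file (the egg name below is illustrative only):

    lines = ['# a comment', 'import sys', '', './pycrypto-2.6-py2.7-win32.egg']
    dirs = [l for l in (x.strip() for x in lines)
            if l and not l.startswith('#') and not l.startswith('import')]
    print(dirs)  # ['./pycrypto-2.6-py2.7-win32.egg']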
def add_to_zipfile(self, zf, name, base, exclude=frozenset()):

View File

@@ -109,10 +109,8 @@ of mimetypes from the windows registry
Python packages
------------------
Install setuptools from http://pypi.python.org/pypi/setuptools. If there are no
Windows binaries already compiled for the version of Python you are using, then
download the source and run the following command in the folder where the
source has been unpacked::
Install setuptools from http://pypi.python.org/pypi/setuptools. Use the source
tarball. Edit setup.py and set zip_safe=False. Then run::
python setup.py install
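
A hedged sketch of the setup.py edit described above; the real setuptools setup() call has many more arguments, and the names below are illustrative, not taken from this document. The point is only the zip_safe flag, which forces an unpacked (non-egg) install:

    from setuptools import setup

    setup(
        name='demo',       # illustrative package name
        version='0.1',
        py_modules=[],
        zip_safe=False,    # the one setting the instructions above ask for
    )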

View File

@@ -418,9 +418,12 @@ static BOOL move_program() {
}
if (MoveFileEx(L"Calibre Portable\\Calibre", L"..\\Calibre", 0) == 0) {
show_last_error(L"Failed to move calibre program folder");
Sleep(4000); // Sleep and try again
if (MoveFileEx(L"Calibre Portable\\Calibre", L"..\\Calibre", 0) == 0) {
show_last_error(L"Failed to move calibre program folder. This is usually caused by an antivirus program or a file sync program like DropBox. Turn them off temporarily and try again. Underlying error: ");
return false;
}
}
if (!directory_exists(L"..\\Calibre Library")) {
MoveFileEx(L"Calibre Portable\\Calibre Library", L"..\\Calibre Library", 0);

View File

@@ -16,6 +16,7 @@ static char python_dll[] = PYDLL;
void set_gui_app(char yes) { GUI_APP = yes; }
char is_gui_app() { return GUI_APP; }
int calibre_show_python_error(const wchar_t *preamble, int code);
// memimporter {{{
@@ -63,17 +64,6 @@ static void* FindLibrary(char *name, PyObject *callback)
return p;
}
static PyObject *set_find_proc(PyObject *self, PyObject *args)
{
PyObject *callback = NULL;
if (!PyArg_ParseTuple(args, "|O:set_find_proc", &callback))
return NULL;
Py_DECREF((PyObject *)findproc_data);
Py_INCREF(callback);
findproc_data = (void *)callback;
return Py_BuildValue("i", 1);
}
static PyObject *
import_module(PyObject *self, PyObject *args)
{
@@ -92,7 +82,7 @@ import_module(PyObject *self, PyObject *args)
&data, &size,
&initfuncname, &modname, &pathname))
return NULL;
hmem = MemoryLoadLibrary(pathname, data);
hmem = MemoryLoadLibrary(data);
if (!hmem) {
PyErr_Format(*DLL_ImportError,
"MemoryLoadLibrary() failed loading %s", pathname);
@@ -119,14 +109,14 @@ import_module(PyObject *self, PyObject *args)
static PyMethodDef methods[] = {
{ "import_module", import_module, METH_VARARGS,
"import_module(code, initfunc, dllname[, finder]) -> module" },
{ "set_find_proc", set_find_proc, METH_VARARGS },
{ NULL, NULL }, /* Sentinel */
};
// }}}
static int _show_error(const wchar_t *preamble, const wchar_t *msg, const int code) {
wchar_t *buf, *cbuf;
wchar_t *buf;
char *cbuf;
buf = (wchar_t*)LocalAlloc(LMEM_ZEROINIT, sizeof(wchar_t)*
(wcslen(msg) + wcslen(preamble) + 80));
@@ -142,7 +132,7 @@ static int _show_error(const wchar_t *preamble, const wchar_t *msg, const int co
else {
cbuf = (char*) calloc(10+(wcslen(buf)*4), sizeof(char));
if (cbuf) {
if (WideCharToMultiByte(CP_UTF8, 0, buf, -1, cbuf, 10+(wcslen(buf)*4), NULL, NULL) != 0) printf_s(cbuf);
if (WideCharToMultiByte(CP_UTF8, 0, buf, -1, cbuf, (int)(10+(wcslen(buf)*4)), NULL, NULL) != 0) printf_s(cbuf);
free(cbuf);
}
}
@@ -165,6 +155,7 @@ int show_last_error_crt(wchar_t *preamble) {
int show_last_error(wchar_t *preamble) {
wchar_t *msg = NULL;
DWORD dw = GetLastError();
int ret;
FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |
@@ -173,10 +164,13 @@ int show_last_error(wchar_t *preamble) {
NULL,
dw,
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
&msg,
0, NULL );
(LPWSTR)&msg,
0,
NULL );
return _show_error(preamble, msg, (int)dw);
ret = _show_error(preamble, msg, (int)dw);
if (msg != NULL) LocalFree(msg);
return ret;
}
char* get_app_dir() {
@@ -254,10 +248,10 @@ void setup_stream(const char *name, const char *errors, UINT cp) {
else if (cp == CP_UTF7) _snprintf_s(buf, 100, _TRUNCATE, "%s", "utf-7");
else _snprintf_s(buf, 100, _TRUNCATE, "cp%d", cp);
stream = PySys_GetObject(name);
stream = PySys_GetObject((char*)name);
if (!PyFile_SetEncodingAndErrors(stream, buf, errors))
ExitProcess(calibre_show_python_error("Failed to set stream encoding", 1));
if (!PyFile_SetEncodingAndErrors(stream, buf, (char*)errors))
ExitProcess(calibre_show_python_error(L"Failed to set stream encoding", 1));
free(buf);
@@ -372,7 +366,6 @@ void initialize_interpreter(wchar_t *outr, wchar_t *errr,
}
PySys_SetObject("argv", argv);
findproc = FindLibrary;
Py_InitModule3("_memimporter", methods, module_doc);
}

View File

@@ -2,7 +2,7 @@
<Wix xmlns='http://schemas.microsoft.com/wix/2006/wi' xmlns:util="http://schemas.microsoft.com/wix/UtilExtension"
>
<Product Name='{app}' Id='*' UpgradeCode='{upgrade_code}'
<Product Name='{app}{x64}' Id='*' UpgradeCode='{upgrade_code}'
Language='1033' Codepage='1252' Version='{version}' Manufacturer='Kovid Goyal'>
<Package Id='*' Keywords='Installer' Description="{app} Installer"
@@ -29,19 +29,24 @@
Language="1033"
Property="NEWPRODUCTFOUND"/>
</Upgrade>
<CustomAction Id="PreventDowngrading" Error="Newer version already installed."/>
<CustomAction Id="PreventDowngrading" Error="Newer version of {app} already installed. If you want to downgrade you must uninstall {app} first."/>
<Property Id="APPLICATIONFOLDER">
<RegistrySearch Id='calibreInstDir' Type='raw'
Root='HKLM' Key="Software\{app}\Installer" Name="InstallPath" />
Root='HKLM' Key="Software\{app}{x64}\Installer" Name="InstallPath" />
</Property>
<Directory Id='TARGETDIR' Name='SourceDir'>
<Directory Id='ProgramFilesFolder' Name='PFiles'>
<Directory Id='APPLICATIONFOLDER' Name='{app}' />
<Directory Id='{ProgramFilesFolder}' Name='PFiles'>
<!-- The name must be calibre on 32 bit to ensure
that the component guids dont change compared
to previous msis. However, on 64 bit it must
be Calibre2 otherwise by default it will
install to C:\Program Files\calibre -->
<Directory Id='APPLICATIONFOLDER' Name="{appfolder}" />
</Directory>
<Directory Id="ProgramMenuFolder">
<Directory Id="ApplicationProgramsFolder" Name="{app} - E-book Management"/>
<Directory Id="ApplicationProgramsFolder" Name="{app}{x64} - E-book Management"/>
</Directory>
<Directory Id="DesktopFolder" Name="Desktop"/>
</Directory>
@ -50,24 +55,24 @@
{app_components}
<Component Id="AddToPath" Guid="*">
<Environment Id='UpdatePath' Name='PATH' Action='set' System='yes' Part='last' Value='[APPLICATIONFOLDER]' />
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}" Name="system_path_updated" Type="integer" Value="1" KeyPath="yes"/>
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}{x64}" Name="system_path_updated" Type="integer" Value="1" KeyPath="yes"/>
</Component>
<Component Id="RememberInstallDir" Guid="*">
<RegistryValue Root="HKLM" Key="Software\{app}\Installer" Name="InstallPath" Type="string" Value="[APPLICATIONFOLDER]" KeyPath="yes"/>
<RegistryValue Root="HKLM" Key="Software\{app}{x64}\Installer" Name="InstallPath" Type="string" Value="[APPLICATIONFOLDER]" KeyPath="yes"/>
</Component>
</DirectoryRef>
<DirectoryRef Id="ApplicationProgramsFolder">
<Component Id="StartMenuShortcuts" Guid="*">
<Shortcut Id="s1" Name="{app} - E-book management"
<Shortcut Id="s1" Name="{app}{x64} - E-book management"
Description="Manage your e-book collection and download news"
Target="[#{exe_map[calibre]}]"
WorkingDirectory="APPLICATIONROOTDIRECTORY" />
<Shortcut Id="s2" Name="E-book viewer"
<Shortcut Id="s2" Name="E-book viewer{x64}"
Description="Viewer for all the major e-book formats"
Target="[#{exe_map[ebook-viewer]}]"
WorkingDirectory="APPLICATIONROOTDIRECTORY" />
<Shortcut Id="s3" Name="LRF viewer"
<Shortcut Id="s3" Name="LRF viewer{x64}"
Description="Viewer for LRF format e-books"
Target="[#{exe_map[lrfviewer]}]"
WorkingDirectory="APPLICATIONROOTDIRECTORY" />
@ -79,17 +84,17 @@
Target="http://calibre-ebook.com/get-involved"/>
<RemoveFolder Id="ApplicationProgramsFolder" On="uninstall"/>
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}" Name="start_menu_shortcuts_installed" Type="integer" Value="1" KeyPath="yes"/>
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}{x64}" Name="start_menu_shortcuts_installed" Type="integer" Value="1" KeyPath="yes"/>
</Component>
</DirectoryRef>
<DirectoryRef Id="DesktopFolder">
<Component Id="DesktopShortcut" Guid="*">
<Shortcut Id="ds1" Name="{app} - E-book management"
<Shortcut Id="ds1" Name="{app}{x64} - E-book management"
Description="Manage your e-book collection and download news"
Target="[#{exe_map[calibre]}]"
WorkingDirectory="APPLICATIONROOTDIRECTORY" />
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}" Name="desktop_shortcut_installed" Type="integer" Value="1" KeyPath="yes"/>
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}{x64}" Name="desktop_shortcut_installed" Type="integer" Value="1" KeyPath="yes"/>
</Component>
</DirectoryRef>
@ -122,17 +127,35 @@
<!-- Add icon to entry in Add/Remove programs -->
<Icon Id="main_icon" SourceFile="{main_icon}"/>
<Property Id="ARPPRODUCTICON" Value="main_icon" />
<Property Id="ARPURLINFOABOUT" Value="http://calibre-ebook.com" />
<Property Id='ARPHELPLINK' Value="http://calibre-ebook.com/help" />
<Property Id='ARPURLUPDATEINFO' Value="http://calibre-ebook.com/download_windows" />
<SetProperty Id="ARPINSTALLLOCATION" Value="[APPLICATIONFOLDER]" After="CostFinalize" />
<Condition
Message="This application is only supported on Windows XP SP3, or higher.">
<![CDATA[Installed OR (VersionNT >= 501)]]>
Message="This application is only supported on {minverhuman}, or higher.">
<![CDATA[Installed OR (VersionNT >= {minver})]]>
</Condition>
<!-- On 64 bit installers there is a bug in WiX that causes the
WixSetDefaultPerMachineFolder action to incorrectly set
APPLICATIONFOLDER to the x86 value, so we override it. See
http://stackoverflow.com/questions/5479790/wix-how-to-override-c-program-files-x86-on-x64-machine-in-wixui-advanced-s
-->
<CustomAction
Id="OverwriteWixSetDefaultPerMachineFolder"
Property="WixPerMachineFolder"
Value="[APPLICATIONFOLDER]"
Execute="immediate"
/>
<InstallExecuteSequence>
<Custom Action="PreventDowngrading" After="FindRelatedProducts">NEWPRODUCTFOUND</Custom>
{fix_wix}
<RemoveExistingProducts After="InstallFinalize" />
</InstallExecuteSequence>
<InstallUISequence>
<Custom Action="PreventDowngrading" After="FindRelatedProducts">NEWPRODUCTFOUND</Custom>
{fix_wix}
</InstallUISequence>
<UI>

View File

@ -6,11 +6,20 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, shutil, subprocess
import os, shutil, subprocess, sys
from setup import __appname__, __version__, basenames
from setup.build_environment import is64bit
if is64bit:
WIXP = r'C:\Program Files (x86)\WiX Toolset v3.6'
UPGRADE_CODE = '5DD881FF-756B-4097-9D82-8C0F11D521EA'
MINVERHUMAN = 'Windows Vista'
else:
WIXP = r'C:\Program Files\WiX Toolset v3.6'
UPGRADE_CODE = 'BEB2A80D-E902-4DAD-ADF9-8BD2DA42CFE1'
MINVERHUMAN = 'Windows XP SP3'
WIXP = r'C:\Program Files\Windows Installer XML v3.5'
CANDLE = WIXP+r'\bin\candle.exe'
LIGHT = WIXP+r'\bin\light.exe'
@ -28,8 +37,14 @@ class WixMixIn:
components = self.get_components_from_files()
wxs = template.format(
app = __appname__,
appfolder = 'Calibre2' if is64bit else __appname__,
version = __version__,
upgrade_code = 'BEB2A80D-E902-4DAD-ADF9-8BD2DA42CFE1',
upgrade_code = UPGRADE_CODE,
ProgramFilesFolder = 'ProgramFiles64Folder' if is64bit else 'ProgramFilesFolder',
x64 = ' 64bit' if is64bit else '',
minverhuman = MINVERHUMAN,
minver = '600' if is64bit else '501',
fix_wix = '<Custom Action="OverwriteWixSetDefaultPerMachineFolder" After="WixSetDefaultPerMachineFolder" />' if is64bit else '',
compression = self.opts.msi_compression,
app_components = components,
exe_map = self.smap,
@ -48,14 +63,15 @@ class WixMixIn:
with open(enusf, 'wb') as f:
f.write(enus)
wixobj = self.j(self.installer_dir, __appname__+'.wixobj')
cmd = [CANDLE, '-nologo', '-ext', 'WiXUtilExtension', '-o', wixobj, wxsf]
arch = 'x64' if is64bit else 'x86'
cmd = [CANDLE, '-nologo', '-arch', arch, '-ext', 'WiXUtilExtension', '-o', wixobj, wxsf]
self.info(*cmd)
subprocess.check_call(cmd)
self.run_wix(cmd)
self.installer = self.j(self.src_root, 'dist')
if not os.path.exists(self.installer):
os.makedirs(self.installer)
self.installer = self.j(self.installer, '%s-%s.msi' % (__appname__,
__version__))
self.installer = self.j(self.installer, '%s%s-%s.msi' % (__appname__,
('-64bit' if is64bit else ''), __version__))
license = self.j(self.src_root, 'LICENSE.rtf')
banner = self.j(self.src_root, 'icons', 'wix-banner.bmp')
dialog = self.j(self.src_root, 'icons', 'wix-dialog.bmp')
@ -66,13 +82,27 @@ class WixMixIn:
'-dWixUILicenseRtf='+license,
'-dWixUIBannerBmp='+banner,
'-dWixUIDialogBmp='+dialog]
cmd.append('-sice:ICE60') # No language in dlls warning
cmd.extend([
'-sice:ICE60',# No language in dlls warning
'-sice:ICE61',# Allow upgrading with same version number
'-sice:ICE40', # Re-install mode overridden
'-sice:ICE69', # Shortcut components are part of a different feature than the files they point to
])
if self.opts.no_ice:
cmd.append('-sval')
if self.opts.verbose:
cmd.append('-v')
self.info(*cmd)
subprocess.check_call(cmd)
self.run_wix(cmd)
def run_wix(self, cmd):
p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
ret = p.wait()
self.info(p.stdout.read())
self.info(p.stderr.read())
if ret != 0:
sys.exit(1)
def get_components_from_files(self):
@ -103,7 +133,20 @@ class WixMixIn:
(fid, f, x, checksum),
'</Component>'
]
components.append(''.join(c))
if x.endswith('.exe') and not x.startswith('pdf'):
# Add the executable to app paths so that users can
# launch it from the run dialog even if it is not on
# the path. See http://msdn.microsoft.com/en-us/library/windows/desktop/ee872121(v=vs.85).aspx
c[-1:-1] = [
('<RegistryValue Root="HKLM" '
r'Key="SOFTWARE\Microsoft\Windows\CurrentVersion\App '
r'Paths\%s" Value="[#file_%d]" Type="string" />'%(x, fid)),
('<RegistryValue Root="HKLM" '
r'Key="SOFTWARE\Microsoft\Windows\CurrentVersion\App '
r'Paths\{0}" Name="Path" Value="[APPLICATIONFOLDER]" '
'Type="string" />'.format(x)),
]
components.append('\n'.join(c))
return components
components = process_dir(os.path.abspath(self.base))
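As a concrete illustration of the App Paths entries generated above, here is a minimal sketch; the executable name and file id are invented for the example:

import sys

# hypothetical values; in the build script these come from the file
# walk (x is the executable's file name, fid its component file id)
x, fid = 'viewer.exe', 7
print('<RegistryValue Root="HKLM" '
      r'Key="SOFTWARE\Microsoft\Windows\CurrentVersion\App '
      r'Paths\%s" Value="[#file_%d]" Type="string" />' % (x, fid))
# With keys like this in place, typing "viewer.exe" in the Run dialog
# launches the program even though the install dir is not on PATH.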
@ -114,4 +157,3 @@ class WixMixIn:
return '\t\t\t\t'+'\n\t\t\t\t'.join(components)

File diff suppressed because it is too large

View File

@ -29,6 +29,7 @@ STAGING_DIR = '/root/staging'
def installers():
installers = list(map(installer_name, ('dmg', 'msi', 'tar.bz2')))
installers.append(installer_name('tar.bz2', is64bit=True))
installers.append(installer_name('msi', is64bit=True))
installers.insert(0, 'dist/%s-%s.tar.xz'%(__appname__, __version__))
installers.append('dist/%s-portable-installer-%s.exe'%(__appname__, __version__))
return installers
@ -40,7 +41,7 @@ def installer_description(fname):
bits = '32' if 'i686' in fname else '64'
return bits + 'bit Linux binary'
if fname.endswith('.msi'):
return 'Windows installer'
return 'Windows %sinstaller'%('64bit ' if '64bit' in fname else '')
if fname.endswith('.dmg'):
return 'OS X dmg'
if fname.endswith('.exe'):

View File

@ -28,7 +28,7 @@ isunix = isosx or islinux
isportable = os.environ.get('CALIBRE_PORTABLE_BUILD', None) is not None
ispy3 = sys.version_info.major > 2
isxp = iswindows and sys.getwindowsversion().major < 6
is64bit = sys.maxint > (1 << 32)
is64bit = sys.maxsize > (1 << 32)
isworker = os.environ.has_key('CALIBRE_WORKER') or os.environ.has_key('CALIBRE_SIMPLE_WORKER')
if isworker:
os.environ.pop('CALIBRE_FORCE_ANSI', None)

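The sys.maxint to sys.maxsize switch matters because maxint no longer exists on Python 3; maxsize gives the same 64-bit test on both. A quick check:

import sys
# sys.maxsize is 2**63 - 1 on 64-bit builds, 2**31 - 1 on 32-bit ones
print(sys.maxsize, sys.maxsize > (1 << 32))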
View File

@ -148,10 +148,10 @@ def print_basic_debug_info(out=None):
out = functools.partial(prints, file=out)
import platform
from calibre.constants import (__appname__, get_version, isportable, isosx,
isfrozen)
isfrozen, is64bit)
out(__appname__, get_version(), 'Portable' if isportable else '',
'isfrozen:', isfrozen)
out(platform.platform(), platform.system())
'isfrozen:', isfrozen, 'is64bit:', is64bit)
out(platform.platform(), platform.system(), platform.architecture())
out(platform.system_alias(platform.system(), platform.release(),
platform.version()))
out('Python', platform.python_version())

View File

@ -182,7 +182,7 @@ def debug(ioreg_to_tmp=False, buf=None, plugins=None,
out(ioreg)
if hasattr(buf, 'getvalue'):
return buf.getvalue().decode('utf-8')
return buf.getvalue().decode('utf-8', 'replace')
finally:
sys.stdout = oldo
sys.stderr = olde

View File

@ -232,7 +232,7 @@ class ANDROID(USBMS):
'THINKPAD_TABLET', 'SGH-T989', 'YP-G70', 'STORAGE_DEVICE',
'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID',
'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E',
'NOVO7', 'MB526', '_USB#WYK7MSF8KE']
'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -243,7 +243,7 @@ class ANDROID(USBMS):
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0', 'XT875',
'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727',
'USB_FLASH_DRIVER', 'ANDROID', 'MID7042', '7035', 'VIEWPAD_7E',
'NOVO7', 'ADVANCED']
'NOVO7', 'ADVANCED', 'TABLET_PC']
OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -2357,6 +2357,8 @@ class KOBOTOUCH(KOBO):
update_query = 'UPDATE content SET Series=?, SeriesNumber==? where BookID is Null and ContentID = ?'
if book.series is None:
update_values = (None, None, book.contentID, )
elif book.series_index is None: # This should never happen, but...
update_values = (book.series, None, book.contentID, )
else:
update_values = (book.series, "%g"%book.series_index, book.contentID, )

View File

@ -16,6 +16,7 @@ const calibre_device_entry_t calibre_mtp_device_table[] = {
// Nexus 10
, { "Google", 0x18d1, "Nexus 10", 0x4ee2, DEVICE_FLAGS_ANDROID_BUGS}
, { "Google", 0x18d1, "Nexus 10", 0x4ee1, DEVICE_FLAGS_ANDROID_BUGS}
, { NULL, 0xffff, NULL, 0xffff, DEVICE_FLAG_NONE }
};

View File

@ -54,6 +54,8 @@ def synchronous(tlockname):
class ConnectionListener (Thread):
NOT_SERVICED_COUNT = 6
def __init__(self, driver):
Thread.__init__(self)
self.daemon = True
@ -78,8 +80,8 @@ class ConnectionListener (Thread):
if not self.driver.connection_queue.empty():
queue_not_serviced_count += 1
if queue_not_serviced_count >= 3:
self.driver._debug('queue not serviced')
if queue_not_serviced_count >= self.NOT_SERVICED_COUNT:
self.driver._debug('queue not serviced', queue_not_serviced_count)
try:
sock = self.driver.connection_queue.get_nowait()
s = self.driver._json_encode(
@ -1281,10 +1283,10 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
self._close_listen_socket()
return message
else:
while i < 100: # try up to 100 random port numbers
while i < 100: # try 9090 then up to 99 random port numbers
i += 1
port = self._attach_to_port(self.listen_socket,
random.randint(8192, 32000))
9090 if i == 1 else random.randint(8192, 32000))
if port != 0:
break
if port == 0:

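The retry loop above now prefers the fixed port 9090 before falling back to random ports. A standalone sketch of the same strategy, using a plain TCP socket in place of the driver's listen socket and _attach_to_port helper:

import random
import socket

def attach_to_port(sock, port):
    # Try to bind to port; return the bound port, or 0 on failure.
    try:
        sock.bind(('', port))
    except socket.error:
        return 0
    return sock.getsockname()[1]

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
port = 0
for i in range(1, 101):  # try 9090 first, then up to 99 random ports
    port = attach_to_port(sock, 9090 if i == 1 else
                          random.randint(8192, 32000))
    if port != 0:
        break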
View File

@ -74,11 +74,12 @@ def remove_kindlegen_markup(parts):
part = "".join(srcpieces)
parts[i] = part
# we can safely remove all of the Kindlegen generated data-AmznPageBreak tags
# we can safely remove all of the Kindlegen generated data-AmznPageBreak
# attributes
find_tag_with_AmznPageBreak_pattern = re.compile(
r'''(<[^>]*\sdata-AmznPageBreak=[^>]*>)''', re.IGNORECASE)
within_tag_AmznPageBreak_position_pattern = re.compile(
r'''\sdata-AmznPageBreak=['"][^'"]*['"]''')
r'''\sdata-AmznPageBreak=['"]([^'"]*)['"]''')
for i in xrange(len(parts)):
part = parts[i]
@ -86,10 +87,8 @@ def remove_kindlegen_markup(parts):
for j in range(len(srcpieces)):
tag = srcpieces[j]
if tag.startswith('<'):
for m in within_tag_AmznPageBreak_position_pattern.finditer(tag):
replacement = ''
tag = within_tag_AmznPageBreak_position_pattern.sub(replacement, tag, 1)
srcpieces[j] = tag
srcpieces[j] = within_tag_AmznPageBreak_position_pattern.sub(
lambda m:' style="page-break-after:%s"'%m.group(1), tag)
part = "".join(srcpieces)
parts[i] = part
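Rather than stripping the attribute as before, the substitution now rewrites it into the equivalent CSS. A minimal sketch of the transform on a single tag:

import re

pat = re.compile(r'''\sdata-AmznPageBreak=['"]([^'"]*)['"]''')

def fix_tag(tag):
    # Swap the proprietary attribute for a standard CSS declaration.
    return pat.sub(lambda m: ' style="page-break-after:%s"' % m.group(1),
                   tag)

print(fix_tag('<div data-AmznPageBreak="always">'))
# <div style="page-break-after:always">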
@ -203,7 +202,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
# All flows are now unicode and have links resolved
return flows
def insert_flows_into_markup(parts, flows, mobi8_reader):
def insert_flows_into_markup(parts, flows, mobi8_reader, log):
mr = mobi8_reader
# kindle:flow:XXXX?mime=YYYY/ZZZ (used for style sheets, svg images, etc)
@ -219,7 +218,12 @@ def insert_flows_into_markup(parts, flows, mobi8_reader):
if tag.startswith('<'):
for m in flow_pattern.finditer(tag):
num = int(m.group(1), 32)
try:
fi = mr.flowinfo[num]
except IndexError:
log.warn('Ignoring invalid flow reference: %s'%m.group())
tag = ''
else:
if fi.format == 'inline':
tag = flows[num]
else:
@ -313,7 +317,7 @@ def expand_mobi8_markup(mobi8_reader, resource_map, log):
flows = update_flow_links(mobi8_reader, resource_map, log)
# Insert inline flows into the markup
insert_flows_into_markup(parts, flows, mobi8_reader)
insert_flows_into_markup(parts, flows, mobi8_reader, log)
# Insert raster images into markup
insert_images_into_markup(parts, resource_map, log)

View File

@ -44,6 +44,18 @@ def locate_beg_end_of_tag(ml, aid):
return plt, pgt
return 0, 0
def reverse_tag_iter(block):
''' Iterate over all tags in block in reverse order, i.e. last tag
to first tag. '''
end = len(block)
while True:
pgt = block.rfind(b'>', 0, end)
if pgt == -1: break
plt = block.rfind(b'<', 0, pgt)
if plt == -1: break
yield block[plt:pgt+1]
end = plt
class Mobi8Reader(object):
def __init__(self, mobi6_reader, log):
@ -275,13 +287,12 @@ class Mobi8Reader(object):
return '%s/%s'%(fi.type, fi.filename), idtext
def get_id_tag(self, pos):
# find the correct tag by actually searching in the destination
# textblock at position
# Find the first tag with a named anchor (name or id attribute) before
# pos
fi = self.get_file_info(pos)
if fi.num is None and fi.start is None:
raise ValueError('No file contains pos: %d'%pos)
textblock = self.parts[fi.num]
id_map = []
npos = pos - fi.start
pgt = textblock.find(b'>', npos)
plt = textblock.find(b'<', npos)
@ -290,28 +301,15 @@ class Mobi8Reader(object):
if plt == npos or pgt < plt:
npos = pgt + 1
textblock = textblock[0:npos]
# find id links only inside of tags
# inside any < > pair find all "id=' and return whatever is inside
# the quotes
id_pattern = re.compile(br'''<[^>]*\sid\s*=\s*['"]([^'"]*)['"][^>]*>''',
re.IGNORECASE)
for m in re.finditer(id_pattern, textblock):
id_map.append((m.start(), m.group(1)))
id_re = re.compile(br'''<[^>]+\sid\s*=\s*['"]([^'"]+)['"]''')
name_re = re.compile(br'''<\s*a\s*\sname\s*=\s*['"]([^'"]+)['"]''')
for tag in reverse_tag_iter(textblock):
m = id_re.match(tag) or name_re.match(tag)
if m is not None:
return m.group(1)
if not id_map:
# Found no id in the textblock, link must be to top of file
# No tag found, link to start of file
return b''
# if npos is before first id= inside a tag, return the first
if npos < id_map[0][0]:
return id_map[0][1]
# if npos is after the last id= inside a tag, return the last
if npos > id_map[-1][0]:
return id_map[-1][1]
# otherwise find last id before npos
for i, item in enumerate(id_map):
if npos < item[0]:
return id_map[i-1][1]
return id_map[0][1]
def create_guide(self):
guide = Guide()

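To see what the rewritten get_id_tag is doing, here is a standalone sketch (simplified from the code above, not calibre's actual method) that scans tags in reverse for the nearest id or name anchor:

import re

def reverse_tag_iter(block):
    # Yield each <...> tag in block, from last to first.
    end = len(block)
    while True:
        pgt = block.rfind(b'>', 0, end)
        if pgt == -1:
            break
        plt = block.rfind(b'<', 0, pgt)
        if plt == -1:
            break
        yield block[plt:pgt + 1]
        end = plt

id_re = re.compile(br'''<[^>]+\sid\s*=\s*['"]([^'"]+)['"]''')
name_re = re.compile(br'''<\s*a\s*\sname\s*=\s*['"]([^'"]+)['"]''')

def nearest_anchor(textblock):
    # Walk tags backwards from the link target and return the first
    # id= or <a name=> value found, or b'' to link to start of file.
    for tag in reverse_tag_iter(textblock):
        m = id_re.match(tag) or name_re.match(tag)
        if m is not None:
            return m.group(1)
    return b''

print(nearest_anchor(b'<p id="c1">one</p><a name="c2">two</a><i>x</i>'))
# b'c2'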
View File

@ -320,13 +320,11 @@ class OEBReader(object):
self.logger.warn(u'Spine item %r not found' % idref)
continue
item = manifest.ids[idref]
if item.media_type.lower() in OEB_DOCS and hasattr(item.data, 'xpath'):
spine.add(item, elem.get('linear'))
for item in spine:
if item.media_type.lower() not in OEB_DOCS:
if not hasattr(item.data, 'xpath'):
else:
self.oeb.log.warn('The item %s is not an XML document.'
' Removing it from spine.'%item.href)
spine.remove(item)
if len(spine) == 0:
raise OEBError("Spine is empty")
self._spine_add_extra()

View File

@ -114,7 +114,9 @@ class DetectStructure(object):
def find_matches(expr, doc):
try:
return XPath(expr)(doc)
ans = XPath(expr)(doc)
len(ans)
return ans
except:
self.log.warn('Invalid chapter expression, ignoring: %s'%expr)
return []
@ -203,7 +205,9 @@ class DetectStructure(object):
def find_matches(expr, doc):
try:
return XPath(expr)(doc)
ans = XPath(expr)(doc)
len(ans)
return ans
except:
self.log.warn('Invalid ToC expression, ignoring: %s'%expr)
return []

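The added len(ans) call forces evaluation problems to surface inside the try block: expressions like count(//h1) compile fine but return a float rather than a node list, and len() rejects them. A sketch using plain lxml in place of calibre's namespace-aware XPath helper:

from lxml import etree

doc = etree.fromstring('<root><h1>A</h1><h1>B</h1></root>')

def find_matches(expr, doc):
    try:
        ans = etree.XPath(expr)(doc)
        len(ans)  # reject non-sequence results (e.g. floats)
        return ans
    except Exception:
        print('Invalid expression, ignoring: %s' % expr)
        return []

print(len(find_matches('//h1', doc)))    # 2
print(find_matches('count(//h1)', doc))  # rejected, returns []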
View File

@ -27,10 +27,10 @@ def get_custom_size(opts):
custom_size = None
if opts.custom_size != None:
width, sep, height = opts.custom_size.partition('x')
if height != '':
if height:
try:
width = int(width)
height = int(height)
width = float(width)
height = float(height)
custom_size = (width, height)
except:
custom_size = None

View File

@ -72,8 +72,8 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
mobi = details.xpath(
'boolean(.//span[@class="bindername" and contains(text(), "mobipocket")]/text())')
cover_url = ''.join(data.xpath('.//div[@class="coverImg"]/a/img/@src'))
price = ''.join(data.xpath('.//span[@class="preis"]/text()')).replace('*', '').strip()
cover_url = ''.join(data.xpath('.//div[@class="coverimg"]/a/img/@src'))
price = ''.join(data.xpath('.//div[@class="preis"]/text()')).replace('*', '').strip()
counter -= 1

View File

@ -8,7 +8,7 @@ from PyQt4.Qt import (QThread, pyqtSignal, Qt, QUrl, QDialog, QGridLayout,
import mechanize
from calibre.constants import (__appname__, __version__, iswindows, isosx,
isportable)
isportable, is64bit)
from calibre import browser, prints, as_unicode
from calibre.utils.config import prefs
from calibre.gui2 import config, dynamic, open_url
@ -19,6 +19,13 @@ URL = 'http://status.calibre-ebook.com/latest'
NO_CALIBRE_UPDATE = '-0.0.0'
VSEP = '|'
def get_download_url():
which = ('portable' if isportable else 'windows' if iswindows
else 'osx' if isosx else 'linux')
if which == 'windows' and is64bit:
which += '64'
return 'http://calibre-ebook.com/download_' + which
def get_newest_version():
br = browser()
req = mechanize.Request(URL)
@ -116,10 +123,7 @@ class UpdateNotification(QDialog):
config.set('new_version_notification', bool(self.cb.isChecked()))
def accept(self):
url = ('http://calibre-ebook.com/download_' +
('portable' if isportable else 'windows' if iswindows
else 'osx' if isosx else 'linux'))
open_url(QUrl(url))
open_url(QUrl(get_download_url()))
QDialog.accept(self)

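Factoring the URL construction into get_download_url also lets the 64-bit Windows build point at its own download page. A standalone sketch with the platform flags hard-coded for illustration (in calibre they come from calibre.constants):

# hard-coded stand-ins for the calibre.constants values
isportable, iswindows, isosx, is64bit = False, True, False, True

def get_download_url():
    which = ('portable' if isportable else 'windows' if iswindows
             else 'osx' if isosx else 'linux')
    if which == 'windows' and is64bit:
        which += '64'
    return 'http://calibre-ebook.com/download_' + which

print(get_download_url())  # http://calibre-ebook.com/download_windows64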
View File

@ -22,6 +22,7 @@ from calibre.library.comments import comments_to_html
from calibre.library.server import custom_fields_to_display
from calibre.library.field_metadata import category_icon_map
from calibre.library.server.utils import quote, unquote
from calibre.ebooks.metadata.sources.identify import urls_from_identifiers
def xml(*args, **kwargs):
ans = prepare_string_for_xml(*args, **kwargs)
@ -823,6 +824,16 @@ class BrowseServer(object):
if field in ('title', 'formats') or not args.get(field, False) \
or not m['name']:
continue
if field == 'identifiers':
urls = urls_from_identifiers(mi.get(field, {}))
links = [u'<a class="details_category_link" target="_new" href="%s" title="%s:%s">%s</a>' % (url, id_typ, id_val, name)
for name, id_typ, id_val, url in urls]
links = u', '.join(links)
if links:
fields.append((m['name'], u'<strong>%s: </strong>%s'%(
_('Ids'), links)))
continue
if m['datatype'] == 'rating':
r = u'<strong>%s: </strong>'%xml(m['name']) + \
render_rating(mi.get(field)/2.0, self.opts.url_prefix,

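The new identifiers branch renders each identifier as a link on the book details page. A minimal sketch of the formatting, with a hypothetical tuple standing in for the urls_from_identifiers() result:

# (name, id_type, id_value, url) as urls_from_identifiers() returns;
# the ISBN and URL below are invented for illustration
urls = [('ISBN', 'isbn', '9780061120084',
         'http://www.worldcat.org/isbn/9780061120084')]
links = u', '.join(
    u'<a class="details_category_link" target="_new" href="%s" '
    u'title="%s:%s">%s</a>' % (url, id_typ, id_val, name)
    for name, id_typ, id_val, url in urls)
print(links)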
Many more file diffs suppressed because they are too large

Some files were not shown because too many files have changed in this diff.