calibre/src/Cheetah/FileUtils.py


from glob import glob
import os
from os import listdir
import os.path
import re
import subprocess
from tempfile import mkstemp
from tempfile import mktemp

def _escapeRegexChars(txt,
                     escapeRE=re.compile(r'([\$\^\*\+\.\?\{\}\[\]\(\)\|\\])')):
    """Return 'txt' with each regex metacharacter prefixed by a backslash.

    'escapeRE' is the compiled pattern that picks out the characters to
    escape; the default is compiled once, when the function is defined.
    """
    # Template: a literal backslash followed by the character that matched.
    backslashThenMatch = r'\\\1'
    escaped = escapeRE.sub(backslashThenMatch, txt)
    return escaped

def findFiles(*args, **kw):
    """Return the list of paths collected by a FileFinder.

    All positional and keyword arguments are handed straight to the
    FileFinder constructor; see that class for what they mean."""

    finder = FileFinder(*args, **kw)
    return finder.files()

def replaceStrInFiles(files, theStr, repl):

    """Replace all instances of 'theStr' with 'repl' for each file in the 'files'
    list. Returns a dictionary with data about the matches found.

    This is like string.replace() on a multi-file basis: 'theStr' is matched
    literally and 'repl' is inserted literally (sequences such as '\\1' or
    '\\g<name>' in 'repl' are NOT treated as group references).  If 'repl'
    is a callable it is passed through unchanged and called with each match
    object.

    This function is a wrapper around the FindAndReplace class. See its
    docstring for more details."""

    pattern = _escapeRegexChars(theStr)
    if callable(repl):
        replacement = repl
    else:
        # Hand FindAndReplace a function rather than the raw string so that
        # the text is not parsed as a regex replacement template; the escaped
        # pattern has no groups, so any backreference in it would fail.
        def replacement(match, _literal=repl):
            return _literal
    return FindAndReplace(files, pattern, replacement).results()

def replaceRegexInFiles(files, pattern, repl):

    """Apply a regex substitution to every file named in 'files'.

    Each match of 'pattern' is replaced with 'repl', much as re.sub would do
    for a single string.  The return value is the results dictionary built
    by the FindAndReplace instance that performs the work.

    This function is a wrapper around the FindAndReplace class. See its
    docstring for more details."""

    replacer = FindAndReplace(files, pattern, repl)
    return replacer.results()


##################################################
## CLASSES

class FileFinder:

    """Walks a directory tree and collects every path that matches one of
    the given glob patterns.

    The walk happens once, when the instance is created; call files() to
    get the accumulated list of matching paths."""

    def __init__(self, rootPath,
                 globPatterns=('*',),
                 ignoreBasenames=('CVS', '.svn'),
                 ignoreDirs=(),
                 ):

        # Matches accumulate here while the tree is walked.
        self._files = []

        self._rootPath = rootPath
        self._globPatterns = globPatterns
        self._ignoreDirs = ignoreDirs
        self._ignoreBasenames = ignoreBasenames

        self.walkDirTree(rootPath)

    def walkDirTree(self, dir='.',

                    listdir=os.listdir,
                    isdir=os.path.isdir,
                    join=os.path.join,
                    ):

        """Visit 'dir' and every sub-directory that filterDir() accepts,
        calling processDir() on each one."""

        # Explicit stack instead of recursion; the most recently added
        # directory is visited next.
        stack = [dir]
        while stack:
            current = stack.pop()
            self.processDir(current)

            for entry in listdir(current):
                candidate = join(current, entry)
                if isdir(candidate) and self.filterDir(entry, candidate):
                    stack.append(candidate)

    def filterDir(self, baseName, fullPath):

        """Hook deciding whether a sub-directory should be descended into.

        Returns False when 'baseName' is one of the ignored base names or
        'fullPath' is one of the ignored directories, True otherwise."""

        return (baseName not in self._ignoreBasenames and
                fullPath not in self._ignoreDirs)

    def processDir(self, dir, glob=glob):
        """Add every path in 'dir' matching one of the glob patterns to the
        list of results."""
        for pattern in self._globPatterns:
            matches = glob(os.path.join(dir, pattern))
            self._files.extend(matches)

    def files(self):
        """Return the list of paths collected so far."""
        return self._files

class _GenSubberFunc:

    """Converts a 'sub' string in the form that one feeds to re.sub (backrefs,
    groups, etc.) into a function that can be used to do the substitutions in
    the FindAndReplace class.

    The replacement string is scanned once, at construction time, and turned
    into a list of Python expression chunks: string constants become their
    repr(), '\\N' and '\\g<N>' become 'm.group(N)', and '\\g<name>' becomes
    'm.group("name")'.  subberFunc() compiles those chunks into a function
    taking a match object 'm' and returning the joined pieces."""

    # '\1', '\12', ... (no leading zero)
    backrefRE = re.compile(r'\\([1-9][0-9]*)')
    # '\g<name>' (identifier, digits allowed after the first character) or
    # '\g<number>', as accepted by re.sub.
    groupRE = re.compile(r'\\g<([a-zA-Z_][a-zA-Z_0-9]*|[0-9]+)>')

    def __init__(self, replaceStr):
        self._src = replaceStr
        self._pos = 0
        self._codeChunks = []
        self.parse()

    def src(self):
        """Return the replacement string being parsed."""
        return self._src

    def pos(self):
        """Return the current scan position within src()."""
        return self._pos

    def setPos(self, pos):
        """Move the scan position to 'pos'."""
        self._pos = pos

    def atEnd(self):
        """Return True once the scan position has reached the end of src()."""
        return self._pos >= len(self._src)

    def advance(self, offset=1):
        """Move the scan position forward by 'offset' characters."""
        self._pos += offset

    def readTo(self, to, start=None):
        """Return src()[start:to] and leave the scan position at 'to'.

        'start' defaults to the scan position at the time of the call."""
        if start is None:
            start = self._pos
        self._pos = to
        if self.atEnd():
            return self._src[start:]
        else:
            return self._src[start:to]

    ## match and get methods

    def matchBackref(self):
        """Return a match object if a '\\N' backref starts at pos()."""
        return self.backrefRE.match(self.src(), self.pos())

    def getBackref(self):
        """Consume the backref at pos() and return its group number as a
        string.  Only call this after matchBackref() has succeeded."""
        m = self.matchBackref()
        self.setPos(m.end())
        return m.group(1)

    def matchGroup(self):
        """Return a match object if a '\\g<...>' reference starts at pos()."""
        return self.groupRE.match(self.src(), self.pos())

    def getGroup(self):
        """Consume the '\\g<...>' reference at pos() and return the text
        between the angle brackets.  Only call this after matchGroup() has
        succeeded."""
        m = self.matchGroup()
        self.setPos(m.end())
        return m.group(1)

    ## main parse loop and the eat methods

    def parse(self):
        """Split the whole replacement string into code chunks."""
        while not self.atEnd():
            if self.matchBackref():
                self.eatBackref()
            elif self.matchGroup():
                self.eatGroup()
            else:
                self.eatStrConst()

    def eatStrConst(self):
        """Consume literal text up to the next group reference (or the end)
        and record it as a string-constant chunk."""
        startPos = self.pos()
        while not self.atEnd():
            if self.matchBackref() or self.matchGroup():
                break
            else:
                self.advance()
        strConst = self.readTo(self.pos(), start=startPos)
        # repr() yields a valid Python literal for the generated source.
        self.addChunk(repr(strConst))

    def eatBackref(self):
        """Consume a '\\N' backref and record the matching m.group() call."""
        self.addChunk( 'm.group(' + self.getBackref() + ')' )

    def eatGroup(self):
        """Consume a '\\g<...>' reference and record the matching m.group()
        call, by number for '\\g<N>' and by name otherwise."""
        ref = self.getGroup()
        if ref.isdigit():
            # int() drops leading zeros, which are not valid in generated
            # Python source.
            self.addChunk( 'm.group(' + str(int(ref)) + ')' )
        else:
            self.addChunk( 'm.group("' + ref + '")' )

    def addChunk(self, chunk):
        """Append one expression (as source text) to the generated code."""
        self._codeChunks.append(chunk)

    ## code wrapping methods

    def codeBody(self):
        """Return the chunks as a comma-separated expression list."""
        return ', '.join(self._codeChunks)

    def code(self):
        """Return the source of a 'subber(m)' function that joins the chunks."""
        return "def subber(m):\n\treturn ''.join([%s])\n" % (self.codeBody())

    def subberFunc(self):
        """Compile code() and return the resulting 'subber' function.

        The source is executed in an explicit namespace: on Python 3 exec()
        cannot create local variables in the calling function, so the
        function has to be fetched from the namespace dict.  The generated
        source contains only repr()'d string constants and m.group() calls
        whose arguments are restricted by backrefRE/groupRE."""
        namespace = {}
        exec(self.code(), namespace)
        return namespace['subber']


class FindAndReplace:

    """Find and replace all instances of 'patternOrRE' with 'replacement' for
    each file in the 'files' list. This is a multi-file version of re.sub().

    'patternOrRE' can be a raw regex pattern or
    a regex object as generated by the re module. 'replacement' can be any
    string that would work with patternOrRE.sub(replacement, fileContents),
    or a callable taking a match object and returning the replacement text.

    All the work is done by the constructor; files are rewritten in place.
    Entries in 'files' that are not regular files are skipped.  When
    'recordResults' is true, results() returns a dictionary mapping each
    modified file to {'count': <number of matches>, 'matches': [...]}, where
    every entry of 'matches' is a dict with the keys 'contents', 'start' and
    'end' describing one match.

    If a 'pgrep' executable is available whose usage message starts with
    'Usage:' and which accepts the pattern without complaint, it is used to
    pre-select the files that contain a match; otherwise each file is read
    and searched with the 're' module.
    """

    # String type(s) accepted for patterns and replacement templates.
    try:
        _stringTypes = basestring   # Python 2
    except NameError:
        _stringTypes = str          # Python 3

    def __init__(self, files, patternOrRE, replacement,
                 recordResults=True):

        if isinstance(patternOrRE, self._stringTypes):
            self._regex = re.compile(patternOrRE)
        else:
            self._regex = patternOrRE
        if isinstance(replacement, self._stringTypes):
            self._subber = _GenSubberFunc(replacement).subberFunc()
        else:
            self._subber = replacement

        self._pattern = self._regex.pattern
        self._files = files
        self._results = {}
        self._recordResults = recordResults
        self._currFile = None

        ## see if we should use pgrep to do the file matching
        self._usePgrep = self._pgrepAccepts(self._pattern)

        self._run()

    def results(self):
        """Return the dictionary of recorded matches, keyed by file name."""
        return self._results

    @staticmethod
    def _pgrep(*args):
        """Run 'pgrep' with 'args' and return its (stdout, stderr) output as
        bytes, or None when the program cannot be started.

        The arguments are passed as an argv list, not through a shell, so
        patterns and file names reach the program unmodified."""
        try:
            proc = subprocess.Popen(['pgrep'] + list(args),
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
        except OSError:
            return None
        return proc.communicate()

    def _pgrepAccepts(self, pattern):
        """Return True if a usable 'pgrep' exists and understands 'pattern'."""
        probe = self._pgrep()
        if probe is None or not probe[1].startswith(b'Usage:'):
            return False

        ## now check to make sure pgrep understands the pattern
        fd, tmpFile = mkstemp()
        try:
            with os.fdopen(fd, 'w') as handle:
                handle.write('#')
            outcome = self._pgrep(pattern, tmpFile)
        finally:
            os.remove(tmpFile)
        # No error message on stderr means the pattern was accepted.
        return outcome is not None and not outcome[1]

    @staticmethod
    def _readFile(path):
        """Return the full contents of 'path', closing the file afterwards."""
        with open(path) as handle:
            return handle.read()

    def _run(self):
        """Search every file and rewrite those that contain a match."""
        regex = self._regex
        for path in self._files:
            if not os.path.isfile(path):
                continue # skip dirs etc.

            self._currFile = path
            orig = None     # contents of 'path', once they have been read
            if self._usePgrep:
                outcome = self._pgrep(self._pattern, path)
                # any output on stdout means pgrep found a match
                found = bool(outcome and outcome[0])
            else:
                orig = self._readFile(path)
                found = regex.search(orig) is not None

            if not found:
                continue
            if orig is None:
                orig = self._readFile(path)
            new = regex.sub(self._subDispatcher, orig)
            with open(path, 'w') as handle:
                handle.write(new)

    def _subDispatcher(self, match):
        """Record 'match' against the current file (when recording is on) and
        return the replacement text produced by the subber function."""
        if self._recordResults:
            if self._currFile not in self._results:
                res = self._results[self._currFile] = {}
                res['count'] = 0
                res['matches'] = []
            else:
                res = self._results[self._currFile]
            res['count'] += 1
            res['matches'].append({'contents': match.group(),
                                   'start': match.start(),
                                   'end': match.end(),
                                   }
                                   )
        return self._subber(match)


class SourceFileStats:

    """Counts code, blank and comment lines for a list of source files.

    Every file is scanned when the instance is created.  A line whose first
    non-whitespace character is '#' is a comment line, an empty or
    whitespace-only line is a blank line, and anything else is a code line.
    """

    _fileStats = None

    # Used with .match(), so both patterns are anchored at the line start.
    _commentLineRe = re.compile(r'\s*#')
    _blankLineRe = re.compile(r'\s*$')

    def __init__(self, files):
        self._fileStats = stats = {}
        for file in files:
            stats[file] = self.getFileStats(file)

    def rawStats(self):
        """Return the per-file statistics: a dict mapping each file name to
        the dict produced by getFileStats()."""
        return self._fileStats

    def summary(self):
        """Return the line counts summed over all files, as a dict with the
        keys 'codeLines', 'blankLines', 'commentLines' and 'totalLines'."""
        totals = {'codeLines': 0,
                  'blankLines': 0,
                  'commentLines': 0,
                  'totalLines': 0,
                  }
        for fileStats in self.rawStats().values():
            for key in totals:
                totals[key] += fileStats[key]
        return totals

    def printStats(self):
        """Placeholder; currently does nothing."""
        pass

    def getFileStats(self, fileName):
        """Read 'fileName' and return its line counts as a dict with the keys
        'codeLines', 'blankLines', 'commentLines' and 'totalLines'."""
        codeLines = 0
        blankLines = 0
        commentLines = 0
        with open(fileName) as handle:
            lines = handle.read().splitlines()
        totalLines = len(lines)

        for line in lines:
            if self._commentLineRe.match(line):
                commentLines += 1
            elif self._blankLineRe.match(line):
                blankLines += 1
            else:
                codeLines += 1

        stats = {'codeLines': codeLines,
                 'blankLines': blankLines,
                 'commentLines': commentLines,
                 'totalLines': totalLines,
                 }

        return stats