#! /usr/bin/env python

import re
import bz2
import getopt
import sys
import glob

class search:
    """Case-insensitive regular-expression predicate.

    An instance wraps one compiled pattern; its ``search`` method is
    suitable as the test function passed to ``filter``.
    """

    def __init__(self, str):
        # Compile once so repeated calls to search() reuse the pattern.
        pattern = re.compile(str, re.IGNORECASE)
        self.re = pattern

    def search(self, item):
        """Return 1 if the pattern occurs anywhere in item, otherwise 0."""
        found = self.re.search(item) is not None
        return int(found)

class clipper:
    """Pull selected columns out of comma-separated log files.

    Every input file is read as: one comment line, one line of
    comma-separated column names, then data rows.  File names ending in
    ``.bz2`` are read through ``bz2.BZ2File``; everything else through
    ``open``.  The column names are taken from the first file (after
    sorting the file names) and are assumed to be the same in all files.
    """

    def __init__(self, files=None, searchKeywords=None, output="clipper.out"):
        """Store the inputs and build the matchers for the time columns.

        files -- paths of the log files to read
        searchKeywords -- regular expressions matched (case-insensitively)
            against the column names
        output -- path of the file written by clipData()
        """
        if files is None:
            files = []
        if searchKeywords is None:
            searchKeywords = []

        # Copy both lists: the methods below sort self.files in place, and
        # that must not reorder the caller's list or a shared default.
        self.searchKeywords = list(searchKeywords)
        self.files = list(files)

        self.outputFileName = output

        self.secondsSearch = search('^seconds')
        self.nsSearch = search('^ns')

    def isBZ2File(self, filename):
        """Return 1 if filename ends in '.bz2' (any case), otherwise 0."""
        # The dot is escaped so that names such as "run_bz2" do not match.
        if re.search(r"\.bz2$", filename, re.IGNORECASE):
            return 1
        return 0

    def _openLog(self, filename):
        """Open filename for reading, through bz2 when it is compressed."""
        if self.isBZ2File(filename):
            return bz2.BZ2File(filename, mode='r')
        return open(filename, 'r')

    def _loadHeader(self):
        """Sort the files, report them and return the column names.

        The names come from the second line of the first file.  Returns
        None (after printing a message) when there are no files.
        """
        # sort the files to get them in order
        self.files.sort()
        print("Matching Files: '%s'" % (self.files,))

        if not self.files:
            print("No files specified to clip")
            return None

        f = self._openLog(self.files[0])
        try:
            # Strip the line endings so that neither the comment nor the
            # last column name carries a trailing newline.
            comment = f.readline().rstrip('\r\n')
            header = f.readline().rstrip('\r\n')
        finally:
            f.close()

        print("Header Comment: '%s'" % comment)
        return header.split(',')

    def _matchColumns(self, splitheader):
        """Return the header indices matched by each keyword, in keyword order.

        A column matched by several keywords appears once per keyword.
        """
        indices = []
        for keyword in self.searchKeywords:
            print("Searching for columns matching: '%s'" % keyword)
            matcher = search(keyword)
            hits = [i for i, name in enumerate(splitheader)
                    if matcher.search(name)]
            if hits:
                indices.extend(hits)
            else:
                print("No match for column: '%s'" % keyword)
        return indices

    def _clipFile(self, filename, indices, outputFile):
        """Write the columns at indices of every data row of filename.

        Fields are written tab-separated, one row per line.  Reading of
        this file stops at the first row that has too few fields.
        """
        dataFile = self._openLog(filename)
        try:
            # read in comment
            comment = dataFile.readline().rstrip('\r\n')
            print("Comment: '%s'" % comment)

            # column descriptor
            # for now assume they are all the same
            dataFile.readline()

            lineNumber = 2
            line = dataFile.readline()
            while line:
                lineNumber += 1
                fields = line.rstrip('\r\n').split(',')
                try:
                    row = [fields[i] for i in indices]
                except IndexError:
                    # Short (e.g. truncated) row: keep what was written so
                    # far and skip the rest of this file.
                    print("Stopping at line %d of '%s': too few columns"
                          % (lineNumber, filename))
                    break
                # join() keeps empty fields in place, so columns stay aligned.
                outputFile.write("\t".join(row) + "\n")
                line = dataFile.readline()
        finally:
            dataFile.close()

    def searchColumns(self):
        """Print the column names that match the search keywords."""
        splitheader = self._loadHeader()
        if splitheader is None:
            return

        indices = self._matchColumns(splitheader)
        if not indices:
            print("No columns found matching keywords... Exiting..")
            return

        print("Matching logfile columns:")
        for i in indices:
            print("%s" % splitheader[i])

    def listColumns(self):
        """Print every column name found in the first file."""
        splitheader = self._loadHeader()
        if splitheader is None:
            return

        print("Columns:")
        for column in splitheader:
            print("%s" % column)

    def clipData(self):
        """Write the time columns and the matched columns to the output file.

        The output starts with a '#' line listing the selected column
        names.  Each row holds the columns whose names start with
        'seconds', then those starting with 'ns', then the keyword matches.
        Nothing is written when no keyword matches any column.
        """
        splitheader = self._loadHeader()
        if splitheader is None:
            return

        userIndices = self._matchColumns(splitheader)
        if not userIndices:
            print("No columns found matching keywords... Exiting..")
            return

        userColumns = [splitheader[i] for i in userIndices]
        print(userColumns)

        print("\nSearching for Time columns")
        # Work with positions rather than names so that two columns sharing
        # a name are not both resolved to the first one.
        secondsIndices = [i for i, name in enumerate(splitheader)
                          if self.secondsSearch.search(name)]
        nsIndices = [i for i, name in enumerate(splitheader)
                     if self.nsSearch.search(name)]
        secondsColumns = [splitheader[i] for i in secondsIndices]
        nsColumns = [splitheader[i] for i in nsIndices]

        print("Found Time Columns:\n")
        print(secondsColumns)
        print(nsColumns)

        interestingIndices = secondsIndices + nsIndices + userIndices

        # an output file..  where to put all this data..
        outputFile = open(self.outputFileName, 'w')
        try:
            # print a header
            outputFile.write("# %s %s %s\n"
                             % (secondsColumns, nsColumns, userColumns))

            # read all files
            for filename in self.files:
                print("---------")
                self._clipFile(filename, interestingIndices, outputFile)
                print("-----------")
        finally:
            # Always close so buffered rows reach the disk.
            outputFile.close()
            

def usage():
    """Print the command-line option summary to standard output."""
    print("USAGE:")
    print("-l\t--listcolumns")
    print("-s\t--search")
    # The long option accepted by getopt in main() is "clipcolumn"
    # (singular); the plural spelling is rejected.
    print("-c\t--clipcolumn")
    print("-o\t--output")
    
def main():
    """Parse the command line and run the requested clipper action.

    Options: -l/--listcolumns, -s/--search KEYWORD, -c/--clipcolumn KEYWORD,
    -o/--output FILE.  The remaining arguments are file names or glob
    patterns.  At most one of list, search and clip may be requested; with
    none of them the search action runs.  Exits with status 2 on a usage
    error.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ls:c:o:",
                                   ["listcolumns", "search=",
                                    "clipcolumn=", "output="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    searching = 0
    clipColumns = 0
    listColumns = 0

    searchKeys = []
    outputFile = ""
    for o, a in opts:
        if o in ("-l", "--listcolumns"):
            listColumns = 1
        elif o in ("-s", "--search"):
            searching = 1
            searchKeys.append(a)
        elif o in ("-c", "--clipcolumn"):
            clipColumns = 1
            searchKeys.append(a)
        elif o in ("-o", "--output"):
            outputFile = a

    if (searching + clipColumns + listColumns) > 1:
        print("Pick one only of searching, clip columns or list columns")
        usage()
        sys.exit(2)

    if not args:
        print("No files specified to clip")
        usage()
        sys.exit(2)

    # Expand each argument as a glob pattern; names that match nothing
    # on disk drop out here.
    files = []
    for pattern in args:
        files.extend(glob.glob(pattern))

    if not files:
        print("No files specified to clip")
        usage()
        sys.exit(2)

    print("Number of matching files: %d" % len(files))

    if not outputFile:
        print("Output File defaulting to 'clipper.out'")
        outputFile = 'clipper.out'

    # Hand over the expanded list, not the raw arguments, so quoted
    # patterns and unmatched names never reach open().
    clipClass = clipper(files=files, searchKeywords=searchKeys,
                        output=outputFile)

    if clipColumns:
        clipClass.clipData()
    elif listColumns:
        clipClass.listColumns()
    else:
        clipClass.searchColumns()
    
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()