#! /usr/bin/env python
"""Search, list or clip columns of comma-separated log files.

The log files handled here are read as: one comment line, one line of
comma-separated column names, then comma-separated data lines.  Files whose
name ends in ``.bz2`` are read through the ``bz2`` module.

The code runs unchanged on Python 2.7 and Python 3: every ``print`` call
takes a single argument and no list-returning ``filter`` is relied upon.
"""
import re
import bz2
import getopt
import sys
import glob


class search(object):
    """Case-insensitive regular-expression predicate."""

    def __init__(self, str):
        # The parameter name shadows the builtin; it is kept so that callers
        # passing it by keyword keep working.
        self.re = re.compile(str, re.IGNORECASE)

    def search(self, item):
        """Return 1 when the pattern is found anywhere in *item*, else 0."""
        return 1 if self.re.search(item) else 0


class clipper(object):
    """Inspect log files and copy selected columns to an output file.

    files          -- list of log file names (sorted in place before use)
    searchKeywords -- list of regular expressions matched against column names
    output         -- name of the file written by clipData()
    """

    # The dot is escaped so that only a real ".bz2" suffix matches.
    _BZ2_SUFFIX = re.compile(r"\.bz2$", re.IGNORECASE)

    def __init__(self, files=None, searchKeywords=None, output="clipper.out"):
        # None defaults avoid sharing one list between every instance.
        self.searchKeywords = [] if searchKeywords is None else searchKeywords
        self.files = [] if files is None else files
        self.outputFileName = output
        self.secondsSearch = search('^seconds')
        self.nsSearch = search('^ns')

    def isBZ2File(self, filename):
        """Return 1 when *filename* ends in ".bz2" (any case), else 0."""
        return 1 if self._BZ2_SUFFIX.search(filename) else 0

    def _openLog(self, filename):
        """Open *filename* for reading text, decompressing bz2 files."""
        if self.isBZ2File(filename):
            if hasattr(bz2, "open"):
                # Python 3: BZ2File yields bytes, so ask for text mode.
                return bz2.open(filename, "rt")
            return bz2.BZ2File(filename, mode='r')
        return open(filename, 'r')

    def _readHeader(self):
        """Sort the files and return the first file's column names.

        Returns None (after printing a message) when there are no files.
        """
        if not self.files:
            print("No files specified to clip")
            return None
        # sort the files to get them in order
        self.files.sort()
        print("Matching Files: '%s'" % (self.files,))
        with self._openLog(self.files[0]) as f:
            # the comment at the start of the dat file, then the descriptors
            comment = f.readline()
            header = f.readline()
        print("Header Comment: '%s'" % comment)
        # Strip the line terminator so the last column name is clean.
        return header.rstrip('\r\n').split(',')

    def _matchColumns(self, splitheader):
        """Return the column names matching any search keyword, in keyword order."""
        userColumns = []
        for keyword in self.searchKeywords:
            print("Searching for columns matching: '%s'" % keyword)
            searching = search(keyword)
            columns = [name for name in splitheader if searching.search(name)]
            if columns:
                userColumns.extend(columns)
            else:
                print("No match for column: '%s'" % keyword)
        if not userColumns:
            print("No columns found matching keywords... Exiting..")
        return userColumns

    def searchColumns(self):
        """Print the columns of the first file that match the keywords."""
        splitheader = self._readHeader()
        if splitheader is None:
            return
        userColumns = self._matchColumns(splitheader)
        if not userColumns:
            return
        print("Matching logfile columns:")
        for column in userColumns:
            print("%s" % column)

    def listColumns(self):
        """Print every column name of the first file."""
        splitheader = self._readHeader()
        if splitheader is None:
            return
        print("Columns:")
        for column in splitheader:
            print("%s" % column)

    def clipData(self):
        """Write the time columns and the matching columns of all files.

        The output file gets one "#" header line followed by one
        tab-separated row per data line, files being processed in sorted
        order.  Column positions are taken from the first file; all files
        are assumed to share that layout.
        """
        splitheader = self._readHeader()
        if splitheader is None:
            return
        userColumns = self._matchColumns(splitheader)
        if not userColumns:
            return
        print(userColumns)

        print("\nSearching for Time columns")
        secondsColumns = [c for c in splitheader if self.secondsSearch.search(c)]
        nsColumns = [c for c in splitheader if self.nsSearch.search(c)]
        print("Found Time Columns:\n")
        print(secondsColumns)
        print(nsColumns)

        # seconds first, then ns, then the user-selected columns
        interestingIndices = []
        for column in secondsColumns + nsColumns + userColumns:
            try:
                interestingIndices.append(splitheader.index(column))
            except ValueError:
                print("Problem getting the index for: '%s'" % column)

        with open(self.outputFileName, 'w') as outputFile:
            outputFile.write("# %s %s %s\n"
                             % (secondsColumns, nsColumns, userColumns))
            for filename in self.files:
                print("---------")
                with self._openLog(filename) as dataFile:
                    comment = dataFile.readline()
                    print("Comment: '%s'" % comment)
                    # column descriptor; for now assume they are all the same
                    dataFile.readline()
                    for line in dataFile:
                        splitLine = line.rstrip('\r\n').split(',')
                        try:
                            # join keeps empty fields so columns stay aligned
                            data = "\t".join(
                                [splitLine[i] for i in interestingIndices])
                        except IndexError:
                            # A short line ends processing of this file,
                            # as before, but no longer silently.
                            print("Short line in '%s'; skipping the rest of it"
                                  % filename)
                            break
                        outputFile.write(data + '\n')
                print("-----------")


def usage():
    """Print the command-line options."""
    print("USAGE:")
    print("-l\t--listcolumns")
    print("-s\t--search")
    # getopt only knows "clipcolumn"; the text used to say "--clipcolumns".
    print("-c\t--clipcolumn")
    print("-o\t--output")


def main():
    """Parse sys.argv and run exactly one of search, clip or list."""
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "ls:c:o:",
            ["listcolumns", "search=", "clipcolumn=", "output="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    searching = 0
    clipColumns = 0
    listColumns = 0
    searchKeys = []
    outputFile = ""
    for o, a in opts:
        if o in ("-l", "--listcolumns"):
            listColumns = 1
        if o in ("-s", "--search"):
            searching = 1
            searchKeys.append(a)
        if o in ("-c", "--clipcolumn"):
            clipColumns = 1
            searchKeys.append(a)
        if o in ("-o", "--output"):
            outputFile = a

    if (searching + clipColumns + listColumns) > 1:
        print("Pick one only of searching, clip columns or list columns")
        usage()
        sys.exit(2)

    if not args:
        print("No files specified to clip")
        usage()
        sys.exit(2)

    # Expand patterns ourselves for shells that do not.
    files = []
    for pattern in args:
        files.extend(glob.glob(pattern))

    if not files:
        print("No files specified to clip")
        usage()
        sys.exit(2)

    print("Number of matching files: %d" % len(files))

    if not outputFile:
        print("Output File defaulting to 'clipper.out'")
        outputFile = 'clipper.out'

    # Pass the expanded list; the raw args may still contain patterns.
    clipClass = clipper(files=files, searchKeywords=searchKeys,
                        output=outputFile)

    if clipColumns:
        clipClass.clipData()
    elif listColumns:
        clipClass.listColumns()
    else:
        clipClass.searchColumns()


if __name__ == "__main__":
    main()