Saturday, 7 February 2009

Perf stats with Python

Last week we need to do a quick and dirty performance test for out Java application. The simplest way I figured was to instrument the logs with performance data for each critical step and for the overall end to end processing. These were simple log4j statements that used a set of predefined keys and logged - "****** key=value millisecs" - in the logs. We then put together a simple Python script to use regex and parse these log4j files, and spew out the statistics.

Instrumenting the Java logs is trivial, just decide the keys you want to use and put something like this:


long epoch = System.curTimeMillis();
// do your operation
logger.debug("******total_end_to_end="+(System.curTimeMIllis()-epoch));

Then to process the logs, use the below Python script





#import the regex package
import re

"""
This class is a utility class to calculate the statistics of the
data being recorded and store the data for reference.

The stats reported are a simple average, min and max values
"""
class MinMaxAvg:

def __init__(self):
self.min=0
self.max=0
self.sum=0
self.count=0
self.data=[]

def put(self,value):
#if this is the first value, set it as both min and max
if self.min==0 and self.max==0:
self.min=value
self.max=value
#we have a new min value
elif valueself.max:
self.max=value
#sum the data so far
self.sum=self.sum+value
#and increment the count
self.count=self.count+1
#store for later use
self.data.append(value)
"""
returns the minimum value amongst all the values
stored in this object
"""
def getMin(self):
return self.min
"""
returns the maximum value amongst all the values
stored in this object
"""
def getMax(self):
return self.max
"""
returns the number of values stored in this object
"""
def getCount(self):
return self.count

"""
returns the simple average
"""
def getAvg(self):
return self.sum/self.count
"""
returns the data used
"""
def getData(self):
return self.data

class ParseFile:
"""
open the file, compile the egex pattern and
set a objetc to track the stats
"""
def __init__(self,filename,pattern):
self.file=open(filename,'r')
self.pattern=re.compile(pattern)
self.minmaxavg=MinMaxAvg()

"""
parse the file and send each line for further
processing
"""
def parse(self):
for line in self.file:
self.parseLine(line)

"""
apply regex and parse individual line
"""
def parseLine(self,inputLine):
#match the line to the regex
line=self.pattern.match(inputLine)
#if it is a match
if line is not None:
#get the string representation
line=line.group()
#split at the first separator
line=line.split("******")
#split again to get the two bits that we need
data=line[1].split("=")
#get the category name
self.detail=data[0]
#and set the value to get the stats
self.minmaxavg.put(int(data[1]))

"""
This method parses the file against the pattern and prints the
stats to a file specified by the user
` """
def result(self,reportFile):
#parse the input file
self.parse()
#open the output file
f=open(reportFile,'a')
#write the results
f.write("========================================\n")
f.write("results for "+str(self.detail)+"\n")
f.write("minimum time " +str(self.minmaxavg.getMin())+"\n")
f.write("maximum time " +str(self.minmaxavg.getMax())+"\n")
f.write("Number of samples " +str(self.minmaxavg.getCount())+"\n")
f.write("Average value " +str(self.minmaxavg.getAvg())+"\n")
f.write("Data used "+ str(self.minmaxavg.getData())+"\n")
f.write("========================================\n")

#if running stand-alone
if __name__=="__main__":
ParseFile('application.log','^.*total_processing_time_per_request=[0-9]*').result('report.txt')






Hope this is useful to someone!

0 comments: