1 """
2 Plan:
3 Parse:
4 ip,bytesTx,referrer,statuscode,user-agent
5 datetime *
6
7 Insert:
8
9
10 Features:
11 possible broken links
12 request flooding
13 page's hits by weeks
14 page's hits throughout the day hence possible audience
15 """
16
17
18
19 import time
20 import sqlite3
21 import re
22 from datetime import datetime
23
25 """This class provides functionality to parse the server log file and insert its entries into an SQLite database.
26 The database table is dropped and recreated each time an instance of this class is created.
27 """
def __init__(self, logfilename=None, dbfilelocation=None):
    """Parse a server log file and load every entry into the SQLite database.

    Optionally overrides the module-level log file and database paths,
    recreates the ``serverlog`` table through ``resetDb()``, then reads the
    log in chunks and bulk-inserts the tuples produced by ``parseLines``.

    @param logfilename: path of server log file; when None the current
        module-level value is kept
    @param dbfilelocation: location of the sqlite database file to create
        database; when None the current module-level value is kept
    """
    # Without this declaration the assignments below would only bind
    # locals, and dbfile()/logfile() would keep returning the old paths.
    global __logfilename__, __dbfilename__

    if logfilename is not None:
        __logfilename__ = logfilename
    if dbfilelocation is not None:
        __dbfilename__ = dbfilelocation

    resetDb()
    conn = sqlite3.connect(dbfile())
    try:
        # open() replaces the Python-2-only file() builtin; the with-block
        # closes the log even when parsing or inserting fails.
        with open(logfile()) as fp:
            while True:
                # readlines(sizehint) returns complete lines totalling
                # roughly 5 MB, which keeps memory use bounded.
                tuples = self.parseLines(fp.readlines(5000000))
                if not tuples:
                    break
                conn.executemany(
                    """INSERT INTO serverlog VALUES(?,?,?,?,?,?,?,?,?)""",
                    tuples)
        conn.commit()
    finally:
        conn.close()
54
55
56
def parseLines(self, logLines):
    """Parse server log lines into tuples ready for insertion.

    Each tuple has the layout
    (ip, datetime, request-method, request-path, request-param,
    user-agent, status-code, bytesTx, referrer).  Exactly one tuple is
    produced per input line, so the result is empty only for empty input.
    status-code and bytesTx are returned as strings; bytesTx may be '-'.

    @param logLines: iterable collection of lines from server log file
    @return: list of tuples
    @raise ValueError: if a line lacks the IP, the three quoted fields
        (request, referrer, user-agent), a three-part request, or the
        status/bytes pair.  Errors raised by ``__parseDateTime__``
        propagate unchanged.
    """
    # Compiled once per call instead of once per line.
    reIp = re.compile(r'^[^ ]+')
    reQuoted = re.compile(r'"[^"]*"')
    reNums = re.compile(r' (\d+) (\d+|-) ')

    lValues = []
    for line in logLines:
        ipMatch = reIp.search(line)
        quoted = [quotedStr.strip('"') for quotedStr in reQuoted.findall(line)]
        numsMatch = reNums.search(line)
        if ipMatch is None or len(quoted) < 3 or numsMatch is None:
            raise ValueError('malformed log line: %r' % (line,))

        # quoted[0] is the request line: "<method> <path> <protocol>".
        request = quoted[0].split()
        if len(request) != 3:
            raise ValueError('malformed request %r in log line: %r'
                             % (quoted[0], line))
        requestMethod = request[0]

        # partition() keeps everything after the FIRST '?', so a query
        # string that itself contains '?' is no longer dropped.
        requestPath, sep, query = request[1].partition('?')
        requestParam = query if sep else '-'

        status_code, bytesTx = numsMatch.groups()

        lValues.append((ipMatch.group(0), self.__parseDateTime__(line),
                        requestMethod, requestPath, requestParam,
                        quoted[2], status_code, bytesTx, quoted[1]))
    return lValues
124
def __parseDateTime__(self, logLine):
    """Extract the request timestamp from one line of the log file.

    Intended for internal use.  Only the first whitespace-separated token
    inside the square brackets is parsed; anything after it (such as a UTC
    offset) is discarded, so the result is a naive datetime.

    @param logLine: a line of logfile
    @return: python datetime object
    @raise IndexError: if the line has no non-blank bracketed section
    @raise ValueError: if the token does not match %d/%b/%Y:%H:%M:%S
    """
    # The group is greedy and may run up to the last ']' on the line; that
    # is harmless because only the first token of the capture is used.
    bracketed = re.findall(r'\[(.+)\]', logLine)
    strDate = bracketed[0].split()[0]
    return datetime.strptime(strDate, "%d/%b/%Y:%H:%M:%S")
135
137 """Scratch code written while testing datetime parsing. Intended for internal use.
It parses one hard-coded timestamp with datetime.strptime; dateString (which also carries a UTC offset) is assigned but never used.
NOTE(review): the def line of this block is missing from the listing, so its name and parameters cannot be confirmed.
138 @return: Nothing
139 """
140 dateString='15/Dec/2008:17:17:34 +0530'
141 dt=datetime.strptime("15/Dec/2008:17:00:26", "%d/%b/%Y:%H:%M:%S")
142
143 return
144
146 """Scratch code written while testing logfile parsing. Intended for internal use.
It tries the IP, timestamp, quoted-field and status/bytes regular expressions on hard-coded samples and on lines[0], printing the intermediate results.
NOTE(review): the def line of this block is missing from the listing; `lines` is not defined in the visible code and is presumably a parameter or a global -- confirm.
NOTE(review): reTime.match() anchors at the start of the string and the sample starts with '10', so match is None and match.groups() raises AttributeError.
NOTE(review): the second captQuoted comprehension appears to iterate over the single string finds[0], i.e. character by character, so captQuoted[1] and captQuoted[2] are single characters -- confirm once the indentation is restored.
147 @return: Nothing
148 """
149
150 reIp=re.compile('^[0-9\.]*')
151 finds=reIp.findall('10.105.2.6 - -')
152 captIp=finds[0]
153 print captIp
154
155
156
157
158 reTime=re.compile('\\[')
159 match=reTime.match('10 - - [1234 asd]12345[98766e]')
160 print match
161 capTime=match.groups()
162 print capTime
163
164
165
166 reReq=re.compile('\\"[^\\"]*\\"')
167 finds=reReq.findall(lines[0])
168 if len(finds)>0:
169 captQuoted=finds[0]
170 print finds
171 print captQuoted
172 striped= [ find.strip('"') for find in finds]
173 print striped
174
175 captQuoted=[quotedStr.strip('"') for quotedStr in captQuoted]
176 print captQuoted
177 d={'referrer':captQuoted[1],'user-agent':captQuoted[2]}
178
179
180
181
182 reNums=re.compile(' (\d+) (\d+|-) ')
183
184
185
186
187
188 found=reNums.findall(lines[0])[0]
189 print found
190 assert len(found)==2 or len(found)==1
191 status_code=found[0]
192 bytesTx=found[1]
193
194 print status_code
195 print bytesTx
196
197 return
198
199
# File path of the database file (module-level value returned by dbfile()).
__dbfilename__ = 'test.db'


def dbfile():
    """Return the file path of the database file.

    @return: File path of the database file
    """
    return __dbfilename__
208
# File path of the server log file (module-level value returned by logfile()).
__logfilename__ = 'access_log'


def logfile():
    """Return the file path of the server log file.

    @return: File path of the server log file
    """
    return __logfilename__
216
218 """Date time adapter for database.
Converts ts to a timestamp in seconds via time.mktime(ts.timetuple()); time.mktime interprets the tuple as local time.
NOTE(review): the def line (name and signature) is missing from this listing; ts is presumably a datetime object -- confirm.
219 """
220 return time.mktime(ts.timetuple())
221
222
def resetDb():
    """Drop any existing ``serverlog`` table and create a fresh, empty one.

    The database file used is the one returned by ``dbfile()``.
    """
    conn = sqlite3.connect(dbfile())
    try:
        # IF EXISTS: a brand-new database file has no table to drop, and a
        # plain DROP TABLE raises sqlite3.OperationalError in that case.
        conn.execute('drop table if exists serverlog;')
        conn.execute("""create table serverlog(ip varchar(15),requestdate date,request_method varchar(4),request_path varchar(255),request_param varchar(255),user_agent varchar(100),status_code char(3), bytesTx integer, referrer varchar(255)) ;""")
        conn.commit()
    finally:
        # Release the connection even if one of the statements fails.
        conn.close()
230
# Progress messages written with Python 2 print statements.
# NOTE(review): indentation is lost in this listing; these appear to be module-level statements that run on import -- confirm.
231 print 'Hello World'
232
233
234 print 'Finished'
235