Skip to content
Extraits de code Groupes Projets
Valider 35193f9a rédigé par Tomek Wójcik
Parcourir les fichiers

Adds replay-tracking option to import_logs script.

parent aca5e95a
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
...@@ -30,6 +30,7 @@ import threading ...@@ -30,6 +30,7 @@ import threading
import time import time
import urllib import urllib
import urllib2 import urllib2
import urlparse
try: try:
import json import json
...@@ -351,6 +352,11 @@ class Configuration(object): ...@@ -351,6 +352,11 @@ class Configuration(object):
'--recorder-max-payload-size', dest='recorder_max_payload_size', default=200, type='int', '--recorder-max-payload-size', dest='recorder_max_payload_size', default=200, type='int',
help="Maximum number of log entries to record in one tracking request (default: %default). " help="Maximum number of log entries to record in one tracking request (default: %default). "
) )
option_parser.add_option(
'--replay-tracking', dest='replay_tracking',
action='store_true', default=False,
help="Replay piwik.php requests found in custom logs (only piwik.php requests expected)"
)
option_parser.add_option( option_parser.add_option(
'--output', dest='output', '--output', dest='output',
help="Redirect output (stdout and stderr) to the specified file" help="Redirect output (stdout and stderr) to the specified file"
...@@ -1170,17 +1176,13 @@ class Parser(object): ...@@ -1170,17 +1176,13 @@ class Parser(object):
The Parser parses the lines in a specified file and inserts them into The Parser parses the lines in a specified file and inserts them into
a Queue. a Queue.
""" """
check_methods = []
def __init__(self):
self.check_methods = [method for name, method
in inspect.getmembers(self, predicate=inspect.ismethod)
if name.startswith('check_')]
## All check_* methods are called for each hit and must return True if the ## All check_* methods are called for each hit and must return True if the
## hit can be imported, False otherwise. ## hit can be imported, False otherwise.
def check_hostname(self, hit): @staticmethod
def check_hostname(hit):
# Check against config.hostnames. # Check against config.hostnames.
if not hasattr(hit, 'host') or not config.options.hostnames: if not hasattr(hit, 'host') or not config.options.hostnames:
return True return True
...@@ -1194,7 +1196,8 @@ class Parser(object): ...@@ -1194,7 +1196,8 @@ class Parser(object):
stats.count_lines_hostname_skipped.increment() stats.count_lines_hostname_skipped.increment()
return result return result
def check_static(self, hit): @staticmethod
def check_static(hit):
extension = hit.path.rsplit('.')[-1].lower() extension = hit.path.rsplit('.')[-1].lower()
if extension in STATIC_EXTENSIONS: if extension in STATIC_EXTENSIONS:
if config.options.enable_static: if config.options.enable_static:
...@@ -1205,14 +1208,16 @@ class Parser(object): ...@@ -1205,14 +1208,16 @@ class Parser(object):
return False return False
return True return True
def check_download(self, hit): @staticmethod
def check_download(hit):
extension = hit.path.rsplit('.')[-1].lower() extension = hit.path.rsplit('.')[-1].lower()
if extension in DOWNLOAD_EXTENSIONS: if extension in DOWNLOAD_EXTENSIONS:
stats.count_lines_downloads.increment() stats.count_lines_downloads.increment()
hit.is_download = True hit.is_download = True
return True return True
def check_user_agent(self, hit): @staticmethod
def check_user_agent(hit):
user_agent = hit.user_agent.lower() user_agent = hit.user_agent.lower()
for s in itertools.chain(EXCLUDED_USER_AGENTS, config.options.excluded_useragents): for s in itertools.chain(EXCLUDED_USER_AGENTS, config.options.excluded_useragents):
if s in user_agent: if s in user_agent:
...@@ -1224,7 +1229,8 @@ class Parser(object): ...@@ -1224,7 +1229,8 @@ class Parser(object):
return False return False
return True return True
def check_http_error(self, hit): @staticmethod
def check_http_error(hit):
if hit.status[0] in ('4', '5'): if hit.status[0] in ('4', '5'):
if config.options.enable_http_errors: if config.options.enable_http_errors:
hit.is_error = True hit.is_error = True
...@@ -1234,7 +1240,8 @@ class Parser(object): ...@@ -1234,7 +1240,8 @@ class Parser(object):
return False return False
return True return True
def check_http_redirect(self, hit): @staticmethod
def check_http_redirect(hit):
if hit.status[0] == '3' and hit.status != '304': if hit.status[0] == '3' and hit.status != '304':
if config.options.enable_http_redirects: if config.options.enable_http_redirects:
hit.is_redirect = True hit.is_redirect = True
...@@ -1244,7 +1251,8 @@ class Parser(object): ...@@ -1244,7 +1251,8 @@ class Parser(object):
return False return False
return True return True
def check_path(self, hit): @staticmethod
def check_path(hit):
for excluded_path in config.options.excluded_paths: for excluded_path in config.options.excluded_paths:
if fnmatch.fnmatch(hit.path, excluded_path): if fnmatch.fnmatch(hit.path, excluded_path):
return False return False
...@@ -1264,7 +1272,8 @@ class Parser(object): ...@@ -1264,7 +1272,8 @@ class Parser(object):
else: else:
logging.debug('Format %s does not match', name) logging.debug('Format %s does not match', name)
def parse(self, filename): @classmethod
def parse(cls, filename):
""" """
Parse the specified filename and insert hits in the queue. Parse the specified filename and insert hits in the queue.
""" """
...@@ -1302,12 +1311,13 @@ class Parser(object): ...@@ -1302,12 +1311,13 @@ class Parser(object):
return return
file.seek(0) file.seek(0)
format = self.detect_format(file) format = cls.detect_format(file)
if format is None: if format is None:
return fatal_error( return fatal_error(
'Cannot guess the logs format. Please give one using ' 'Cannot guess the logs format. Please give one using '
'either the --log-format-name or --log-format-regex option' 'either the --log-format-name or --log-format-regex option'
) )
# Make sure the format is compatible with the resolver. # Make sure the format is compatible with the resolver.
resolver.check_format(format) resolver.check_format(format)
...@@ -1375,7 +1385,7 @@ class Parser(object): ...@@ -1375,7 +1385,7 @@ class Parser(object):
pass pass
# Check if the hit must be excluded. # Check if the hit must be excluded.
if not all((method(hit) for method in self.check_methods)): if not all((getattr(cls, name)(hit) for name in cls.check_methods)):
continue continue
# Parse date. # Parse date.
...@@ -1401,19 +1411,26 @@ class Parser(object): ...@@ -1401,19 +1411,26 @@ class Parser(object):
hit.date -= datetime.timedelta(hours=timezone/100) hit.date -= datetime.timedelta(hours=timezone/100)
# Check if the hit must be excluded. # Check if the hit must be excluded.
if all((method(hit) for method in self.check_methods)): if all((getattr(cls, name)(hit) for name in cls.check_methods)):
hits.append(hit) hits.append(hit)
if len(hits) >= config.options.recorder_max_payload_size * len(Recorder.recorders): if len(hits) >= config.options.recorder_max_payload_size * len(Recorder.recorders):
Recorder.add_hits(hits) Recorder.add_hits(hits)
hits = [] hits = []
if config.options.replay_tracking:
# we need a query string and we only consider requests with piwik.php
if hit.query_string and hit.path.lower().endswith('piwik.php'):
query_arguments = urlparse.parse_qs(hit.query_string)
if "idsite" in query_arguments:
hit.args.update((k, v.pop().encode('raw_unicode_escape').decode(config.options.encoding)) for k, v in query_arguments.iteritems())
# add last chunk of hits # add last chunk of hits
if len(hits) > 0: if len(hits) > 0:
Recorder.add_hits(hits) Recorder.add_hits(hits)
# Register the name of every ``check_*`` callable defined on Parser.
# Names (not bound methods) are stored because Parser.parse() resolves them
# with getattr(cls, name) for each hit. ``predicate=callable`` also keeps the
# ``check_methods`` list itself out of the result, even though its name
# starts with the same prefix.
Parser.check_methods.extend(
    name
    for name, _ in inspect.getmembers(Parser, predicate=callable)
    if name.startswith('check_')
)
def main(): def main():
""" """
......
203.38.78.246 - - [05/Feb/2013:07:01:26 +0000] "GET /piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050 HTTP/1.1" 200 192 "http://clearcode.cc/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17"
203.38.78.246 - - [05/Feb/2013:07:01:41 +0000] "GET /piwik.php?action_name=AdviserBrief%20-%20Track%20Your%20Investments%20and%20Plan%20Financial%20Future%20%7C%20Clearcode&idsite=1&rec=1&r=109464&h=17&m=31&s=40&url=http%3A%2F%2Fclearcode.cc%2Fcase%2Fadviserbrief-track-your-investments-and-plan-financial-future%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050 HTTP/1.1" 200 192 "http://clearcode.cc/case/adviserbrief-track-your-investments-and-plan-financial-future" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17"
203.38.78.246 - - [05/Feb/2013:07:01:46 +0000] "GET /piwik.php?action_name=ATL%20Apps%20-%20American%20Tailgating%20League%20Mobile%20Android%20IOS%20Games%20%7C%20Clearcode&idsite=1&rec=1&r=080064&h=17&m=31&s=46&url=http%3A%2F%2Fclearcode.cc%2Fcase%2Fatl-apps-mobile-android-ios-games%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050 HTTP/1.1" 200 192 "http://clearcode.cc/case/atl-apps-mobile-android-ios-games" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17"
...@@ -12,3 +12,133 @@ def test_format_detection(): ...@@ -12,3 +12,133 @@ def test_format_detection():
f = functools.partial(_test, format_name) f = functools.partial(_test, format_name)
f.description = 'Testing autodetection of format ' + format_name f.description = 'Testing autodetection of format ' + format_name
yield f yield f
class Options(object):
    """Mock config options necessary to run checkers from Parser class.

    Stands in for the parsed command-line options that the code under test
    reads through ``config.options``.
    """

    debug = False
    # Encoding used to decode replayed query-string values.
    encoding = 'utf-8'
    log_hostname = 'foo'
    query_string_delimiter = '?'
    piwik_token_auth = False
    piwik_url = 'http://example.com'
    # Multiplied by the number of recorders to get the flush threshold.
    recorder_max_payload_size = 200
    # Enables copying of piwik.php query arguments into ``hit.args``.
    replay_tracking = True
    show_progress = False
    skip = False
    # Empty filters: no hit is rejected by hostname, path or user agent lists.
    hostnames = []
    excluded_paths = []
    excluded_useragents = []
    # NOTE(review): a falsy list; presumably a boolean flag in the real
    # options object -- confirm against the option parser.
    enable_bots = []
class Config(object):
    """Mock configuration.

    Exposes the two attributes set below: ``options`` (the mocked option
    values) and ``format`` (the predefined 'ncsa_extended' log format taken
    from ``import_logs.FORMATS``).
    """

    options = Options()
    format = import_logs.FORMATS['ncsa_extended']
class Resolver(object):
    """Mock resolver which doesn't check connection to real piwik."""

    def check_format(self, format_):
        """Accept any log format without performing a check.

        Parser.parse() calls ``resolver.check_format(format)`` once the
        format is known; this stub does nothing and returns None.
        """
class Recorder(object):
    """Mock recorder which collects hits but doesn't put them in the database."""

    # Class-level list shared by every instance; it holds the collected hits.
    # Parser.parse() flushes once len(hits) reaches
    # recorder_max_payload_size * len(Recorder.recorders), so while this list
    # is empty the threshold is 0 and each hit is handed over immediately.
    recorders = []

    @classmethod
    def add_hits(cls, hits):
        """Append every hit in ``hits`` to the shared ``recorders`` list."""
        cls.recorders.extend(hits)
def test_replay_tracking_arguments():
    """Test data parsing from sample log file.

    Parses 'logs_to_tests.log' with replay tracking enabled and checks that
    the query-string arguments of the first three piwik.php requests end up
    in ``hit.args``.
    """
    import_logs.stats = import_logs.Statistics()
    import_logs.config = Config()
    import_logs.resolver = Resolver()
    import_logs.Recorder = Recorder()
    # The mock keeps its hits in a shared list; empty it so hits left over
    # from an earlier run in the same process cannot sit at the front.
    del import_logs.Recorder.recorders[:]

    import_logs.Parser.parse('logs_to_tests.log')

    hits = [hit.args for hit in import_logs.Recorder.recorders]

    # Arguments that are identical in all three sample requests.
    shared = {
        '_idn': '0',
        'ag': '1',
        '_viewts': '1360047661',
        'urlref': 'http://clearcode.cc/welcome',
        '_ref': 'http://piwik.org/thank-you-all/',
        '_idts': '1360047661',
        'java': '1',
        'res': '1680x1050',
        'idsite': '1',
        'realp': '0',
        'wma': '1',
        '_idvc': '1',
        'cookie': '1',
        'rec': '1',
        'qt': '1',
        'h': '17',
        'm': '31',
        'gears': '0',
        'fla': '1',
        'pdf': '1',
        '_id': '1da79fc743e8bcc4',
        'dir': '1',
        '_refts': '1360047661',
    }
    # Arguments that differ per request, in log-file order.
    specific = [
        {
            'action_name': 'Clearcode - Web and Mobile Development | Technology With Passion',
            'url': 'http://clearcode.cc/',
            's': '25',
            'r': '983420',
        },
        {
            'action_name': 'AdviserBrief - Track Your Investments and Plan Financial Future | Clearcode',
            'url': 'http://clearcode.cc/case/adviserbrief-track-your-investments-and-plan-financial-future/',
            's': '40',
            'r': '109464',
        },
        {
            'action_name': 'ATL Apps - American Tailgating League Mobile Android IOS Games | Clearcode',
            'url': 'http://clearcode.cc/case/atl-apps-mobile-android-ios-games/',
            's': '46',
            'r': '080064',
        },
    ]

    assert len(hits) >= len(specific), 'expected at least %d hits, got %d' % (len(specific), len(hits))

    for index, fields in enumerate(specific):
        expected = dict(shared)
        expected.update(fields)
        for key, value in expected.items():
            assert hits[index][key] == value, 'hit %d: unexpected value for %r' % (index, key)
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Veuillez vous inscrire ou vous connecter pour commenter