Skip to content
Extraits de code Groupes Projets
Valider fed67c99 rédigé par diosmosis's avatar diosmosis
Parcourir les fichiers

Add new --w3c-field-regex option to log importer which allows specifying regex...

Add a new --w3c-field-regex option to the log importer, which allows specifying a regex for any W3C extended log field. This option can be combined with the --regex-group-to-...-cvar options to track any field that the importer doesn't natively recognize.
parent 28ddcab3
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
...@@ -270,9 +270,13 @@ class W3cExtendedFormat(RegexFormat): ...@@ -270,9 +270,13 @@ class W3cExtendedFormat(RegexFormat):
expected_fields['time-taken'] = '(?P<generation_time_milli>[\d.]+)' expected_fields['time-taken'] = '(?P<generation_time_milli>[\d.]+)'
for mapped_field_name, field_name in config.options.custom_w3c_fields.iteritems(): for mapped_field_name, field_name in config.options.custom_w3c_fields.iteritems():
expected_fields[mapped_field_name] = type(self).fields[field_name] expected_fields[mapped_field_name] = expected_fields[field_name]
del expected_fields[field_name] del expected_fields[field_name]
# add custom field regexes supplied through --w3c-field-regex option
for field_name, field_regex in config.options.w3c_field_regexes.iteritems():
expected_fields[field_name] = field_regex
# Skip the 'Fields: ' prefix. # Skip the 'Fields: ' prefix.
fields_line = fields_line[9:] fields_line = fields_line[9:]
for field in fields_line.split(): for field in fields_line.split():
...@@ -618,6 +622,14 @@ class Configuration(object): ...@@ -618,6 +622,14 @@ class Configuration(object):
"in conjuction with --log-format-name=w3c_extended.\n" "in conjuction with --log-format-name=w3c_extended.\n"
"Example: --w3c-fields='#Fields: date time c-ip ...'" "Example: --w3c-fields='#Fields: date time c-ip ...'"
) )
option_parser.add_option(
'--w3c-field-regex', action='callback', callback=functools.partial(self._set_option_map, 'w3c_field_regexes'), type='string',
help="Specify a regex for a field in your W3C extended log file. You can use this option to parse fields the "
"importer does not natively recognize and then use one of the --regex-group-to-XXX-cvar options to track "
"the field in a custom variable. For example, specifying --w3c-field-regex=sc-win32-status=(?P<win32_status>\\S+) "
"--regex-group-to-page-cvar=\"win32_status=Windows Status Code\" will track the sc-win32-status IIS field "
"in the 'Windows Status Code' custom variable. Regexes must contain a named group."
)
option_parser.add_option( option_parser.add_option(
'--title-category-delimiter', dest='title_category_delimiter', default='/', '--title-category-delimiter', dest='title_category_delimiter', default='/',
help="If --enable-http-errors is used, errors are shown in the page titles report. If you have " help="If --enable-http-errors is used, errors are shown in the page titles report. If you have "
...@@ -740,6 +752,15 @@ class Configuration(object): ...@@ -740,6 +752,15 @@ class Configuration(object):
if not hasattr(self.options, 'regex_group_to_page_cvars_map'): if not hasattr(self.options, 'regex_group_to_page_cvars_map'):
self.options.regex_group_to_page_cvars_map = {} self.options.regex_group_to_page_cvars_map = {}
if not hasattr(self.options, 'w3c_field_regexes'):
self.options.w3c_field_regexes = {}
else:
# make sure each custom w3c field regex has a named group
for field_name, field_regex in self.options.w3c_field_regexes.iteritems():
if '(?P<' not in field_regex:
fatal_error("cannot find named group in custom w3c field regex '%s' for field '%s'" % (field_regex, field_name))
return
if not self.options.piwik_url: if not self.options.piwik_url:
fatal_error('no URL given for Piwik') fatal_error('no URL given for Piwik')
......
...@@ -272,8 +272,13 @@ class ManySitesImportedLogs extends Fixture ...@@ -272,8 +272,13 @@ class ManySitesImportedLogs extends Fixture
if ($mapToCustom) { if ($mapToCustom) {
$opts['--regex-group-to-visit-cvar'] = 'userid=User Name'; $opts['--regex-group-to-visit-cvar'] = 'userid=User Name';
$opts['--regex-group-to-page-cvar'] = 'generation_time_milli=Generation Time'; $opts['--regex-group-to-page-cvar'] = array(
'generation_time_milli=Generation Time',
'win32_status=Windows Status Code'
);
$opts['--ignore-groups'] = 'userid'; $opts['--ignore-groups'] = 'userid';
$opts['--w3c-field-regex'] = 'sc-win32-status=(?P<win32_status>\S+)';
$opts['--w3c-time-taken-milli'] = false;
} }
self::executeLogImporter($logFile, $opts); self::executeLogImporter($logFile, $opts);
......
...@@ -225,6 +225,36 @@ ...@@ -225,6 +225,36 @@
</row> </row>
</subtable> </subtable>
</row> </row>
<row>
<label>Windows Status Code</label>
<nb_actions>4</nb_actions>
<subtable>
<row>
<label>24</label>
<nb_visits>1</nb_visits>
<nb_actions>1</nb_actions>
<sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors>
</row>
<row>
<label>32</label>
<nb_visits>1</nb_visits>
<nb_actions>1</nb_actions>
<sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors>
</row>
<row>
<label>42</label>
<nb_visits>1</nb_visits>
<nb_actions>1</nb_actions>
<sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors>
</row>
<row>
<label>96</label>
<nb_visits>1</nb_visits>
<nb_actions>1</nb_actions>
<sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors>
</row>
</subtable>
</row>
<row> <row>
<label>Bot</label> <label>Bot</label>
<nb_visits>1</nb_visits> <nb_visits>1</nb_visits>
......
...@@ -2112,8 +2112,12 @@ ...@@ -2112,8 +2112,12 @@
<customVariablePageValue1>359</customVariablePageValue1> <customVariablePageValue1>359</customVariablePageValue1>
</row> </row>
<row> <row>
<customVariablePageName2>HTTP-code</customVariablePageName2> <customVariablePageName2>Windows Status Code</customVariablePageName2>
<customVariablePageValue2>200</customVariablePageValue2> <customVariablePageValue2>96</customVariablePageValue2>
</row>
<row>
<customVariablePageName3>HTTP-code</customVariablePageName3>
<customVariablePageValue3>200</customVariablePageValue3>
</row> </row>
</customVariables> </customVariables>
<generationTime>0.36s</generationTime> <generationTime>0.36s</generationTime>
...@@ -2264,8 +2268,12 @@ ...@@ -2264,8 +2268,12 @@
<customVariablePageValue1>109</customVariablePageValue1> <customVariablePageValue1>109</customVariablePageValue1>
</row> </row>
<row> <row>
<customVariablePageName2>HTTP-code</customVariablePageName2> <customVariablePageName2>Windows Status Code</customVariablePageName2>
<customVariablePageValue2>200</customVariablePageValue2> <customVariablePageValue2>32</customVariablePageValue2>
</row>
<row>
<customVariablePageName3>HTTP-code</customVariablePageName3>
<customVariablePageValue3>200</customVariablePageValue3>
</row> </row>
</customVariables> </customVariables>
<generationTime>0.11s</generationTime> <generationTime>0.11s</generationTime>
...@@ -2475,8 +2483,12 @@ ...@@ -2475,8 +2483,12 @@
<customVariablePageValue1>359</customVariablePageValue1> <customVariablePageValue1>359</customVariablePageValue1>
</row> </row>
<row> <row>
<customVariablePageName2>HTTP-code</customVariablePageName2> <customVariablePageName2>Windows Status Code</customVariablePageName2>
<customVariablePageValue2>404</customVariablePageValue2> <customVariablePageValue2>24</customVariablePageValue2>
</row>
<row>
<customVariablePageName3>HTTP-code</customVariablePageName3>
<customVariablePageValue3>404</customVariablePageValue3>
</row> </row>
</customVariables> </customVariables>
<generationTime>0.36s</generationTime> <generationTime>0.36s</generationTime>
...@@ -2620,8 +2632,12 @@ ...@@ -2620,8 +2632,12 @@
<customVariablePageValue1>0</customVariablePageValue1> <customVariablePageValue1>0</customVariablePageValue1>
</row> </row>
<row> <row>
<customVariablePageName2>HTTP-code</customVariablePageName2> <customVariablePageName2>Windows Status Code</customVariablePageName2>
<customVariablePageValue2>301</customVariablePageValue2> <customVariablePageValue2>42</customVariablePageValue2>
</row>
<row>
<customVariablePageName3>HTTP-code</customVariablePageName3>
<customVariablePageValue3>301</customVariablePageValue3>
</row> </row>
</customVariables> </customVariables>
<icon /> <icon />
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#Version: 1.0 #Version: 1.0
#Start-Date: 2014-11-18 00:00:00.128 #Start-Date: 2014-11-18 00:00:00.128
#Fields: date-local time-local s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs(User-Agent) cs(Referer) cs(Host) sc-status sc-substatus sc-win32-status TimeTakenMS #Fields: date-local time-local s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs(User-Agent) cs(Referer) cs(Host) sc-status sc-substatus sc-win32-status TimeTakenMS
2012-08-15 17:00:00.363 10.10.28.140 GET /Products/theProduct - 80 user1 "70.95.0.0" "Mozilla/5.0 (Linux; Android 4.4.4; SM-G900V Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.59 Mobile Safari/537.36" "http://example.com/Search/SearchResults.pg?informationRecipient.languageCode.c=en" "xzy.example.com" 200 0 0 109 2012-08-15 17:00:00.363 10.10.28.140 GET /Products/theProduct - 80 user1 "70.95.0.0" "Mozilla/5.0 (Linux; Android 4.4.4; SM-G900V Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.59 Mobile Safari/537.36" "http://example.com/Search/SearchResults.pg?informationRecipient.languageCode.c=en" "xzy.example.com" 200 0 32 109
2012-08-15 17:00:00.660 10.10.28.140 GET /Topic/hw43061 - 80 user1 "70.95.32.0" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36" - "example.hello.com" 301 0 0 0 2012-08-15 17:00:00.660 10.10.28.140 GET /Topic/hw43061 - 80 user1 "70.95.32.0" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36" - "example.hello.com" 301 0 42 0
2012-08-15 17:00:00.675 10.10.28.140 GET /hello/world/6,681965 - 80 - "173.5.0.0" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36" - "hello.example.com" 404 0 0 359 2012-08-15 17:00:00.675 10.10.28.140 GET /hello/world/6,681965 - 80 - "173.5.0.0" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36" - "hello.example.com" 404 0 24 359
2012-08-15 17:30:00.675 10.10.28.140 GET /hello/from/another/world/6,681965 - 80 user2 "173.5.0.0" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36" - "hello.example.com" 200 0 0 359 2012-08-15 17:30:00.675 10.10.28.140 GET /hello/from/another/world/6,681965 - 80 user2 "173.5.0.0" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36" - "hello.example.com" 200 0 96 359
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter