diff --git a/misc/log-analytics/import_logs.py b/misc/log-analytics/import_logs.py index d129d9c4cd583d3000b2b4df05d7e9e4945df389..6cb456cc0e417d90086e3251417a57fa94f69485 100755 --- a/misc/log-analytics/import_logs.py +++ b/misc/log-analytics/import_logs.py @@ -270,9 +270,13 @@ class W3cExtendedFormat(RegexFormat): expected_fields['time-taken'] = '(?P<generation_time_milli>[\d.]+)' for mapped_field_name, field_name in config.options.custom_w3c_fields.iteritems(): - expected_fields[mapped_field_name] = type(self).fields[field_name] + expected_fields[mapped_field_name] = expected_fields[field_name] del expected_fields[field_name] + # add custom field regexes supplied through --w3c-field-regex option + for field_name, field_regex in config.options.w3c_field_regexes.iteritems(): + expected_fields[field_name] = field_regex + # Skip the 'Fields: ' prefix. fields_line = fields_line[9:] for field in fields_line.split(): @@ -618,6 +622,14 @@ class Configuration(object): "in conjuction with --log-format-name=w3c_extended.\n" "Example: --w3c-fields='#Fields: date time c-ip ...'" ) + option_parser.add_option( + '--w3c-field-regex', action='callback', callback=functools.partial(self._set_option_map, 'w3c_field_regexes'), type='string', + help="Specify a regex for a field in your W3C extended log file. You can use this option to parse fields the " + "importer does not natively recognize and then use one of the --regex-group-to-XXX-cvar options to track " + "the field in a custom variable. For example, specifying --w3c-field-regex=sc-win32-status=(?P<win32_status>\\S+) " + "--regex-group-to-page-cvar=\"win32_status=Windows Status Code\" will track the sc-win32-status IIS field " + "in the 'Windows Status Code' custom variable. Regexes must contain a named group." + ) option_parser.add_option( '--title-category-delimiter', dest='title_category_delimiter', default='/', help="If --enable-http-errors is used, errors are shown in the page titles report. If you have " @@ -740,6 +752,15 @@ class Configuration(object): if not hasattr(self.options, 'regex_group_to_page_cvars_map'): self.options.regex_group_to_page_cvars_map = {} + if not hasattr(self.options, 'w3c_field_regexes'): + self.options.w3c_field_regexes = {} + else: + # make sure each custom w3c field regex has a named group + for field_name, field_regex in self.options.w3c_field_regexes.iteritems(): + if '(?P<' not in field_regex: + fatal_error("cannot find named group in custom w3c field regex '%s' for field '%s'" % (field_regex, field_name)) + return + if not self.options.piwik_url: fatal_error('no URL given for Piwik') diff --git a/tests/PHPUnit/Fixtures/ManySitesImportedLogs.php b/tests/PHPUnit/Fixtures/ManySitesImportedLogs.php index 1dd043200fb4f2e5361c9e652c3482392eff2c36..c7e65c74993d6d219f48e84d970af64a1d795480 100644 --- a/tests/PHPUnit/Fixtures/ManySitesImportedLogs.php +++ b/tests/PHPUnit/Fixtures/ManySitesImportedLogs.php @@ -272,8 +272,13 @@ class ManySitesImportedLogs extends Fixture if ($mapToCustom) { $opts['--regex-group-to-visit-cvar'] = 'userid=User Name'; - $opts['--regex-group-to-page-cvar'] = 'generation_time_milli=Generation Time'; + $opts['--regex-group-to-page-cvar'] = array( + 'generation_time_milli=Generation Time', + 'win32_status=Windows Status Code' + ); $opts['--ignore-groups'] = 'userid'; + $opts['--w3c-field-regex'] = 'sc-win32-status=(?P<win32_status>\S+)'; + $opts['--w3c-time-taken-milli'] = false; } self::executeLogImporter($logFile, $opts); diff --git a/tests/PHPUnit/System/expected/test_ImportLogs__CustomVariables.getCustomVariables_month.xml b/tests/PHPUnit/System/expected/test_ImportLogs__CustomVariables.getCustomVariables_month.xml index 4a6377547d38bc4a58f0fa6153bb567e5662ef13..a0aa7ab8ff135faaf6a11c2a8404665e6f94ae5c 100644 --- a/tests/PHPUnit/System/expected/test_ImportLogs__CustomVariables.getCustomVariables_month.xml +++ b/tests/PHPUnit/System/expected/test_ImportLogs__CustomVariables.getCustomVariables_month.xml @@ -225,6 +225,36 @@ </row> </subtable> </row> + <row> + <label>Windows Status Code</label> + <nb_actions>4</nb_actions> + <subtable> + <row> + <label>24</label> + <nb_visits>1</nb_visits> + <nb_actions>1</nb_actions> + <sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors> + </row> + <row> + <label>32</label> + <nb_visits>1</nb_visits> + <nb_actions>1</nb_actions> + <sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors> + </row> + <row> + <label>42</label> + <nb_visits>1</nb_visits> + <nb_actions>1</nb_actions> + <sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors> + </row> + <row> + <label>96</label> + <nb_visits>1</nb_visits> + <nb_actions>1</nb_actions> + <sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors> + </row> + </subtable> + </row> <row> <label>Bot</label> <nb_visits>1</nb_visits> diff --git a/tests/PHPUnit/System/expected/test_ImportLogs__Live.getLastVisitsDetails_range.xml b/tests/PHPUnit/System/expected/test_ImportLogs__Live.getLastVisitsDetails_range.xml index 544cac8cbb217f11529a5e525b311b9963da4722..3d2bb39bfc177db1f810ca536fc131d23e507430 100644 --- a/tests/PHPUnit/System/expected/test_ImportLogs__Live.getLastVisitsDetails_range.xml +++ b/tests/PHPUnit/System/expected/test_ImportLogs__Live.getLastVisitsDetails_range.xml @@ -2112,8 +2112,12 @@ <customVariablePageValue1>359</customVariablePageValue1> </row> <row> - <customVariablePageName2>HTTP-code</customVariablePageName2> - <customVariablePageValue2>200</customVariablePageValue2> + <customVariablePageName2>Windows Status Code</customVariablePageName2> + <customVariablePageValue2>96</customVariablePageValue2> + </row> + <row> + <customVariablePageName3>HTTP-code</customVariablePageName3> + <customVariablePageValue3>200</customVariablePageValue3> </row> </customVariables> <generationTime>0.36s</generationTime> @@ -2264,8 +2268,12 @@ <customVariablePageValue1>109</customVariablePageValue1> </row> <row> - <customVariablePageName2>HTTP-code</customVariablePageName2> - <customVariablePageValue2>200</customVariablePageValue2> + <customVariablePageName2>Windows Status Code</customVariablePageName2> + <customVariablePageValue2>32</customVariablePageValue2> + </row> + <row> + <customVariablePageName3>HTTP-code</customVariablePageName3> + <customVariablePageValue3>200</customVariablePageValue3> </row> </customVariables> <generationTime>0.11s</generationTime> @@ -2475,8 +2483,12 @@ <customVariablePageValue1>359</customVariablePageValue1> </row> <row> - <customVariablePageName2>HTTP-code</customVariablePageName2> - <customVariablePageValue2>404</customVariablePageValue2> + <customVariablePageName2>Windows Status Code</customVariablePageName2> + <customVariablePageValue2>24</customVariablePageValue2> + </row> + <row> + <customVariablePageName3>HTTP-code</customVariablePageName3> + <customVariablePageValue3>404</customVariablePageValue3> </row> </customVariables> <generationTime>0.36s</generationTime> @@ -2620,8 +2632,12 @@ <customVariablePageValue1>0</customVariablePageValue1> </row> <row> - <customVariablePageName2>HTTP-code</customVariablePageName2> - <customVariablePageValue2>301</customVariablePageValue2> + <customVariablePageName2>Windows Status Code</customVariablePageName2> + <customVariablePageValue2>42</customVariablePageValue2> + </row> + <row> + <customVariablePageName3>HTTP-code</customVariablePageName3> + <customVariablePageValue3>301</customVariablePageValue3> </row> </customVariables> <icon /> diff --git a/tests/resources/access-logs/fake_logs_custom_iis.log b/tests/resources/access-logs/fake_logs_custom_iis.log index d7ff476a6c5cff22e345e748c34e3c70fbd73aa5..f2001c51ddb96f0017fc9ae9fb5c950a0134ca85 100644 --- a/tests/resources/access-logs/fake_logs_custom_iis.log +++ b/tests/resources/access-logs/fake_logs_custom_iis.log @@ -2,7 +2,7 @@ #Version: 1.0 #Start-Date: 2014-11-18 00:00:00.128 #Fields: date-local time-local s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs(User-Agent) cs(Referer) cs(Host) sc-status sc-substatus sc-win32-status TimeTakenMS -2012-08-15 17:00:00.363 10.10.28.140 GET /Products/theProduct - 80 user1 "70.95.0.0" "Mozilla/5.0 (Linux; Android 4.4.4; SM-G900V Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.59 Mobile Safari/537.36" "http://example.com/Search/SearchResults.pg?informationRecipient.languageCode.c=en" "xzy.example.com" 200 0 0 109 -2012-08-15 17:00:00.660 10.10.28.140 GET /Topic/hw43061 - 80 user1 "70.95.32.0" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36" - "example.hello.com" 301 0 0 0 -2012-08-15 17:00:00.675 10.10.28.140 GET /hello/world/6,681965 - 80 - "173.5.0.0" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36" - "hello.example.com" 404 0 0 359 -2012-08-15 17:30:00.675 10.10.28.140 GET /hello/from/another/world/6,681965 - 80 user2 "173.5.0.0" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36" - "hello.example.com" 200 0 0 359 +2012-08-15 17:00:00.363 10.10.28.140 GET /Products/theProduct - 80 user1 "70.95.0.0" "Mozilla/5.0 (Linux; Android 4.4.4; SM-G900V Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.59 Mobile Safari/537.36" "http://example.com/Search/SearchResults.pg?informationRecipient.languageCode.c=en" "xzy.example.com" 200 0 32 109 +2012-08-15 17:00:00.660 10.10.28.140 GET /Topic/hw43061 - 80 user1 "70.95.32.0" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36" - "example.hello.com" 301 0 42 0 +2012-08-15 17:00:00.675 10.10.28.140 GET /hello/world/6,681965 - 80 - "173.5.0.0" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36" - "hello.example.com" 404 0 24 359 +2012-08-15 17:30:00.675 10.10.28.140 GET /hello/from/another/world/6,681965 - 80 user2 "173.5.0.0" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36" - "hello.example.com" 200 0 96 359