Skip to content
Extraits de code Groupes Projets
import_logs.py 37,9 ko
Newer Older
  • Learn to ignore specific revisions
  •                 format = FORMATS[format_name]
                    config.format = format
                    config.format_regexp = re.compile(format)
    
                    # Make sure the format is compatible with the resolver.
                    resolver.check_format(format)
    
                stats.count_lines_parsed.increment()
                if stats.count_lines_parsed.value <= config.options.skip:
                    continue
    
                match = config.format_regexp.match(line)
                if not match:
    
                    continue
    
                hit = Hit(
                    filename=filename,
                    lineno=lineno,
                    status=match.group('status'),
                    full_path=match.group('path'),
    
    Cyril Bay's avatar
    Cyril Bay a validé
                    is_download=False,
                    is_robot=False,
    
    Cyril Bay's avatar
    Cyril Bay a validé
                if config.options.strip_query_string:
                    hit.path = hit.full_path.split('?', 1)[0]
                else:
                    hit.path = hit.full_path
    
    
                # Parse date _with_ timezone to get an UTC timestamp.
                date_string = match.group('date')
                try:
                    tz = float(date_string[-5:])
                    hit.date = datetime.datetime.strptime(date_string[:-6], '%d/%b/%Y:%H:%M:%S')
                except ValueError:
                    # Date format is incorrect, the line is probably badly formatted.
    
                    continue
                hit.date -= datetime.timedelta(hours=tz/100)
    
                try:
                    hit.referrer = match.group('referrer')
                except IndexError:
                    hit.referrer = ''
                if hit.referrer == '-':
                    hit.referrer = ''
    
                try:
                    hit.user_agent = match.group('user_agent')
                except IndexError:
                    hit.user_agent = ''
    
                hit.ip = match.group('ip')
                try:
                    hit.length = int(match.group('length'))
                except ValueError:
                    # Not all lines have a length (e.g. 304 redirects)
                    hit.length = 0
                try:
                    hit.host = match.group('host')
                except IndexError:
                    # Some formats have no host.
                    pass
    
                # Check if the hit must be excluded.
                check_methods = inspect.getmembers(self, predicate=inspect.ismethod)
                if all((method(hit) for name, method in check_methods if name.startswith('check_'))):
                    Recorder.add_hit(hit)
    
    
    
    
    def main():
        """
        Start the importing process.

        Runs the whole import using the module-level ``config``, ``stats`` and
        ``parser`` objects: optionally starts the progress monitor, launches
        the recorders, parses every file listed in ``config.filenames``, waits
        on the recorders, then asks Piwik to invalidate its reports and prints
        the summary.

        A ``Piwik.Error`` raised while invalidating reports is reported on
        stderr but does not abort the run: the summary is still printed.
        """
        if config.options.show_progress:
            stats.start_monitor()

        stats.set_time_start()

        # The value returned by launch() was never used, so it is not kept.
        Recorder.launch(config.options.recorders)

        for filename in config.filenames:
            parser.parse(filename)

        # NOTE(review): presumably blocks until every queued hit has been
        # handled by the recorders -- confirm against Recorder.wait_empty().
        Recorder.wait_empty()
        stats.set_time_stop()

        if config.options.show_progress:
            stats.stop_monitor()

        try:
            Recorder.invalidate_reports()
        except Piwik.Error as e:
            # Best effort: a failed invalidation must not prevent the summary
            # from being printed, but it should not go unnoticed either.
            sys.stderr.write('Warning: unable to invalidate reports: %s\n' % e)
        stats.print_summary()
    
    
    
    def fatal_error(error, filename=None, lineno=None):
        """
        Report a fatal error on stderr and terminate the process.

        ``error`` is interpolated into the "Fatal error" message. When a
        truthy ``filename`` and a non-None ``lineno`` are both supplied, a
        second message tells the user which ``--skip`` value to pass in order
        to restart the import of that file. The process then exits with
        status 1 through ``os._exit``; this function never returns.
        """
        headline = 'Fatal error: %s' % error
        print >> sys.stderr, headline

        can_resume = filename and lineno is not None
        if can_resume:
            resume_hint = (
                'You can restart the import of "%s" from the point it failed by '
                'specifying --skip=%d on the command line.\n' % (filename, lineno)
            )
            print >> sys.stderr, resume_hint

        # os._exit() terminates immediately, without raising SystemExit.
        os._exit(1)
    
    
    # Script entry point: build the module-level objects, then run the import.
    if __name__ == '__main__':
        try:
            # These assignments create module globals. main() reads `config`,
            # `stats` and `parser` directly, and the parsing code reads
            # `resolver`, so they must all exist before main() is called.
            piwik = Piwik()
            config = Configuration()
            stats = Statistics()
            # The resolver comes from the configuration, so `config` must be
            # built first.
            resolver = config.get_resolver()
            parser = Parser()
            main()
        except KeyboardInterrupt:
            # An interrupt (e.g. Ctrl-C) during setup or the import itself is
            # swallowed here, so no traceback is printed.
            pass