Skip to content
Extraits de code Groupes Projets
Valider c828df60 rédigé par BeezyT's avatar BeezyT
Parcourir les fichiers

refs #3330 ranking query

 * Piwik_RankingQuery encapsulates the logic to apply the limit + grouping of others to an arbitrary select query
 * queryActionsByDimension() and queryVisitsByDimension() in Piwik_ArchiveProcessing_Day get new parameters to use the ranking query

git-svn-id: http://dev.piwik.org/svn/trunk@6803 59fd770c-687e-43c8-a1e3-f5a4ff64c105
parent ad60d64f
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
......@@ -299,12 +299,31 @@ class Piwik_ArchiveProcessing_Day extends Piwik_ArchiveProcessing
/**
* Returns the actions by the given dimension
*
* - The basic use case is to use $label and optionally $where.
* - If you want to apply a limit and group the others, use $orderBy to sort the way you
* want the limit to be applied and pass a pre-configured instance of Piwik_RankingQuery.
* The ranking query instance has to have a limit and at least one label column.
* See Piwik_RankingQuery::setLimit() and Piwik_RankingQuery::addLabelColumn().
* If $rankingQuery is set, the return value is the array returned by
* Piwik_RankingQuery::execute().
* - By default, the method only queries log_link_visit_action. If you need data from
* log_action (e.g. to partition the result from the ranking query into the different
* action types), use $joinLogActionOnColumn and $addSelect to join log_action and select
* the column you need from log_action.
*
*
* @param array|string $label
* @param string $where
* @param array|string $label the dimension(s) you're interested in
* @param string $where where clause
* @param bool|string $orderBy order by clause
* @param Piwik_RankingQuery $rankingQuery pre-configured ranking query instance
* @param bool|string $joinLogActionOnColumn column from log_link_visit_action that
* log_action should be joined on
* @param bool|string $addSelect additional select clause
* @return mixed
*/
public function queryActionsByDimension($label, $where = '')
public function queryActionsByDimension($label, $where = '', $orderBy = false, $rankingQuery = null,
$joinLogActionOnColumn = false, $addSelect = false)
{
if(is_array($label))
{
......@@ -327,15 +346,13 @@ class Piwik_ArchiveProcessing_Day extends Piwik_ArchiveProcessing
$groupBy = 'label';
}
if(!empty($where))
if(!empty($where))
{
$where = sprintf($where, "log_link_visit_action", "log_link_visit_action");
$where = ' AND '.$where;
}
/*
* Page URLs and Page names, general stats
*/
// page URLs and page names, general stats
$select = "$select,
count(distinct log_link_visit_action.idvisit) as `". Piwik_Archive::INDEX_NB_VISITS ."`,
count(distinct log_link_visit_action.idvisitor) as `". Piwik_Archive::INDEX_NB_UNIQ_VISITORS ."`,
......@@ -343,31 +360,64 @@ class Piwik_ArchiveProcessing_Day extends Piwik_ArchiveProcessing
$from = "log_link_visit_action";
if ($joinLogActionOnColumn !== false)
{
$from = array(
$from,
array(
'table' => 'log_action',
'joinOn' => 'log_action.idaction = log_link_visit_action.'.$joinLogActionOnColumn,
)
);
}
if ($addSelect !== false)
{
$select .= ', '.$addSelect;
}
$where = "log_link_visit_action.server_time >= ?
AND log_link_visit_action.server_time <= ?
AND log_link_visit_action.idsite = ?
$where";
$bind = array($this->getStartDatetimeUTC(), $this->getEndDatetimeUTC(), $this->idsite);
$query = $this->getSegment()->getSelectQuery($select, $from, $where, $bind, $orderBy=false, $groupBy);
$query = $this->getSegment()->getSelectQuery($select, $from, $where, $bind, $orderBy, $groupBy);
if ($rankingQuery !== null)
{
$sumColumns = array(
Piwik_Archive::INDEX_NB_UNIQ_VISITORS,
Piwik_Archive::INDEX_NB_VISITS,
Piwik_Archive::INDEX_NB_ACTIONS
);
$rankingQuery->addColumn($sumColumns, 'sum');
return $rankingQuery->execute($query['sql'], $query['bind']);
}
return $this->db->query($query['sql'], $query['bind']);
}
/**
* Query visits by dimension
*
* @param array|string $label mixed Can be a string, eg. "referer_name", will be aliased as 'label' in the returned rows
* Can also be an array of strings, when the dimension spans multiple fields, eg. array("referer_name", "referer_keyword")
* @param string $where Additional condition for WHERE clause
* @param array|string $label Can be a string, eg. "referer_name", will be aliased as 'label' in the returned rows
* Can also be an array of strings, when the dimension spans multiple fields,
* eg. array("referer_name", "referer_keyword")
* @param string $where Additional condition for WHERE clause
* @param bool|string $orderBy ORDER BY clause. This is needed in combination with $rankingQuery.
* @param Piwik_RankingQuery $rankingQuery
* A pre-configured ranking query instance that is used to limit the result.
* If set, the return value is the array returned by Piwik_RankingQuery::execute().
* @return mixed
*/
public function queryVisitsByDimension($label, $where = '')
public function queryVisitsByDimension($label, $where = '', $orderBy = false, $rankingQuery = null)
{
if(is_array($label))
{
$groupBy = "log_visit.".implode(", log_visit.", $label);
foreach($label as &$field)
foreach($label as &$field)
{
$field = 'log_visit.'.$field.' AS '.$field;
}
......@@ -403,7 +453,19 @@ class Piwik_ArchiveProcessing_Day extends Piwik_ArchiveProcessing
$bind = array($this->getStartDatetimeUTC(), $this->getEndDatetimeUTC(), $this->idsite);
$query = $this->getSegment()->getSelectQuery($select, $from, $where, $bind, $orderBy=false, $groupBy);
$query = $this->getSegment()->getSelectQuery($select, $from, $where, $bind, $orderBy, $groupBy);
if ($rankingQuery !== null)
{
$sumColumns = array(
Piwik_Archive::INDEX_NB_UNIQ_VISITORS, Piwik_Archive::INDEX_NB_VISITS,
Piwik_Archive::INDEX_NB_ACTIONS, Piwik_Archive::INDEX_SUM_VISIT_LENGTH,
Piwik_Archive::INDEX_BOUNCE_COUNT, Piwik_Archive::INDEX_NB_VISITS_CONVERTED
);
$rankingQuery->addColumn($sumColumns, 'sum');
$rankingQuery->addColumn(Piwik_Archive::INDEX_MAX_ACTIONS, 'max');
return $rankingQuery->execute($query['sql'], $query['bind']);
}
return $this->db->query($query['sql'], $query['bind']);
}
......@@ -925,4 +987,4 @@ class Piwik_ArchiveProcessing_Day extends Piwik_ArchiveProcessing
Piwik_Archive::INDEX_GOAL_ECOMMERCE_ITEMS => 0,
);
}
}
\ No newline at end of file
}
<?php
/**
* Piwik - Open source web analytics
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
* @version $Id$
*
* @category Piwik
* @package Piwik
*/
/**
* The ranking query class wraps an arbitrary SQL query with more SQL that limits
* the number of results while grouping the rest to "Others" and allows for some
* more fancy things that can be configured via method calls of this class. The
* advanced use cases are explained in the doc comments of the methods.
*
* The general use case looks like this:
*
* // limit to 500 rows + "Others"
* $rankingQuery = new Piwik_RankingQuery(500);
*
* // idaction_url will be "Others" in the row that contains the aggregated rest
* $rankingQuery->addLabelColumn('idaction_url');
*
* // the actual query. it's important to sort it before the limit is applied
* $sql = 'SELECT idaction_url, COUNT(*) AS nb_hits
* FROM log_link_visit_action
* GROUP BY idaction_url
* ORDER BY nb_hits DESC';
*
* // execute the query
* $rankingQuery->execute($sql);
*
*
* @package Piwik
*/
class Piwik_RankingQuery
{
    /**
     * Contains the label columns of the inner query.
     * Format: "label" => true (keys are used so that a label cannot appear twice)
     * @var array
     */
    private $labelColumns = array();

    /**
     * The columns of the inner query that are not labels.
     * Format: "column" => "aggregation function" or false for no aggregation
     * @var array
     */
    private $additionalColumns = array();

    /**
     * The limit for each group
     * @var int
     */
    private $limit = 5;

    /**
     * The name of the column that marks rows to be excluded from the limit,
     * or false if no such column has been configured
     * @var bool|string
     */
    private $columnToMarkExcludedRows = false;

    /**
     * The column that is used to partition the result,
     * or false if the result is not partitioned
     * @var bool|string
     */
    private $partitionColumn = false;

    /**
     * The possible values for the column $this->partitionColumn
     * @var array
     */
    private $partitionColumnValues = array();

    /**
     * The constructor.
     * Can be used as a shortcut for setLimit()
     *
     * @param $limit int|bool  the limit, or false to keep the default
     */
    public function __construct($limit=false)
    {
        if ($limit !== false)
        {
            $this->setLimit($limit);
        }
    }

    /**
     * Set the limit after which everything is grouped to "Others"
     *
     * @param $limit int
     */
    public function setLimit($limit)
    {
        // the limit is interpolated into the generated SQL, so it is normalized
        // to an integer here
        $this->limit = intval($limit);
    }

    /**
     * Add a label column.
     * Labels are the columns that are replaced with "Others" after the limit.
     *
     * @param $labelColumn string|array  one column name or a list of column names
     */
    public function addLabelColumn($labelColumn)
    {
        if (is_array($labelColumn))
        {
            foreach ($labelColumn as $label)
            {
                $this->addLabelColumn($label);
            }
            return;
        }
        $this->labelColumns[$labelColumn] = true;
    }

    /**
     * Add a column that has to be added to the outer queries.
     *
     * @param $column string|array  one column name or a list of column names
     * @param string|bool $aggregationFunction
     *              If set, this function is used to aggregate the values of "Others"
     */
    public function addColumn($column, $aggregationFunction=false)
    {
        if (is_array($column))
        {
            foreach ($column as $c)
            {
                $this->addColumn($c, $aggregationFunction);
            }
            return;
        }
        $this->additionalColumns[$column] = $aggregationFunction;
    }

    /**
     * The inner query can have a column that marks the rows that shall be excluded from limiting.
     * If the column contains 0, rows are handled as usual. For values greater than 0, separate
     * groups are made. If this method is used, execute() returns both the regular result and
     * the excluded rows separately.
     *
     * @param $column string  name of the column
     * @throws Exception when method is used more than once
     */
    public function setColumnToMarkExcludedRows($column)
    {
        if ($this->columnToMarkExcludedRows !== false)
        {
            throw new Exception("setColumnToMarkExcludedRows can only be used once");
        }
        $this->columnToMarkExcludedRows = $column;
        // the marker has to be passed through the outer queries so that
        // execute() can read it from the result rows
        $this->addColumn($this->columnToMarkExcludedRows);
    }

    /**
     * This method can be used to get multiple groups in one go. For example, one might query
     * the top following pages, outlinks and downloads in one go by using log_action.type as
     * the partition column and [TYPE_ACTION_URL, TYPE_OUTLINK, TYPE_DOWNLOAD] as the possible
     * values.
     * When this method has been used, execute() returns an array that contains one array
     * per group of data.
     *
     * @param $partitionColumn string
     * @param $possibleValues array of integers
     * @throws Exception when method is used more than once
     */
    public function partitionResultIntoMultipleGroups($partitionColumn, $possibleValues)
    {
        if ($this->partitionColumn !== false)
        {
            throw new Exception("partitionResultIntoMultipleGroups can only be used once");
        }
        $this->partitionColumn = $partitionColumn;
        $this->partitionColumnValues = $possibleValues;
        $this->addColumn($partitionColumn);
    }

    /**
     * Execute the query.
     * The object has to be configured first using the other methods.
     *
     * Return format:
     * - default: the list of rows
     * - with partitionResultIntoMultipleGroups(): array(partitionValue => list of rows)
     * - with setColumnToMarkExcludedRows(): array('result' => ..., 'excludedFromLimit' => rows)
     *   where 'result' is additionally partitioned if a partition column is set
     *
     * @param $innerQuery string  The "payload" query. The result has to be sorted as desired.
     * @param $bind array  Bindings for the inner query.
     * @return array  The format depends on which methods have been used
     *                to configure the ranking query
     * @throws Exception when no label column has been configured
     */
    public function execute($innerQuery, $bind=array())
    {
        $query = $this->generateQuery($innerQuery);
        $data = Piwik_FetchAll($query, $bind);

        if ($this->columnToMarkExcludedRows !== false)
        {
            // split the result into the regular result and the rows with special treatment
            $excludedFromLimit = array();
            $result = array();
            foreach ($data as $row)
            {
                // loose comparison on purpose: the marker may come back as a string
                if ($row[$this->columnToMarkExcludedRows] != 0)
                {
                    $excludedFromLimit[] = $row;
                }
                else
                {
                    $result[] = $row;
                }
            }

            if ($this->partitionColumn !== false)
            {
                // only the regular rows are partitioned
                $result = $this->splitPartitions($result);
            }

            return array(
                'result' => $result,
                'excludedFromLimit' => $excludedFromLimit
            );
        }

        if ($this->partitionColumn !== false)
        {
            return $this->splitPartitions($data);
        }

        return $data;
    }

    /**
     * Group rows by the value they have in the partition column.
     *
     * @param $data array  list of result rows
     * @return array  partition value => list of rows
     */
    private function splitPartitions($data)
    {
        $result = array();
        foreach ($data as $row)
        {
            $result[$row[$this->partitionColumn]][] = $row;
        }
        return $result;
    }

    /**
     * Generate the SQL code that does the magic.
     * If you want to get the result, use execute() instead. If you're interested in
     * the generated SQL code (e.g. for debugging), use this method.
     *
     * @param $innerQuery string  SQL of the actual query
     * @return string  entire ranking query SQL
     * @throws Exception when no label column has been configured
     */
    public function generateQuery($innerQuery)
    {
        if (empty($this->labelColumns))
        {
            // without labels the SELECT list would start with an empty identifier
            throw new Exception("Piwik_RankingQuery requires at least one label column, use addLabelColumn()");
        }

        // +1 to include "Others"
        $limit = $this->limit + 1;
        $counterExpression = $this->getCounterExpression($limit);

        // generate select clauses for label columns
        $labelNames = array_keys($this->labelColumns);
        $labelColumnsString = '`'.implode('`, `', $labelNames).'`';
        $labelColumnsOthersSwitch = array();
        foreach ($labelNames as $column)
        {
            $labelColumnsOthersSwitch[] = "\nCASE\n"
                ."WHEN counter = $limit THEN \"Others\"\n"
                ."ELSE `$column`\n"
                ."END AS `$column`\n";
        }
        $labelColumnsOthersSwitch = implode(', ', $labelColumnsOthersSwitch);

        // generate select clauses for additional columns
        $additionalColumnsString = '';
        $additionalColumnsAggregatedString = '';
        foreach ($this->additionalColumns as $additionalColumn => $aggregation)
        {
            $additionalColumnsString .= ', `'.$additionalColumn.'`';
            if ($aggregation !== false)
            {
                $additionalColumnsAggregatedString .= ', '.$aggregation.'(`'.$additionalColumn.'`) AS `'.$additionalColumn.'`';
            }
            else
            {
                $additionalColumnsAggregatedString .= ', `'.$additionalColumn.'`';
            }
        }

        // initialize the counters: one per partition value, or a single one
        if ($this->partitionColumn !== false)
        {
            $initCounter = '';
            foreach ($this->partitionColumnValues as $value)
            {
                $initCounter .= '( SELECT @counter'.intval($value).':=0 ) initCounter'.intval($value).', ';
            }
        }
        else
        {
            $initCounter = '( SELECT @counter:=0 ) initCounter,';
        }

        // add a counter to the query
        // we rely on the sorting of the inner query
        $withCounter = "\nSELECT\n"
            ."$labelColumnsString,\n"
            ."$counterExpression AS counter\n"
            ."$additionalColumnsString\n"
            ."FROM\n"
            ."$initCounter\n"
            ."( $innerQuery ) actualQuery\n";

        // group by the counter - this groups "Others" because the counter stops at $limit
        $groupBy = 'counter';
        if ($this->partitionColumn !== false)
        {
            $groupBy .= ', `'.$this->partitionColumn.'`';
        }

        $groupOthers = "\nSELECT\n"
            ."$labelColumnsOthersSwitch\n"
            ."$additionalColumnsAggregatedString\n"
            ."FROM ( $withCounter ) AS withCounter\n"
            ."GROUP BY $groupBy\n";

        return $groupOthers;
    }

    /**
     * Build the SQL CASE expression that numbers the rows of the inner query.
     * The counter is incremented per row until it reaches $limit and then stays there.
     *
     * @param $limit int  the value at which the counter stops (configured limit + 1)
     * @return string  SQL CASE expression
     */
    private function getCounterExpression($limit)
    {
        $whens = array();

        if ($this->columnToMarkExcludedRows !== false)
        {
            // when a column has been specified that marks which records should be excluded
            // from limiting, we don't give those rows the normal counter but -1 times the
            // value they had before. this way, they have a separate number space (i.e. negative
            // integers).
            // the column is backtick-quoted like every other column in the generated SQL
            $marker = '`'.$this->columnToMarkExcludedRows.'`';
            $whens[] = "WHEN $marker != 0 THEN -1 * $marker";
        }

        if ($this->partitionColumn !== false)
        {
            // partition: one counter per possible value
            foreach ($this->partitionColumnValues as $value)
            {
                $isValue = '`'.$this->partitionColumn.'` = '.intval($value);
                $counter = '@counter'.intval($value);
                $whens[] = "WHEN $isValue AND $counter = $limit THEN $limit";
                $whens[] = "WHEN $isValue THEN $counter:=$counter+1";
            }
            // rows with a partition value that was not announced get counter 0
            $whens[] = "ELSE 0";
        }
        else
        {
            // no partitioning: add a single counter
            $whens[] = "WHEN @counter = $limit THEN $limit";
            $whens[] = "ELSE @counter:=@counter+1";
        }

        return "\nCASE\n".implode("\n", $whens)."\nEND\n";
    }
}
\ No newline at end of file
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous pour commenter