Skip to content

Commit

Permalink
Always JOIN on page table to avoid counting orphan revisions
Browse files Browse the repository at this point in the history
Bug: T355027
  • Loading branch information
MusikAnimal committed Jan 16, 2024
1 parent da216f0 commit 2844ef7
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 3 deletions.
23 changes: 22 additions & 1 deletion src/Repository/EditCounterRepository.php
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ public function getPairData(Project $project, User $user): array
// Prepare the queries and execute them.
$archiveTable = $project->getTableName('archive');
$revisionTable = $project->getTableName('revision');
$pageTable = $project->getTableName('page');

// Always JOIN on the page table so orphan revisions (those with no matching page row) are not counted; see T355027.
$pageJoin = "JOIN $pageTable ON rev_page = page_id";

if ($user->isIpRange()) {
$ipcTable = $project->getTableName('ip_changes');
Expand Down Expand Up @@ -86,38 +90,46 @@ public function getPairData(Project $project, User $user): array
-- Revision counts.
SELECT 'live' AS `key`, COUNT(rev_id) AS val FROM $revisionTable
$pageJoin
$ipcJoin
WHERE $whereClause
) UNION (
SELECT 'day' AS `key`, COUNT(rev_id) AS val FROM $revisionTable
$pageJoin
$ipcJoin
WHERE $whereClause AND rev_timestamp >= DATE_SUB(NOW(), INTERVAL 1 DAY)
) UNION (
SELECT 'week' AS `key`, COUNT(rev_id) AS val FROM $revisionTable
$pageJoin
$ipcJoin
WHERE $whereClause AND rev_timestamp >= DATE_SUB(NOW(), INTERVAL 1 WEEK)
) UNION (
SELECT 'month' AS `key`, COUNT(rev_id) AS val FROM $revisionTable
$pageJoin
$ipcJoin
WHERE $whereClause AND rev_timestamp >= DATE_SUB(NOW(), INTERVAL 1 MONTH)
) UNION (
SELECT 'year' AS `key`, COUNT(rev_id) AS val FROM $revisionTable
$pageJoin
$ipcJoin
WHERE $whereClause AND rev_timestamp >= DATE_SUB(NOW(), INTERVAL 1 YEAR)
) UNION (
SELECT 'minor' AS `key`, COUNT(rev_id) AS val FROM $revisionTable
$pageJoin
$ipcJoin
WHERE $whereClause AND rev_minor_edit = 1
-- Page counts.
) UNION (
SELECT 'edited-live' AS `key`, COUNT(DISTINCT rev_page) AS `val`
FROM $revisionTable
$pageJoin
$ipcJoin
WHERE $whereClause
) UNION (
SELECT 'created-live' AS `key`, COUNT(DISTINCT rev_page) AS `val`
FROM $revisionTable
$pageJoin
$ipcJoin
WHERE $whereClause AND rev_parent_id = 0
)";
Expand Down Expand Up @@ -503,17 +515,23 @@ public function getTimeCard(Project $project, User $user): array
}

$hourInterval = 1;
$revisionTable = $project->getTableName('revision');
// Always JOIN on the page table so orphan revisions (those with no matching page row) are not counted; see T325492.
$pageTable = $project->getTableName('page');

if ($user->isIpRange()) {
$column = 'ipc_rev_timestamp';
$table = $project->getTableName('ip_changes');
[$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
$whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
$joinClause = "JOIN $revisionTable ON rev_id = ipc_rev_id
JOIN $pageTable ON rev_page = page_id";
} else {
$column = 'rev_timestamp';
$table = $project->getTableName('revision');
$table = $revisionTable;
$whereClause = 'rev_actor = :actorId';
$params = ['actorId' => $user->getActorId($project)];
$joinClause = "JOIN $pageTable ON rev_page = page_id";
}

$xCalc = "ROUND(HOUR($column)/$hourInterval) * $hourInterval";
Expand All @@ -523,6 +541,7 @@ public function getTimeCard(Project $project, User $user): array
$xCalc AS `hour`,
COUNT($column) AS `value`
FROM $table
$joinClause
WHERE $whereClause
GROUP BY DAYOFWEEK($column), $xCalc";

Expand All @@ -549,6 +568,7 @@ public function getEditSizeData(Project $project, User $user): array

// Prepare the queries and execute them.
$revisionTable = $project->getTableName('revision');
$pageTable = $project->getTableName('page');
$ipcJoin = '';
$whereClause = 'revs.rev_actor = :actorId';
$params = ['actorId' => $user->getActorId($project)];
Expand All @@ -566,6 +586,7 @@ public function getEditSizeData(Project $project, User $user): array
FROM (
SELECT (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS size
FROM $revisionTable AS revs
JOIN $pageTable ON revs.rev_page = page_id
$ipcJoin
LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
WHERE $whereClause
Expand Down
5 changes: 3 additions & 2 deletions src/Repository/SimpleEditCounterRepository.php
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ private function fetchDataNormal(
$arDateConditions = $this->getDateConditions($start, $end, false, '', 'ar_timestamp');
$revDateConditions = $this->getDateConditions($start, $end);

$revNamespaceJoinSql = 'all' === $namespace ? '' : "JOIN $pageTable ON rev_page = page_id";
// Always JOIN on the page table, even when no namespace filter applies, so orphan revisions are not counted; see T325492.
$revNamespaceJoinSql = "JOIN $pageTable ON rev_page = page_id";
$revNamespaceWhereSql = 'all' === $namespace ? '' : "AND page_namespace = $namespace";
$arNamespaceWhereSql = 'all' === $namespace ? '' : "AND ar_namespace = $namespace";

Expand Down Expand Up @@ -126,7 +127,7 @@ private function fetchDataIpRange(
$sql = "SELECT 'rev' AS source, COUNT(*) AS value
FROM $ipcTable
$revNamespaceJoinSql
WHERE ipc_hex BETWEEN :start AND :end
WHERE ipc_hex BETWEEN :start AND :end
$revDateConditions
$revNamespaceWhereSql";

Expand Down

0 comments on commit 2844ef7

Please sign in to comment.