From 55a902fec68d710310f25da4d593cfe28c86cd1c Mon Sep 17 00:00:00 2001
From: mattab <matthieu.aubry@gmail.com>
Date: Tue, 15 Sep 2015 12:11:32 +1200
Subject: [PATCH] Add System tests for URL tracking & removing emoji characters

---
 .../OneVisitWithAbnormalPageviewUrls.php      | 10 ++++++
 tests/PHPUnit/System/UrlNormalizationTest.php |  8 +++--
 ...sSegmentedRef__Actions.getPageUrls_day.xml | 36 +++++++++++++++----
 ...tion_titles__Actions.getPageTitles_day.xml | 28 ++++++++++++---
 ...lization_urls__Actions.getPageUrls_day.xml | 36 +++++++++++++++----
 5 files changed, 99 insertions(+), 19 deletions(-)

diff --git a/tests/PHPUnit/Fixtures/OneVisitWithAbnormalPageviewUrls.php b/tests/PHPUnit/Fixtures/OneVisitWithAbnormalPageviewUrls.php
index f7dc5ac337..2685efecd8 100644
--- a/tests/PHPUnit/Fixtures/OneVisitWithAbnormalPageviewUrls.php
+++ b/tests/PHPUnit/Fixtures/OneVisitWithAbnormalPageviewUrls.php
@@ -63,8 +63,18 @@ class OneVisitWithAbnormalPageviewUrls extends Fixture
         $t->setForceVisitDateTime(Date::factory($dateTime)->addHour(0.5)->getDatetime());
         self::checkResponse($t->doTrackPageView('incredible.title/'));
 
+        $t->setUrl('http://www.my.url/ꟽ碌㒧䊶亄ﶆⅅขκもኸόσशμεޖृ');
+        $t->setForceVisitDateTime(Date::factory($dateTime)->addHour(0.7)->getDatetime());
+        self::checkResponse($t->doTrackPageView('Valid URL, although strange looking'));
+
+        $t->setUrl('https://make.wordpress.org/?emoji=😎l&param=test');
+        $t->setForceVisitDateTime(Date::factory($dateTime)->addHour(0.8)->getDatetime());
+        self::checkResponse($t->doTrackPageView('Emoji here: %F0%9F%98%8E'));
+
+        // this pageview should be tracked last; note its forced time (+0.6h) is earlier than the two pageviews above (+0.7h, +0.8h)
         $t->setUrl('https://example.org/foo/bar4.html');
         $t->setForceVisitDateTime(Date::factory($dateTime)->addHour(0.6)->getDatetime());
         self::checkResponse($t->doTrackPageView('incredible.title/'));
+
     }
 }
\ No newline at end of file
diff --git a/tests/PHPUnit/System/UrlNormalizationTest.php b/tests/PHPUnit/System/UrlNormalizationTest.php
index 9c02be3e34..849ea95c9d 100644
--- a/tests/PHPUnit/System/UrlNormalizationTest.php
+++ b/tests/PHPUnit/System/UrlNormalizationTest.php
@@ -75,7 +75,7 @@ class UrlNormalizationTest extends SystemTestCase
             'testSuffix' => '_pagesSegmented',
             'idSite'     => $idSite,
             'date'       => $dateTime,
-            'segment'    => 'pageUrl==example.org/foo/bar2.html',
+            'segment'    => 'pageUrl==' . urlencode('example.org/foo/bar2.html'),
         ));
         $return[] = array('Actions.getPageUrls', array(
             'testSuffix' => '_pagesSegmentedRef',
@@ -101,7 +101,7 @@ class UrlNormalizationTest extends SystemTestCase
     {
         $sql = "SELECT count(*) FROM " . Common::prefixTable('log_action');
         $count = Db::get()->fetchOne($sql);
-        $expected = 9; // 4 urls + 5 titles
+        $expected = 13; // 6 urls + 7 titles
         $this->assertEquals($expected, $count, "only $expected actions expected");
 
         $sql = "SELECT name, url_prefix FROM " . Common::prefixTable('log_action')
@@ -112,7 +112,9 @@ class UrlNormalizationTest extends SystemTestCase
             array('name' => 'example.org/foo/bar.html', 'url_prefix' => 0),
             array('name' => 'example.org/foo/bar2.html', 'url_prefix' => 3),
             array('name' => 'example.org/foo/bar3.html', 'url_prefix' => 1),
-            array('name' => 'example.org/foo/bar4.html', 'url_prefix' => 2)
+            array('name' => 'my.url/ꟽ碌㒧䊶亄ﶆⅅขκもኸόσशμεޖृ', 'url_prefix' => 1),
+            array('name' => 'make.wordpress.org/?emoji=�l&param=test', 'url_prefix' => 2),
+            array('name' => 'example.org/foo/bar4.html', 'url_prefix' => 2),
         );
         $this->assertEquals($expected, $urls, "normalization went wrong");
     }
diff --git a/tests/PHPUnit/System/expected/test_UrlNormalization_pagesSegmentedRef__Actions.getPageUrls_day.xml b/tests/PHPUnit/System/expected/test_UrlNormalization_pagesSegmentedRef__Actions.getPageUrls_day.xml
index bbb576d7a5..667ec31507 100644
--- a/tests/PHPUnit/System/expected/test_UrlNormalization_pagesSegmentedRef__Actions.getPageUrls_day.xml
+++ b/tests/PHPUnit/System/expected/test_UrlNormalization_pagesSegmentedRef__Actions.getPageUrls_day.xml
@@ -4,13 +4,13 @@
 		<label>foo</label>
 		<nb_visits>4</nb_visits>
 		<nb_hits>6</nb_hits>
-		<sum_time_spent>2160</sum_time_spent>
+		<sum_time_spent>2520</sum_time_spent>
 		<entry_nb_visits>1</entry_nb_visits>
-		<entry_nb_actions>6</entry_nb_actions>
+		<entry_nb_actions>8</entry_nb_actions>
 		<entry_sum_visit_length>2161</entry_sum_visit_length>
 		<entry_bounce_count>0</entry_bounce_count>
 		<exit_nb_visits>1</exit_nb_visits>
-		<avg_time_on_page>540</avg_time_on_page>
+		<avg_time_on_page>630</avg_time_on_page>
 		<bounce_rate>0%</bounce_rate>
 		<exit_rate>25%</exit_rate>
 		<subtable>
@@ -22,7 +22,7 @@
 				<sum_time_spent>1080</sum_time_spent>
 				<entry_nb_uniq_visitors>1</entry_nb_uniq_visitors>
 				<entry_nb_visits>1</entry_nb_visits>
-				<entry_nb_actions>6</entry_nb_actions>
+				<entry_nb_actions>8</entry_nb_actions>
 				<entry_sum_visit_length>2161</entry_sum_visit_length>
 				<entry_bounce_count>0</entry_bounce_count>
 				<avg_time_on_page>1080</avg_time_on_page>
@@ -46,8 +46,8 @@
 				<nb_visits>1</nb_visits>
 				<nb_uniq_visitors>1</nb_uniq_visitors>
 				<nb_hits>1</nb_hits>
-				<sum_time_spent>360</sum_time_spent>
-				<avg_time_on_page>360</avg_time_on_page>
+				<sum_time_spent>720</sum_time_spent>
+				<avg_time_on_page>720</avg_time_on_page>
 				<bounce_rate>0%</bounce_rate>
 				<exit_rate>0%</exit_rate>
 				<url>http://www.example.org/foo/bar3.html</url>
@@ -67,4 +67,28 @@
 			</row>
 		</subtable>
 	</row>
+	<row>
+		<label>/?emoji=�l&amp;param=test</label>
+		<nb_visits>1</nb_visits>
+		<nb_uniq_visitors>1</nb_uniq_visitors>
+		<nb_hits>1</nb_hits>
+		<sum_time_spent>0</sum_time_spent>
+		<avg_time_on_page>0</avg_time_on_page>
+		<bounce_rate>0%</bounce_rate>
+		<exit_rate>0%</exit_rate>
+		<url>https://make.wordpress.org/?emoji=�l&amp;param=test</url>
+		<segment>pageUrl==https%3A%2F%2Fmake.wordpress.org%2F%3Femoji%3D%EF%BF%BDl%26param%3Dtest</segment>
+	</row>
+	<row>
+		<label>/ꟽ碌㒧䊶亄ﶆⅅขκもኸόσशμεޖृ</label>
+		<nb_visits>1</nb_visits>
+		<nb_uniq_visitors>1</nb_uniq_visitors>
+		<nb_hits>1</nb_hits>
+		<sum_time_spent>360</sum_time_spent>
+		<avg_time_on_page>360</avg_time_on_page>
+		<bounce_rate>0%</bounce_rate>
+		<exit_rate>0%</exit_rate>
+		<url>http://www.my.url/ꟽ碌㒧䊶亄ﶆⅅขκもኸόσशμεޖृ</url>
+		<segment>pageUrl==http%3A%2F%2Fwww.my.url%2F%EA%9F%BD%EF%A4%BB%E3%92%A7%E4%8A%B6%E4%BA%84%EF%B6%86%E2%85%85%E0%B8%82%CE%BA%E3%82%82%E1%8A%B8%E1%BD%B9%CF%83%E0%A4%B6%CE%BC%CE%B5%DE%96%E0%A5%83</segment>
+	</row>
 </result>
\ No newline at end of file
diff --git a/tests/PHPUnit/System/expected/test_UrlNormalization_titles__Actions.getPageTitles_day.xml b/tests/PHPUnit/System/expected/test_UrlNormalization_titles__Actions.getPageTitles_day.xml
index 564a6ae28f..0549176cb6 100644
--- a/tests/PHPUnit/System/expected/test_UrlNormalization_titles__Actions.getPageTitles_day.xml
+++ b/tests/PHPUnit/System/expected/test_UrlNormalization_titles__Actions.getPageTitles_day.xml
@@ -6,7 +6,7 @@
 		<nb_hits>2</nb_hits>
 		<sum_time_spent>1080</sum_time_spent>
 		<entry_nb_visits>1</entry_nb_visits>
-		<entry_nb_actions>6</entry_nb_actions>
+		<entry_nb_actions>8</entry_nb_actions>
 		<entry_sum_visit_length>2161</entry_sum_visit_length>
 		<entry_bounce_count>0</entry_bounce_count>
 		<avg_time_on_page>540</avg_time_on_page>
@@ -21,7 +21,7 @@
 				<sum_time_spent>720</sum_time_spent>
 				<entry_nb_uniq_visitors>1</entry_nb_uniq_visitors>
 				<entry_nb_visits>1</entry_nb_visits>
-				<entry_nb_actions>6</entry_nb_actions>
+				<entry_nb_actions>8</entry_nb_actions>
 				<entry_sum_visit_length>2161</entry_sum_visit_length>
 				<entry_bounce_count>0</entry_bounce_count>
 				<avg_time_on_page>720</avg_time_on_page>
@@ -71,16 +71,36 @@
 			</row>
 		</subtable>
 	</row>
+	<row>
+		<label> Emoji here: 😎</label>
+		<nb_visits>1</nb_visits>
+		<nb_uniq_visitors>1</nb_uniq_visitors>
+		<nb_hits>1</nb_hits>
+		<sum_time_spent>0</sum_time_spent>
+		<avg_time_on_page>0</avg_time_on_page>
+		<bounce_rate>0%</bounce_rate>
+		<exit_rate>0%</exit_rate>
+	</row>
 	<row>
 		<label> incredible.title</label>
 		<nb_visits>1</nb_visits>
 		<nb_uniq_visitors>1</nb_uniq_visitors>
 		<nb_hits>2</nb_hits>
-		<sum_time_spent>360</sum_time_spent>
+		<sum_time_spent>720</sum_time_spent>
 		<exit_nb_uniq_visitors>1</exit_nb_uniq_visitors>
 		<exit_nb_visits>1</exit_nb_visits>
-		<avg_time_on_page>360</avg_time_on_page>
+		<avg_time_on_page>720</avg_time_on_page>
 		<bounce_rate>0%</bounce_rate>
 		<exit_rate>100%</exit_rate>
 	</row>
+	<row>
+		<label> Valid URL, although strange looking</label>
+		<nb_visits>1</nb_visits>
+		<nb_uniq_visitors>1</nb_uniq_visitors>
+		<nb_hits>1</nb_hits>
+		<sum_time_spent>360</sum_time_spent>
+		<avg_time_on_page>360</avg_time_on_page>
+		<bounce_rate>0%</bounce_rate>
+		<exit_rate>0%</exit_rate>
+	</row>
 </result>
\ No newline at end of file
diff --git a/tests/PHPUnit/System/expected/test_UrlNormalization_urls__Actions.getPageUrls_day.xml b/tests/PHPUnit/System/expected/test_UrlNormalization_urls__Actions.getPageUrls_day.xml
index bbb576d7a5..667ec31507 100644
--- a/tests/PHPUnit/System/expected/test_UrlNormalization_urls__Actions.getPageUrls_day.xml
+++ b/tests/PHPUnit/System/expected/test_UrlNormalization_urls__Actions.getPageUrls_day.xml
@@ -4,13 +4,13 @@
 		<label>foo</label>
 		<nb_visits>4</nb_visits>
 		<nb_hits>6</nb_hits>
-		<sum_time_spent>2160</sum_time_spent>
+		<sum_time_spent>2520</sum_time_spent>
 		<entry_nb_visits>1</entry_nb_visits>
-		<entry_nb_actions>6</entry_nb_actions>
+		<entry_nb_actions>8</entry_nb_actions>
 		<entry_sum_visit_length>2161</entry_sum_visit_length>
 		<entry_bounce_count>0</entry_bounce_count>
 		<exit_nb_visits>1</exit_nb_visits>
-		<avg_time_on_page>540</avg_time_on_page>
+		<avg_time_on_page>630</avg_time_on_page>
 		<bounce_rate>0%</bounce_rate>
 		<exit_rate>25%</exit_rate>
 		<subtable>
@@ -22,7 +22,7 @@
 				<sum_time_spent>1080</sum_time_spent>
 				<entry_nb_uniq_visitors>1</entry_nb_uniq_visitors>
 				<entry_nb_visits>1</entry_nb_visits>
-				<entry_nb_actions>6</entry_nb_actions>
+				<entry_nb_actions>8</entry_nb_actions>
 				<entry_sum_visit_length>2161</entry_sum_visit_length>
 				<entry_bounce_count>0</entry_bounce_count>
 				<avg_time_on_page>1080</avg_time_on_page>
@@ -46,8 +46,8 @@
 				<nb_visits>1</nb_visits>
 				<nb_uniq_visitors>1</nb_uniq_visitors>
 				<nb_hits>1</nb_hits>
-				<sum_time_spent>360</sum_time_spent>
-				<avg_time_on_page>360</avg_time_on_page>
+				<sum_time_spent>720</sum_time_spent>
+				<avg_time_on_page>720</avg_time_on_page>
 				<bounce_rate>0%</bounce_rate>
 				<exit_rate>0%</exit_rate>
 				<url>http://www.example.org/foo/bar3.html</url>
@@ -67,4 +67,28 @@
 			</row>
 		</subtable>
 	</row>
+	<row>
+		<label>/?emoji=�l&amp;param=test</label>
+		<nb_visits>1</nb_visits>
+		<nb_uniq_visitors>1</nb_uniq_visitors>
+		<nb_hits>1</nb_hits>
+		<sum_time_spent>0</sum_time_spent>
+		<avg_time_on_page>0</avg_time_on_page>
+		<bounce_rate>0%</bounce_rate>
+		<exit_rate>0%</exit_rate>
+		<url>https://make.wordpress.org/?emoji=�l&amp;param=test</url>
+		<segment>pageUrl==https%3A%2F%2Fmake.wordpress.org%2F%3Femoji%3D%EF%BF%BDl%26param%3Dtest</segment>
+	</row>
+	<row>
+		<label>/ꟽ碌㒧䊶亄ﶆⅅขκもኸόσशμεޖृ</label>
+		<nb_visits>1</nb_visits>
+		<nb_uniq_visitors>1</nb_uniq_visitors>
+		<nb_hits>1</nb_hits>
+		<sum_time_spent>360</sum_time_spent>
+		<avg_time_on_page>360</avg_time_on_page>
+		<bounce_rate>0%</bounce_rate>
+		<exit_rate>0%</exit_rate>
+		<url>http://www.my.url/ꟽ碌㒧䊶亄ﶆⅅขκもኸόσशμεޖृ</url>
+		<segment>pageUrl==http%3A%2F%2Fwww.my.url%2F%EA%9F%BD%EF%A4%BB%E3%92%A7%E4%8A%B6%E4%BA%84%EF%B6%86%E2%85%85%E0%B8%82%CE%BA%E3%82%82%E1%8A%B8%E1%BD%B9%CF%83%E0%A4%B6%CE%BC%CE%B5%DE%96%E0%A5%83</segment>
+	</row>
 </result>
\ No newline at end of file
-- 
GitLab