From 54428a657ca81211f0ffdd137b724d8a01398703 Mon Sep 17 00:00:00 2001
From: Thomas Steur <thomas.steur@gmail.com>
Date: Fri, 15 Jan 2016 00:07:44 +0000
Subject: [PATCH] Add the ability to specify the charset when importing in batch

---
 config/global.ini.php                              |  1 +
 core/DataAccess/ArchiveWriter.php                  |  2 +-
 core/Db/BatchInsert.php                            |  7 ++-----
 core/Updates/2.1.1-b11.php                         |  2 +-
 .../Diagnostics/Diagnostic/LoadDataInfileCheck.php |  3 ++-
 .../PHPUnit/Integration/ArchiveProcessingTest.php  | 14 +++++++++-----
 6 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/config/global.ini.php b/config/global.ini.php
index 00de6ff173..afa6a03c87 100644
--- a/config/global.ini.php
+++ b/config/global.ini.php
@@ -36,6 +36,7 @@ port = 3306
 adapter = PDO\MYSQL
 type = InnoDB
 schema = Mysql
+charset = utf8
 
 [tests]
 ; needed in order to run tests.
diff --git a/core/DataAccess/ArchiveWriter.php b/core/DataAccess/ArchiveWriter.php
index 2ef614990f..3473a33c39 100644
--- a/core/DataAccess/ArchiveWriter.php
+++ b/core/DataAccess/ArchiveWriter.php
@@ -212,7 +212,7 @@ class ArchiveWriter
         $tableName = $this->getTableNameToInsert($valueSeen);
         $fields    = $this->getInsertFields();
 
-        BatchInsert::tableInsertBatch($tableName, $fields, $values);
+        BatchInsert::tableInsertBatch($tableName, $fields, $values, $throwException = false, $charset = 'latin1');
 
         return true;
     }
diff --git a/core/Db/BatchInsert.php b/core/Db/BatchInsert.php
index 254e03d2d4..011954cd11 100644
--- a/core/Db/BatchInsert.php
+++ b/core/Db/BatchInsert.php
@@ -54,7 +54,7 @@ class BatchInsert
      * @throws Exception
      * @return bool  True if the bulk LOAD was used, false if we fallback to plain INSERTs
      */
-    public static function tableInsertBatch($tableName, $fields, $values, $throwException = false)
+    public static function tableInsertBatch($tableName, $fields, $values, $throwException = false, $charset = 'utf8')
     {
         $filePath = StaticContainer::get('path.tmp') . '/assets/' . $tableName . '-' . Common::generateUniqId() . '.csv';
 
@@ -72,12 +72,9 @@ class BatchInsert
                         },
                     'eol'              => "\r\n",
                     'null'             => 'NULL',
+                    'charset'          => $charset
                 );
 
-                // see https://github.com/piwik/piwik/issues/9419#issuecomment-170851440
-                // if charset is utf8 we get this error: Invalid utf8 character string: '"x':
-                $fileSpec['charset'] = 'latin1';
-
                 self::createCSVFile($filePath, $fileSpec, $values);
 
                 if (!is_readable($filePath)) {
diff --git a/core/Updates/2.1.1-b11.php b/core/Updates/2.1.1-b11.php
index 0d1ef47f3d..543ae284e8 100644
--- a/core/Updates/2.1.1-b11.php
+++ b/core/Updates/2.1.1-b11.php
@@ -94,7 +94,7 @@ class Updates_2_1_1_b11 extends Updates
                     foreach ($missingIdArchives as $missingIdArchive) {
                         $params[] = array_values($missingIdArchive);
                     }
-                    BatchInsert::tableInsertBatch($table, array_keys(reset($missingIdArchives)), $params, $throwException = false);
+                    BatchInsert::tableInsertBatch($table, array_keys(reset($missingIdArchives)), $params, $throwException = false, $charset = 'latin1');
                 } catch (\Exception $ex) {
                     Updater::handleQueryError($ex, "<batch insert>", false, __FILE__);
                 }
diff --git a/plugins/Diagnostics/Diagnostic/LoadDataInfileCheck.php b/plugins/Diagnostics/Diagnostic/LoadDataInfileCheck.php
index 2c69111ebb..0d06ceef04 100644
--- a/plugins/Diagnostics/Diagnostic/LoadDataInfileCheck.php
+++ b/plugins/Diagnostics/Diagnostic/LoadDataInfileCheck.php
@@ -50,7 +50,8 @@ class LoadDataInfileCheck implements Diagnostic
                     array($testOptionNames[0], '1'),
                     array($testOptionNames[1], '2'),
                 ),
-                $throwException = true
+                $throwException = true,
+                $charset = 'latin1'
             );
         } catch (\Exception $ex) {
             $errorMessage = str_replace("\n", "<br/>", $ex->getMessage());
diff --git a/tests/PHPUnit/Integration/ArchiveProcessingTest.php b/tests/PHPUnit/Integration/ArchiveProcessingTest.php
index 42e95d4b43..358528270f 100644
--- a/tests/PHPUnit/Integration/ArchiveProcessingTest.php
+++ b/tests/PHPUnit/Integration/ArchiveProcessingTest.php
@@ -17,6 +17,7 @@ use Piwik\DataAccess\ArchiveTableCreator;
 use Piwik\Date;
 use Piwik\Db;
 use Piwik\Db\BatchInsert;
+use Piwik\DbHelper;
 use Piwik\Period;
 use Piwik\Piwik;
 use Piwik\Plugins\SitesManager\API;
@@ -306,11 +307,12 @@ class ArchiveProcessingTest extends IntegrationTestCase
             $didWeUseBulk = BatchInsert::tableInsertBatch($table,
                 array('idsite', 'url'),
                 $data,
-                $throwException = true);
+                $throwException = true, $charset = 'utf8');
 
         } catch (Exception $e) {
             $didWeUseBulk = $e->getMessage();
         }
+
         $this->_checkLoadDataInFileWasUsed($didWeUseBulk);
 
         if ($didWeUseBulk === true) {
@@ -377,7 +379,7 @@ class ArchiveProcessingTest extends IntegrationTestCase
             $didWeUseBulk = BatchInsert::tableInsertBatch($table,
                 array('idarchive', 'name', 'idsite', 'date1', 'date2', 'period', 'ts_archived', 'value'),
                 $data,
-                $throwException = true);
+                $throwException = true, $charset = 'latin1');
         } catch (Exception $e) {
             $didWeUseBulk = $e->getMessage();
         }
@@ -388,7 +390,7 @@ class ArchiveProcessingTest extends IntegrationTestCase
             $this->_checkTableIsExpectedBlob($table, $data);
         }
         // INSERT again the bulk. Because we use keyword LOCAL the data will be REPLACED automatically (see mysql doc)
-        $didWeUseBulk = BatchInsert::tableInsertBatch($table, array('idarchive', 'name', 'idsite', 'date1', 'date2', 'period', 'ts_archived', 'value'), $data);
+        $didWeUseBulk = BatchInsert::tableInsertBatch($table, array('idarchive', 'name', 'idsite', 'date1', 'date2', 'period', 'ts_archived', 'value'), $data, $throwException = false, $charset = 'latin1');
         if ($didWeUseBulk === true) {
             $this->_checkTableIsExpectedBlob($table, $data);
         }
@@ -421,9 +423,10 @@ class ArchiveProcessingTest extends IntegrationTestCase
     protected function _checkTableIsExpected($table, $data)
     {
         $fetched = Db::fetchAll('SELECT * FROM ' . $table);
+
         foreach ($data as $id => $row) {
-            $this->assertEquals($fetched[$id]['idsite'], $data[$id][0], "record $id is not {$data[$id][0]}");
-            $this->assertEquals($fetched[$id]['url'], $data[$id][1], "Record $id bug, not {$data[$id][1]} BUT {$fetched[$id]['url']}");
+            $this->assertEquals($row[0], $fetched[$id]['idsite'], "record $id is not {$row[0]}");
+            $this->assertEquals($row[1], $fetched[$id]['url'], "Record $id bug, not {$row[1]} BUT {$fetched[$id]['url']}");
         }
     }
 
@@ -484,6 +487,7 @@ class ArchiveProcessingTest extends IntegrationTestCase
         for ($i = 0; $i < 256; $i++) {
             $str .= chr($i);
         }
+
         $array[] = array(1, 'bytes 0-255', 1, '2011-03-31', '2011-03-31', Piwik::$idPeriods['day'], $ts, $str);
 
         $array[] = array(2, 'compressed string', 1, '2011-03-31', '2011-03-31', Piwik::$idPeriods['day'], $ts, gzcompress(" \n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 
342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942"));
-- 
GitLab