From 54428a657ca81211f0ffdd137b724d8a01398703 Mon Sep 17 00:00:00 2001 From: Thomas Steur <thomas.steur@gmail.com> Date: Fri, 15 Jan 2016 00:07:44 +0000 Subject: [PATCH] add possibility to specify charset when importing in batch --- config/global.ini.php | 1 + core/DataAccess/ArchiveWriter.php | 2 +- core/Db/BatchInsert.php | 7 ++----- core/Updates/2.1.1-b11.php | 2 +- .../Diagnostics/Diagnostic/LoadDataInfileCheck.php | 3 ++- .../PHPUnit/Integration/ArchiveProcessingTest.php | 14 +++++++++----- 6 files changed, 16 insertions(+), 13 deletions(-) diff --git a/config/global.ini.php b/config/global.ini.php index 00de6ff173..afa6a03c87 100644 --- a/config/global.ini.php +++ b/config/global.ini.php @@ -36,6 +36,7 @@ port = 3306 adapter = PDO\MYSQL type = InnoDB schema = Mysql +charset = utf8 [tests] ; needed in order to run tests. diff --git a/core/DataAccess/ArchiveWriter.php b/core/DataAccess/ArchiveWriter.php index 2ef614990f..3473a33c39 100644 --- a/core/DataAccess/ArchiveWriter.php +++ b/core/DataAccess/ArchiveWriter.php @@ -212,7 +212,7 @@ class ArchiveWriter $tableName = $this->getTableNameToInsert($valueSeen); $fields = $this->getInsertFields(); - BatchInsert::tableInsertBatch($tableName, $fields, $values); + BatchInsert::tableInsertBatch($tableName, $fields, $values, $throwException = false, $charset = 'latin1'); return true; } diff --git a/core/Db/BatchInsert.php b/core/Db/BatchInsert.php index 254e03d2d4..011954cd11 100644 --- a/core/Db/BatchInsert.php +++ b/core/Db/BatchInsert.php @@ -54,7 +54,7 @@ class BatchInsert * @throws Exception * @return bool True if the bulk LOAD was used, false if we fallback to plain INSERTs */ - public static function tableInsertBatch($tableName, $fields, $values, $throwException = false) + public static function tableInsertBatch($tableName, $fields, $values, $throwException = false, $charset = 'utf8') { $filePath = StaticContainer::get('path.tmp') . '/assets/' . $tableName . '-' . Common::generateUniqId() . '.csv'; @@ -72,12 +72,9 @@ class BatchInsert }, 'eol' => "\r\n", 'null' => 'NULL', + 'charset' => $charset ); - // see https://github.com/piwik/piwik/issues/9419#issuecomment-170851440 - // if charset is utf8 we get this error: Invalid utf8 character string: '"x': - $fileSpec['charset'] = 'latin1'; - self::createCSVFile($filePath, $fileSpec, $values); if (!is_readable($filePath)) { diff --git a/core/Updates/2.1.1-b11.php b/core/Updates/2.1.1-b11.php index 0d1ef47f3d..543ae284e8 100644 --- a/core/Updates/2.1.1-b11.php +++ b/core/Updates/2.1.1-b11.php @@ -94,7 +94,7 @@ class Updates_2_1_1_b11 extends Updates foreach ($missingIdArchives as $missingIdArchive) { $params[] = array_values($missingIdArchive); } - BatchInsert::tableInsertBatch($table, array_keys(reset($missingIdArchives)), $params, $throwException = false); + BatchInsert::tableInsertBatch($table, array_keys(reset($missingIdArchives)), $params, $throwException = false, $charset = 'latin1'); } catch (\Exception $ex) { Updater::handleQueryError($ex, "<batch insert>", false, __FILE__); } diff --git a/plugins/Diagnostics/Diagnostic/LoadDataInfileCheck.php b/plugins/Diagnostics/Diagnostic/LoadDataInfileCheck.php index 2c69111ebb..0d06ceef04 100644 --- a/plugins/Diagnostics/Diagnostic/LoadDataInfileCheck.php +++ b/plugins/Diagnostics/Diagnostic/LoadDataInfileCheck.php @@ -50,7 +50,8 @@ class LoadDataInfileCheck implements Diagnostic array($testOptionNames[0], '1'), array($testOptionNames[1], '2'), ), - $throwException = true + $throwException = true, + $charset = 'latin1' ); } catch (\Exception $ex) { $errorMessage = str_replace("\n", "<br/>", $ex->getMessage()); diff --git a/tests/PHPUnit/Integration/ArchiveProcessingTest.php b/tests/PHPUnit/Integration/ArchiveProcessingTest.php index 42e95d4b43..358528270f 100644 --- a/tests/PHPUnit/Integration/ArchiveProcessingTest.php +++ b/tests/PHPUnit/Integration/ArchiveProcessingTest.php @@ -17,6 +17,7 @@ use Piwik\DataAccess\ArchiveTableCreator; use Piwik\Date; use Piwik\Db; use Piwik\Db\BatchInsert; +use Piwik\DbHelper; use Piwik\Period; use Piwik\Piwik; use Piwik\Plugins\SitesManager\API; @@ -306,11 +307,12 @@ class ArchiveProcessingTest extends IntegrationTestCase $didWeUseBulk = BatchInsert::tableInsertBatch($table, array('idsite', 'url'), $data, - $throwException = true); + $throwException = true, 'utf8'); } catch (Exception $e) { $didWeUseBulk = $e->getMessage(); } + $this->_checkLoadDataInFileWasUsed($didWeUseBulk); if ($didWeUseBulk === true) { @@ -377,7 +379,7 @@ class ArchiveProcessingTest extends IntegrationTestCase $didWeUseBulk = BatchInsert::tableInsertBatch($table, array('idarchive', 'name', 'idsite', 'date1', 'date2', 'period', 'ts_archived', 'value'), $data, - $throwException = true); + $throwException = true, $charset = 'latin1'); } catch (Exception $e) { $didWeUseBulk = $e->getMessage(); } @@ -388,7 +390,7 @@ class ArchiveProcessingTest extends IntegrationTestCase $this->_checkTableIsExpectedBlob($table, $data); } // INSERT again the bulk. Because we use keyword LOCAL the data will be REPLACED automatically (see mysql doc) - $didWeUseBulk = BatchInsert::tableInsertBatch($table, array('idarchive', 'name', 'idsite', 'date1', 'date2', 'period', 'ts_archived', 'value'), $data); + $didWeUseBulk = BatchInsert::tableInsertBatch($table, array('idarchive', 'name', 'idsite', 'date1', 'date2', 'period', 'ts_archived', 'value'), $data, $throw = false, $charset = 'latin1'); if ($didWeUseBulk === true) { $this->_checkTableIsExpectedBlob($table, $data); } @@ -421,9 +423,10 @@ class ArchiveProcessingTest extends IntegrationTestCase protected function _checkTableIsExpected($table, $data) { $fetched = Db::fetchAll('SELECT * FROM ' . $table); + foreach ($data as $id => $row) { - $this->assertEquals($fetched[$id]['idsite'], $data[$id][0], "record $id is not {$data[$id][0]}"); - $this->assertEquals($fetched[$id]['url'], $data[$id][1], "Record $id bug, not {$data[$id][1]} BUT {$fetched[$id]['url']}"); + $this->assertEquals($data[$id][0], $fetched[$id]['idsite'], "record $id is not {$data[$id][0]}"); + $this->assertEquals($data[$id][1], $fetched[$id]['url'], "Record $id bug, not {$data[$id][1]} BUT {$fetched[$id]['url']}"); } } @@ -484,6 +487,7 @@ class ArchiveProcessingTest extends IntegrationTestCase for ($i = 0; $i < 256; $i++) { $str .= chr($i); } + $array[] = array(1, 'bytes 0-255', 1, '2011-03-31', '2011-03-31', Piwik::$idPeriods['day'], $ts, $str); $array[] = array(2, 'compressed string', 1, '2011-03-31', '2011-03-31', Piwik::$idPeriods['day'], $ts, gzcompress(" \n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942\n \r \t teste eigaj oegheao geaoh guoea98742983 2 342942")); -- GitLab