From 580e87466e984b1f5a8a3c5b2101a2672f646a89 Mon Sep 17 00:00:00 2001 From: Dave Miller Date: Sun, 25 Aug 2024 21:49:40 -0400 Subject: [PATCH] Bug 1898882: Detect UTF8 naming in DB (#139) * Detect at installation/upgrade time how the database refers to the utf8 variant Bugzilla wants to use so that we don't accidentally reconvert the database to the same encoding it's already using on every run of checksetup.pl a=dylan --- Bugzilla/Config/Common.pm | 7 +++-- Bugzilla/Config/General.pm | 9 +++++- Bugzilla/DB/MariaDB.pm | 31 ++++++++++++++++--- Bugzilla/DB/Mysql.pm | 31 ++++++++++++++++--- .../en/default/admin/params/common.html.tmpl | 5 +++ .../en/default/admin/params/general.html.tmpl | 9 ++++-- 6 files changed, 76 insertions(+), 16 deletions(-) diff --git a/Bugzilla/Config/Common.pm b/Bugzilla/Config/Common.pm index 19bf9c068e..14d72115a5 100644 --- a/Bugzilla/Config/Common.pm +++ b/Bugzilla/Config/Common.pm @@ -88,12 +88,15 @@ sub check_email { sub check_utf8 { my ($utf8, $entry) = @_; - # You cannot turn off the UTF-8 parameter. 
+ my $current_utf8 = Bugzilla->params->{'utf8'}; if (!$utf8) { return "You cannot disable UTF-8 support."; } - elsif ($entry eq 'utf8mb4' && $utf8 ne 'utf8mb4') { + elsif ($current_utf8 eq 'utf8mb3' && $utf8 ne 'utf8mb3' && $utf8 ne 'utf8mb4') { + return "You cannot downgrade from utf8mb3 support, only keep it or change to utf8mb4."; + } + elsif ($current_utf8 eq 'utf8mb4' && $utf8 ne 'utf8mb4') { return "You cannot disable UTF8-MB4 support."; } diff --git a/Bugzilla/Config/General.pm b/Bugzilla/Config/General.pm index 5d8ab09463..f6cd369c62 100644 --- a/Bugzilla/Config/General.pm +++ b/Bugzilla/Config/General.pm @@ -34,11 +34,18 @@ use constant get_param_list => ( { name => 'utf8', type => 's', - choices => ['1', 'utf8', 'utf8mb4'], + choices => ['1', 'utf8', 'utf8mb3', 'utf8mb4'], default => 'utf8', checker => \&check_utf8 }, + { + name => 'utf8_collate', + type => 'r', + no_reset => '1', + default => 'utf8mb4_unicode_520_ci', + }, + {name => 'announcehtml', type => 'l', default => ''}, { diff --git a/Bugzilla/DB/MariaDB.pm b/Bugzilla/DB/MariaDB.pm index 3f99afd001..3edc13474b 100644 --- a/Bugzilla/DB/MariaDB.pm +++ b/Bugzilla/DB/MariaDB.pm @@ -28,6 +28,7 @@ extends qw(Bugzilla::DB); use Bugzilla::Constants; use Bugzilla::Install::Util qw(install_string); +use Bugzilla::Config; use Bugzilla::Util; use Bugzilla::Error; use Bugzilla::DB::Schema::MariaDB; @@ -312,6 +313,24 @@ sub bz_check_server_version { sub bz_setup_database { my ($self) = @_; + # Before touching anything else, find out whether this database server does + # any aliasing of the character set we plan to use so we can check for + # already converted tables properly. We do this by creating a table as our + # intended charset and then test how it reads back. + my $db_name = Bugzilla->localconfig->{db_name}; + my $charset = $self->utf8_charset; + my $collate = $self->utf8_collate; + $self->do("CREATE TABLE `utf8_test` (id tinyint) CHARACTER SET ? 
COLLATE ?", undef, $charset, $collate); + my ($found_collate) = $self->selectrow_array("SELECT TABLE_COLLATION FROM information_schema.TABLES WHERE TABLE_SCHEMA=? AND TABLE_NAME='utf8_test'", undef, $db_name); + $self->do("DROP TABLE `utf8_test`"); + my ($found_charset) = ($found_collate =~ m/^([a-z0-9]+)_/); + Bugzilla->params->{'utf8'} = $found_charset; + Bugzilla->params->{'utf8_collate'} = $found_collate; + Bugzilla::Config::write_params(); + # reload these because they get used later. + $charset = $self->utf8_charset; + $collate = $self->utf8_collate; + # The "comments" field of the bugs_fulltext table could easily exceed # MySQL's default max_allowed_packet. Also, MySQL should never have # a max_allowed_packet smaller than our max_attachment_size. So, we @@ -404,7 +423,6 @@ sub bz_setup_database { } # Upgrade tables from MyISAM to InnoDB - my $db_name = Bugzilla->localconfig->db_name; my $myisam_tables = $self->selectcol_arrayref( 'SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA = ? AND ENGINE = ?', undef, $db_name, 'MyISAM' @@ -629,8 +647,6 @@ sub bz_setup_database { # the table charsets. # # TABLE_COLLATION IS NOT NULL prevents us from trying to convert views. - my $charset = $self->utf8_charset; - my $collate = $self->utf8_collate; my $non_utf8_tables = $self->selectrow_array( "SELECT 1 FROM information_schema.TABLES WHERE TABLE_SCHEMA = ? AND TABLE_COLLATION IS NOT NULL @@ -836,11 +852,16 @@ sub _fix_defaults { } sub utf8_charset { - return 'utf8mb4'; + return 'utf8mb4' unless Bugzilla->params->{'utf8'}; + return 'utf8mb4' if Bugzilla->params->{'utf8'} eq '1'; + return Bugzilla->params->{'utf8'}; } sub utf8_collate { - return 'utf8mb4_unicode_520_ci'; + my $charset = utf8_charset(); + return $charset . '_unicode_520_ci' unless Bugzilla->params->{'utf8_collate'}; + return $charset . 
'_unicode_520_ci' unless (Bugzilla->params->{'utf8_collate'} =~ /^${charset}_/); + return Bugzilla->params->{'utf8_collate'}; } sub default_row_format { diff --git a/Bugzilla/DB/Mysql.pm b/Bugzilla/DB/Mysql.pm index 1f3a03fd92..c287a0eb95 100644 --- a/Bugzilla/DB/Mysql.pm +++ b/Bugzilla/DB/Mysql.pm @@ -28,6 +28,7 @@ extends qw(Bugzilla::DB); use Bugzilla::Constants; use Bugzilla::Install::Util qw(install_string); +use Bugzilla::Config; use Bugzilla::Util; use Bugzilla::Error; use Bugzilla::DB::Schema::Mysql; @@ -313,6 +314,24 @@ sub bz_check_server_version { sub bz_setup_database { my ($self) = @_; + # Before touching anything else, find out whether this database server does + # any aliasing of the character set we plan to use so we can check for + # already converted tables properly. We do this by creating a table as our + # intended charset and then test how it reads back. + my $db_name = Bugzilla->localconfig->{db_name}; + my $charset = $self->utf8_charset; + my $collate = $self->utf8_collate; + $self->do("CREATE TABLE `utf8_test` (id tinyint) CHARACTER SET ? COLLATE ?", undef, $charset, $collate); + my ($found_collate) = $self->selectrow_array("SELECT TABLE_COLLATION FROM information_schema.TABLES WHERE TABLE_SCHEMA=? AND TABLE_NAME='utf8_test'", undef, $db_name); + $self->do("DROP TABLE `utf8_test`"); + my ($found_charset) = ($found_collate =~ m/^([a-z0-9]+)_/); + Bugzilla->params->{'utf8'} = $found_charset; + Bugzilla->params->{'utf8_collate'} = $found_collate; + Bugzilla::Config::write_params(); + # reload these because they get used later. + $charset = $self->utf8_charset; + $collate = $self->utf8_collate; + # The "comments" field of the bugs_fulltext table could easily exceed # MySQL's default max_allowed_packet. Also, MySQL should never have # a max_allowed_packet smaller than our max_attachment_size. 
So, we @@ -405,7 +424,6 @@ sub bz_setup_database { } # Upgrade tables from MyISAM to InnoDB - my $db_name = Bugzilla->localconfig->db_name; my $myisam_tables = $self->selectcol_arrayref( 'SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA = ? AND ENGINE = ?', undef, $db_name, 'MyISAM' @@ -630,8 +648,6 @@ sub bz_setup_database { # the table charsets. # # TABLE_COLLATION IS NOT NULL prevents us from trying to convert views. - my $charset = $self->utf8_charset; - my $collate = $self->utf8_collate; my $non_utf8_tables = $self->selectrow_array( "SELECT 1 FROM information_schema.TABLES WHERE TABLE_SCHEMA = ? AND TABLE_COLLATION IS NOT NULL @@ -837,11 +853,16 @@ sub _fix_defaults { } sub utf8_charset { - return 'utf8mb4'; + return 'utf8mb4' unless Bugzilla->params->{'utf8'}; + return 'utf8mb4' if Bugzilla->params->{'utf8'} eq '1'; + return Bugzilla->params->{'utf8'}; } sub utf8_collate { - return 'utf8mb4_unicode_520_ci'; + my $charset = utf8_charset(); + return $charset . '_unicode_520_ci' unless Bugzilla->params->{'utf8_collate'}; + return $charset . '_unicode_520_ci' unless (Bugzilla->params->{'utf8_collate'} =~ /^${charset}_/); + return Bugzilla->params->{'utf8_collate'}; } sub default_row_format { diff --git a/template/en/default/admin/params/common.html.tmpl b/template/en/default/admin/params/common.html.tmpl index 011bcda166..0a0bb540b1 100644 --- a/template/en/default/admin/params/common.html.tmpl +++ b/template/en/default/admin/params/common.html.tmpl @@ -44,6 +44,11 @@ [% IF param.type == "t" %] + [% ELSIF param.type == "r" %] + +
+ This value is read-only and you can't change it. [% ELSIF param.type == "p" %] contrib/recode.pl" _ " script." - _ "

Note that if you turn this parameter from "off" to" - _ " "on", you must re-run checksetup.pl immediately" - _ " afterward.

", + _ "

Note that if you change this parameter you must re-run" + _ " checksetup.pl immediately afterward.

", + + utf8_collate => + "The collation to use in database tables. This parameter is" + _ " automatically set by checksetup.pl.", announcehtml => "If this field is non-empty, then $terms.Bugzilla will display whatever is"