Skip to content

Commit

Permalink
Bug 1898882: Detect UTF8 naming in DB (#139)
Browse files Browse the repository at this point in the history
* Detect at installation/upgrade time how the database refers to the utf8 variant Bugzilla wants to use, so that we don't accidentally reconvert the database to the same encoding it's already using on every run of checksetup.pl
a=dylan
  • Loading branch information
justdave authored Aug 26, 2024
1 parent 6e45dcf commit 580e874
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 16 deletions.
7 changes: 5 additions & 2 deletions Bugzilla/Config/Common.pm
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,15 @@ sub check_email {

sub check_utf8 {
my ($utf8, $entry) = @_;

# You cannot turn off the UTF-8 parameter.
my $current_utf8 = Bugzilla->params->{'utf8'};
if (!$utf8) {
return "You cannot disable UTF-8 support.";
}
elsif ($entry eq 'utf8mb4' && $utf8 ne 'utf8mb4') {
elsif ($current_utf8 eq 'utf8mb3' && $utf8 ne 'utf8mb3' && $utf8 ne 'utf8mb4') {
return "You cannot downgrade from utf8mb3 support, only keep it or change to utf8mb4.";
}
elsif ($current_utf8 eq 'utf8mb4' && $utf8 ne 'utf8mb4') {
return "You cannot disable UTF8-MB4 support.";
}

Expand Down
9 changes: 8 additions & 1 deletion Bugzilla/Config/General.pm
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,18 @@ use constant get_param_list => (
{
name => 'utf8',
type => 's',
choices => ['1', 'utf8', 'utf8mb4'],
choices => ['1', 'utf8', 'utf8mb3', 'utf8mb4'],
default => 'utf8',
checker => \&check_utf8
},

{
name => 'utf8_collate',
type => 'r',
no_reset => '1',
default => 'utf8mb4_unicode_520_ci',
},

{name => 'announcehtml', type => 'l', default => ''},

{
Expand Down
31 changes: 26 additions & 5 deletions Bugzilla/DB/MariaDB.pm
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ extends qw(Bugzilla::DB);

use Bugzilla::Constants;
use Bugzilla::Install::Util qw(install_string);
use Bugzilla::Config;
use Bugzilla::Util;
use Bugzilla::Error;
use Bugzilla::DB::Schema::MariaDB;
Expand Down Expand Up @@ -312,6 +313,24 @@ sub bz_check_server_version {
sub bz_setup_database {
my ($self) = @_;

# Before touching anything else, find out whether this database server does
# any aliasing of the character set we plan to use so we can check for
# already converted tables properly. We do this by creating a table with our
# intended charset and then testing how it reads back.
my $db_name = Bugzilla->localconfig->{db_name};
my $charset = $self->utf8_charset;
my $collate = $self->utf8_collate;
$self->do("CREATE TABLE `utf8_test` (id tinyint) CHARACTER SET ? COLLATE ?", undef, $charset, $collate);
my ($found_collate) = $self->selectrow_array("SELECT TABLE_COLLATION FROM information_schema.TABLES WHERE TABLE_SCHEMA=? AND TABLE_NAME='utf8_test'", undef, $db_name);
$self->do("DROP TABLE `utf8_test`");
my ($found_charset) = ($found_collate =~ m/^([a-z0-9]+)_/);
Bugzilla->params->{'utf8'} = $found_charset;
Bugzilla->params->{'utf8_collate'} = $found_collate;
Bugzilla::Config::write_params();
# reload these because they get used later.
$charset = $self->utf8_charset;
$collate = $self->utf8_collate;

# The "comments" field of the bugs_fulltext table could easily exceed
# MySQL's default max_allowed_packet. Also, MySQL should never have
# a max_allowed_packet smaller than our max_attachment_size. So, we
Expand Down Expand Up @@ -404,7 +423,6 @@ sub bz_setup_database {
}

# Upgrade tables from MyISAM to InnoDB
my $db_name = Bugzilla->localconfig->db_name;
my $myisam_tables = $self->selectcol_arrayref(
'SELECT TABLE_NAME FROM information_schema.TABLES
WHERE TABLE_SCHEMA = ? AND ENGINE = ?', undef, $db_name, 'MyISAM'
Expand Down Expand Up @@ -629,8 +647,6 @@ sub bz_setup_database {
# the table charsets.
#
# TABLE_COLLATION IS NOT NULL prevents us from trying to convert views.
my $charset = $self->utf8_charset;
my $collate = $self->utf8_collate;
my $non_utf8_tables = $self->selectrow_array(
"SELECT 1 FROM information_schema.TABLES
WHERE TABLE_SCHEMA = ? AND TABLE_COLLATION IS NOT NULL
Expand Down Expand Up @@ -836,11 +852,16 @@ sub _fix_defaults {
}

# Return the name of the character set to use for UTF-8 data.
#
# The value comes from the 'utf8' parameter. When that parameter is unset,
# false, or the legacy value '1', 'utf8mb4' is returned instead; any other
# value is returned unchanged.
sub utf8_charset {
  my $utf8 = Bugzilla->params->{'utf8'};

  # An unset parameter and the legacy boolean '1' both mean "use the default".
  return 'utf8mb4' if !$utf8 || $utf8 eq '1';
  return $utf8;
}

# Return the collation to use for UTF-8 tables.
#
# The 'utf8_collate' parameter is returned only when it is set and its name
# starts with "<charset>_", where <charset> is the result of utf8_charset().
# Otherwise the fallback "<charset>_unicode_520_ci" is returned, so the
# collation always belongs to the charset in use.
sub utf8_collate {
  my $charset = utf8_charset();
  my $collate = Bugzilla->params->{'utf8_collate'};

  # \Q...\E keeps the charset name from being interpreted as regex syntax.
  return $collate if $collate && $collate =~ /^\Q$charset\E_/;
  return $charset . '_unicode_520_ci';
}

sub default_row_format {
Expand Down
31 changes: 26 additions & 5 deletions Bugzilla/DB/Mysql.pm
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ extends qw(Bugzilla::DB);

use Bugzilla::Constants;
use Bugzilla::Install::Util qw(install_string);
use Bugzilla::Config;
use Bugzilla::Util;
use Bugzilla::Error;
use Bugzilla::DB::Schema::Mysql;
Expand Down Expand Up @@ -313,6 +314,24 @@ sub bz_check_server_version {
sub bz_setup_database {
my ($self) = @_;

# Before touching anything else, find out whether this database server does
# any aliasing of the character set we plan to use so we can check for
# already converted tables properly. We do this by creating a table with our
# intended charset and then testing how it reads back.
my $db_name = Bugzilla->localconfig->{db_name};
my $charset = $self->utf8_charset;
my $collate = $self->utf8_collate;
$self->do("CREATE TABLE `utf8_test` (id tinyint) CHARACTER SET ? COLLATE ?", undef, $charset, $collate);
my ($found_collate) = $self->selectrow_array("SELECT TABLE_COLLATION FROM information_schema.TABLES WHERE TABLE_SCHEMA=? AND TABLE_NAME='utf8_test'", undef, $db_name);
$self->do("DROP TABLE `utf8_test`");
my ($found_charset) = ($found_collate =~ m/^([a-z0-9]+)_/);
Bugzilla->params->{'utf8'} = $found_charset;
Bugzilla->params->{'utf8_collate'} = $found_collate;
Bugzilla::Config::write_params();
# reload these because they get used later.
$charset = $self->utf8_charset;
$collate = $self->utf8_collate;

# The "comments" field of the bugs_fulltext table could easily exceed
# MySQL's default max_allowed_packet. Also, MySQL should never have
# a max_allowed_packet smaller than our max_attachment_size. So, we
Expand Down Expand Up @@ -405,7 +424,6 @@ sub bz_setup_database {
}

# Upgrade tables from MyISAM to InnoDB
my $db_name = Bugzilla->localconfig->db_name;
my $myisam_tables = $self->selectcol_arrayref(
'SELECT TABLE_NAME FROM information_schema.TABLES
WHERE TABLE_SCHEMA = ? AND ENGINE = ?', undef, $db_name, 'MyISAM'
Expand Down Expand Up @@ -630,8 +648,6 @@ sub bz_setup_database {
# the table charsets.
#
# TABLE_COLLATION IS NOT NULL prevents us from trying to convert views.
my $charset = $self->utf8_charset;
my $collate = $self->utf8_collate;
my $non_utf8_tables = $self->selectrow_array(
"SELECT 1 FROM information_schema.TABLES
WHERE TABLE_SCHEMA = ? AND TABLE_COLLATION IS NOT NULL
Expand Down Expand Up @@ -837,11 +853,16 @@ sub _fix_defaults {
}

# Return the name of the character set to use for UTF-8 data.
#
# The value comes from the 'utf8' parameter. When that parameter is unset,
# false, or the legacy value '1', 'utf8mb4' is returned instead; any other
# value is returned unchanged.
sub utf8_charset {
  my $utf8 = Bugzilla->params->{'utf8'};

  # An unset parameter and the legacy boolean '1' both mean "use the default".
  return 'utf8mb4' if !$utf8 || $utf8 eq '1';
  return $utf8;
}

# Return the collation to use for UTF-8 tables.
#
# The 'utf8_collate' parameter is returned only when it is set and its name
# starts with "<charset>_", where <charset> is the result of utf8_charset().
# Otherwise the fallback "<charset>_unicode_520_ci" is returned, so the
# collation always belongs to the charset in use.
sub utf8_collate {
  my $charset = utf8_charset();
  my $collate = Bugzilla->params->{'utf8_collate'};

  # \Q...\E keeps the charset name from being interpreted as regex syntax.
  return $collate if $collate && $collate =~ /^\Q$charset\E_/;
  return $charset . '_unicode_520_ci';
}

sub default_row_format {
Expand Down
5 changes: 5 additions & 0 deletions template/en/default/admin/params/common.html.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@
[% IF param.type == "t" %]
<input type="text" size="80" name="[% param.name FILTER html %]"
id="[% param.name FILTER html %]" value="[% Param(param.name) FILTER html %]">
[% ELSIF param.type == "r" %]
<input type="text" size="80" name="[% param.name FILTER html %]_readonly"
id="[% param.name FILTER html %]_readonly" value="[% Param(param.name) FILTER html %]" disabled>
<input type="hidden" name="[% param.name FILTER html %]" value="[% Param(param.name) FILTER html %]"><br>
This value is read-only and you can't change it.
[% ELSIF param.type == "p" %]
<input type="password" size="80" name="[% param.name FILTER html %]"
id="[% param.name FILTER html %]" value="[% Param(param.name) FILTER html %]"
Expand Down
9 changes: 6 additions & 3 deletions template/en/default/admin/params/general.html.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,12 @@
_ " only after the data has been converted from existing legacy"
_ " character encodings to UTF-8, using the <kbd>contrib/recode.pl</kbd>"
_ " script</strong>."
_ " <p>Note that if you turn this parameter from &quot;off&quot; to"
_ " &quot;on&quot;, you must re-run <kbd>checksetup.pl</kbd> immediately"
_ " afterward.</p>",
_ " <p>Note that if you change this parameter you must re-run"
_ " <kbd>checksetup.pl</kbd> immediately afterward.</p>",

utf8_collate =>
"The collation to use in database tables. This parameter is"
_ " automatically set by checksetup.pl.",

announcehtml =>
"If this field is non-empty, then $terms.Bugzilla will display whatever is"
Expand Down

0 comments on commit 580e874

Please sign in to comment.