@@ -1,68 +1,68 | |||||
1 |
|
1 | |||
module Redmine
  # Helpers for converting strings between UTF-8 and other encodings.
  #
  # Every helper re-tags its argument with String#force_encoding. An unfrozen
  # argument is therefore modified in place (as it always was); a frozen
  # argument is duplicated first so that it can be processed instead of
  # raising.
  module CodesetUtil
    # Tags +str+ as UTF-8 and replaces every invalid byte sequence with '?'.
    #
    # str - String to sanitise, or nil.
    #
    # Returns a UTF-8 String with a valid encoding, or nil when +str+ is nil.
    def self.replace_invalid_utf8(str)
      return str if str.nil?
      str = str.dup if str.frozen?
      str.force_encoding('UTF-8')
      unless str.valid_encoding?
        # Transcoding to another Unicode encoding and back is what makes the
        # :invalid/:replace options take effect while leaving the valid
        # characters untouched.
        str = str.encode("UTF-16LE", :invalid => :replace,
              :undef => :replace, :replace => '?').encode("UTF-8")
      end
      str
    end

    # Converts +str+, whose bytes are in +encoding+, to UTF-8. Invalid or
    # unconvertible sequences are replaced with '?'.
    #
    # str      - String to convert, or nil.
    # encoding - Name of the source encoding; blank means UTF-8.
    #
    # Returns the UTF-8 String, or nil when +str+ is nil.
    def self.to_utf8(str, encoding)
      return str if str.nil?
      str = str.dup if str.frozen?
      str.force_encoding("ASCII-8BIT")
      if str.empty?
        str.force_encoding("UTF-8")
        return str
      end
      enc = encoding.blank? ? "UTF-8" : encoding
      if enc.upcase != "UTF-8"
        str.force_encoding(enc)
        str = str.encode("UTF-8", :invalid => :replace,
              :undef => :replace, :replace => '?')
      else
        str = replace_invalid_utf8(str)
      end
      str
    end

    # Converts +str+ to UTF-8 by trying the encodings listed in
    # Setting.repositories_encodings.
    #
    # Returns a String tagged as UTF-8, or nil when +str+ is nil.
    def self.to_utf8_by_setting(str)
      return str if str.nil?
      self.to_utf8_by_setting_internal(str).force_encoding('UTF-8')
    end

    # Implementation of to_utf8_by_setting.
    #
    # Empty and printable-ASCII strings are returned as they are (tagged
    # ASCII-8BIT at that point; the public wrapper re-tags them). Otherwise
    # each configured encoding is tried in order and the first one that
    # produces valid UTF-8 wins. When none does, invalid sequences are
    # replaced with '?'.
    def self.to_utf8_by_setting_internal(str)
      return str if str.nil?
      str = str.dup if str.frozen?
      str.force_encoding('ASCII-8BIT')
      return str if str.empty?
      return str if /\A[\r\n\t\x20-\x7e]*\Z/n.match(str) # for us-ascii
      str.force_encoding('UTF-8')
      encodings = Setting.repositories_encodings.split(',').collect(&:strip)
      encodings.each do |encoding|
        begin
          str.force_encoding(encoding)
          utf8 = str.encode('UTF-8')
          return utf8 if utf8.valid_encoding?
        rescue
          # An unknown encoding name or a failed conversion only means this
          # candidate does not fit: try the next encoding.
        end
      end
      self.replace_invalid_utf8(str).force_encoding('UTF-8')
    end

    # Converts the UTF-8 string +str+ to +encoding+. Invalid or unconvertible
    # sequences are replaced with '?'.
    #
    # str      - UTF-8 String to convert; nil is treated as an empty string.
    # encoding - Name of the target encoding; blank means UTF-8, the same
    #            default that to_utf8 applies.
    #
    # Returns the converted String.
    def self.from_utf8(str, encoding)
      str = str.nil? ? ''.dup : str
      str = str.dup if str.frozen?
      str.force_encoding('UTF-8')
      enc = encoding.blank? ? 'UTF-8' : encoding
      if enc.upcase != 'UTF-8'
        str = str.encode(enc, :invalid => :replace,
              :undef => :replace, :replace => '?')
      else
        str = self.replace_invalid_utf8(str)
      end
      str
    end
  end
end
@@ -1,104 +1,104 | |||||
1 | # Redmine - project management software |
|
1 | # Redmine - project management software | |
2 | # Copyright (C) 2006-2016 Jean-Philippe Lang |
|
2 | # Copyright (C) 2006-2016 Jean-Philippe Lang | |
3 | # |
|
3 | # | |
4 | # This program is free software; you can redistribute it and/or |
|
4 | # This program is free software; you can redistribute it and/or | |
5 | # modify it under the terms of the GNU General Public License |
|
5 | # modify it under the terms of the GNU General Public License | |
6 | # as published by the Free Software Foundation; either version 2 |
|
6 | # as published by the Free Software Foundation; either version 2 | |
7 | # of the License, or (at your option) any later version. |
|
7 | # of the License, or (at your option) any later version. | |
8 | # |
|
8 | # | |
9 | # This program is distributed in the hope that it will be useful, |
|
9 | # This program is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | # GNU General Public License for more details. |
|
12 | # GNU General Public License for more details. | |
13 | # |
|
13 | # | |
14 | # You should have received a copy of the GNU General Public License |
|
14 | # You should have received a copy of the GNU General Public License | |
15 | # along with this program; if not, write to the Free Software |
|
15 | # along with this program; if not, write to the Free Software | |
16 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
16 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
17 |
|
17 | |||
18 | require File.expand_path('../../../../test_helper', __FILE__) |
|
18 | require File.expand_path('../../../../test_helper', __FILE__) | |
19 |
|
19 | |||
class Redmine::CodesetUtilTest < ActiveSupport::TestCase

  # ISO-8859-1 bytes are converted to the matching UTF-8 text, whether the
  # input is tagged as binary or as UTF-8.
  def test_to_utf8_by_setting_from_latin1
    with_settings :repositories_encodings => 'UTF-8,ISO-8859-1' do
      expected = "Texte encod\xc3\xa9".force_encoding("UTF-8")
      binary = "Texte encod\xe9".force_encoding("ASCII-8BIT")
      tagged_utf8 = binary.dup.force_encoding("UTF-8")
      [binary, tagged_utf8].each do |input|
        assert_equal expected, Redmine::CodesetUtil.to_utf8_by_setting(input)
      end
    end
  end

  # Same check with EUC-JP encoded input.
  def test_to_utf8_by_setting_from_euc_jp
    with_settings :repositories_encodings => 'UTF-8,EUC-JP' do
      expected = "\xe3\x83\xac\xe3\x83\x83\xe3\x83\x89\xe3\x83\x9e\xe3\x82\xa4\xe3\x83\xb3".force_encoding("UTF-8")
      binary = "\xa5\xec\xa5\xc3\xa5\xc9\xa5\xde\xa5\xa4\xa5\xf3".force_encoding("ASCII-8BIT")
      tagged_utf8 = binary.dup.force_encoding("UTF-8")
      [binary, tagged_utf8].each do |input|
        assert_equal expected, Redmine::CodesetUtil.to_utf8_by_setting(input)
      end
    end
  end

  # With ISO-8859-1 as the only configured encoding, bytes that would also be
  # valid UTF-8 are still converted from ISO-8859-1.
  def test_to_utf8_by_setting_should_be_converted_all_latin1
    with_settings :repositories_encodings => 'ISO-8859-1' do
      expected = "\xc3\x82\xc2\x80".force_encoding("UTF-8")
      binary = "\xC2\x80".force_encoding("ASCII-8BIT")
      tagged_utf8 = binary.dup.force_encoding("UTF-8")
      [binary, tagged_utf8].each do |input|
        assert_equal expected, Redmine::CodesetUtil.to_utf8_by_setting(input)
      end
    end
  end

  # Empty and nil inputs are passed through.
  def test_to_utf8_by_setting_blank_string
    assert_equal "", Redmine::CodesetUtil.to_utf8_by_setting("")
    assert_nil Redmine::CodesetUtil.to_utf8_by_setting(nil)
  end

  # Plain ASCII text keeps its content and comes back tagged as UTF-8.
  def test_to_utf8_by_setting_returns_ascii_as_utf8
    utf8_ascii = "ASCII".force_encoding("UTF-8")
    latin1_ascii = utf8_ascii.dup.force_encoding("ISO-8859-1")
    from_utf8 = Redmine::CodesetUtil.to_utf8_by_setting(utf8_ascii)
    from_latin1 = Redmine::CodesetUtil.to_utf8_by_setting(latin1_ascii)
    [from_utf8, from_latin1].each do |converted|
      assert_equal utf8_ascii, converted
    end
    [from_utf8, from_latin1].each do |converted|
      assert_equal "UTF-8", converted.encoding.to_s
    end
  end

  # With no encodings configured, the non-UTF-8 byte is replaced by '?'.
  def test_to_utf8_by_setting_invalid_utf8_sequences_should_be_stripped
    with_settings :repositories_encodings => '' do
      # ISO-8859-1 encoded sample text.
      raw = "Texte encod\xe9 en ISO-8859-1.".force_encoding("ASCII-8BIT")
      converted = Redmine::CodesetUtil.to_utf8_by_setting(raw)
      assert converted.valid_encoding?
      assert_equal "UTF-8", converted.encoding.to_s
      assert_equal "Texte encod? en ISO-8859-1.", converted
    end
  end

  # Bytes that cannot be converted with the configured ISO-2022-JP encoding
  # are each replaced by '?'.
  def test_to_utf8_by_setting_invalid_utf8_sequences_should_be_stripped_ja_jis
    with_settings :repositories_encodings => 'ISO-2022-JP' do
      raw = "test\xb5\xfetest\xb5\xfe".force_encoding("ASCII-8BIT")
      converted = Redmine::CodesetUtil.to_utf8_by_setting(raw)
      assert converted.valid_encoding?
      assert_equal "UTF-8", converted.encoding.to_s
      assert_equal "test??test??", converted
    end
  end

  # The valid leading characters are kept; only the trailing invalid bytes
  # turn into '?'.
  test "#replace_invalid_utf8 should replace invalid utf8" do
    broken = "\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xE3\x81\xFF".force_encoding("UTF-8")
    repaired = Redmine::CodesetUtil.replace_invalid_utf8(broken)
    assert repaired.valid_encoding?
    assert_equal "UTF-8", repaired.encoding.to_s
    assert_equal "\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1??".force_encoding("UTF-8"), repaired
  end

  # A truncated trailing EUC-JP character is replaced by a single '?'.
  test "#to_utf8 should replace invalid non utf8" do
    broken = "\xa4\xb3\xa4\xf3\xa4\xcb\xa4\xc1\xa4".force_encoding("EUC-JP")
    converted = Redmine::CodesetUtil.to_utf8(broken, "EUC-JP")
    assert converted.valid_encoding?
    assert_equal "UTF-8", converted.encoding.to_s
    assert_equal "\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1?".force_encoding("UTF-8"), converted
  end
end
General Comments 0
You need to be logged in to leave comments.
Login now