@@ -1,158 +1,160 | |||||
if RUBY_VERSION < '1.9'
  require 'iconv'
end

module Redmine
  # Helpers for converting strings to and from UTF-8.
  #
  # Every public method picks one of three strategies at run time:
  # * the string responds to +force_encoding+: use String#encode;
  # * otherwise, on the 'java' platform: a single Iconv pass with a
  #   plain-ASCII fallback;
  # * otherwise: Iconv with a retry loop that substitutes '?' for each
  #   byte Iconv refuses.
  module CodesetUtil

    # Returns +str+ as UTF-8 with every invalid byte replaced by '?'.
    #
    # str:: the String to sanitize; +nil+ is returned unchanged. When the
    #       string responds to +force_encoding+ it is re-tagged as UTF-8
    #       in place before being checked.
    #
    # Characters that are already valid UTF-8 are preserved.
    def self.replace_invalid_utf8(str)
      return str if str.nil?
      if str.respond_to?(:force_encoding)
        str.force_encoding('UTF-8')
        unless str.valid_encoding?
          # Transcoding UTF-8 -> UTF-8 is a no-op, so go through another
          # Unicode encoding to make String#encode actually inspect the
          # bytes. UTF-16LE can represent every valid character, so only
          # the invalid bytes end up as '?'.
          str = str.encode('UTF-16LE', :invalid => :replace,
                           :undef => :replace, :replace => '?').encode('UTF-8')
        end
        str
      else
        iconv_convert(str, 'UTF-8', 'UTF-8')
      end
    end

    # Converts +str+, whose bytes are in +encoding+, to UTF-8.
    #
    # str::      the String to convert; +nil+ is returned unchanged and an
    #            empty string is only re-tagged as UTF-8.
    # encoding:: name of the source encoding; nil or a blank string means
    #            UTF-8.
    #
    # Bytes that cannot be converted are replaced by '?'. An encoding name
    # unknown to Ruby raises whatever String#force_encoding raises.
    def self.to_utf8(str, encoding)
      return str if str.nil?
      str.force_encoding('ASCII-8BIT') if str.respond_to?(:force_encoding)
      if str.empty?
        str.force_encoding('UTF-8') if str.respond_to?(:force_encoding)
        return str
      end
      enc = encoding_or_utf8(encoding)
      if str.respond_to?(:force_encoding)
        if enc.upcase != 'UTF-8'
          str.force_encoding(enc)
          str = str.encode('UTF-8', :invalid => :replace,
                           :undef => :replace, :replace => '?')
        else
          str = replace_invalid_utf8(str)
        end
      else
        str = iconv_convert(str, 'UTF-8', enc)
      end
      str
    end

    # Converts +str+ to UTF-8 by trying the encodings listed in
    # Setting.repositories_encodings, and makes sure the result is tagged
    # as UTF-8 even when the internal helper returned early.
    #
    # str:: the String to convert; +nil+ is returned unchanged.
    def self.to_utf8_by_setting(str)
      return str if str.nil?
      str = self.to_utf8_by_setting_internal(str)
      if str.respond_to?(:force_encoding)
        str.force_encoding('UTF-8')
      end
      str
    end

    # Tries each encoding from the comma separated
    # Setting.repositories_encodings list, in order, and returns the first
    # conversion to UTF-8 that succeeds. If none does, falls back to
    # replace_invalid_utf8.
    #
    # str:: the String to convert; +nil+, empty and pure US-ASCII strings
    #       are returned early. Note that the early returns leave the
    #       string tagged as ASCII-8BIT; to_utf8_by_setting re-tags it.
    def self.to_utf8_by_setting_internal(str)
      return str if str.nil?
      if str.respond_to?(:force_encoding)
        str.force_encoding('ASCII-8BIT')
      end
      return str if str.empty?
      return str if /\A[\r\n\t\x20-\x7e]*\Z/n.match(str) # for us-ascii
      if str.respond_to?(:force_encoding)
        str.force_encoding('UTF-8')
      end
      # to_s keeps a nil setting from raising NoMethodError.
      encodings = Setting.repositories_encodings.to_s.split(',').collect(&:strip)
      encodings.each do |encoding|
        if str.respond_to?(:force_encoding)
          begin
            # force_encoding is inside the begin block because it raises
            # ArgumentError for an encoding name Ruby does not know.
            str.force_encoding(encoding)
            next unless str.valid_encoding?
            utf8 = str.encode('UTF-8')
            # encode is a no-op when source and target encodings match,
            # so the result still has to be validated.
            return utf8 if utf8.valid_encoding?
          rescue EncodingError, ArgumentError
            # EncodingError covers invalid byte sequences, undefined
            # conversions and missing converters alike:
            # do nothing here and try the next encoding
          end
        else
          begin
            return Iconv.conv('UTF-8', encoding, str)
          rescue Iconv::Failure
            # do nothing here and try the next encoding
          end
        end
      end
      str = self.replace_invalid_utf8(str)
      if str.respond_to?(:force_encoding)
        str.force_encoding('UTF-8')
      end
      str
    end

    # Converts the UTF-8 string +str+ to +encoding+.
    #
    # str::      the String to convert; +nil+ is treated as an empty
    #            string.
    # encoding:: name of the target encoding; nil or a blank string means
    #            UTF-8, in which case only invalid bytes are replaced.
    #
    # Characters that cannot be represented in the target encoding are
    # replaced by '?'.
    def self.from_utf8(str, encoding)
      # String.new rather than a literal, so the default is always a
      # fresh, mutable string that force_encoding may re-tag.
      str ||= String.new
      enc = encoding_or_utf8(encoding)
      if str.respond_to?(:force_encoding)
        str.force_encoding('UTF-8')
        if enc.upcase != 'UTF-8'
          str = str.encode(enc, :invalid => :replace,
                           :undef => :replace, :replace => '?')
        else
          str = replace_invalid_utf8(str)
        end
      else
        str = iconv_convert(str, enc, 'UTF-8')
      end
      str
    end

    # Returns +encoding+ unchanged unless it is nil or contains only
    # whitespace, in which case 'UTF-8' is returned. Uses only core String
    # methods, so it does not depend on ActiveSupport's +blank?+.
    def self.encoding_or_utf8(encoding)
      encoding.to_s.strip.empty? ? 'UTF-8' : encoding
    end

    # Iconv based conversion used when strings do not respond to
    # +force_encoding+.
    #
    # str::  the String to convert.
    # to::   name of the target encoding.
    # from:: name of the source encoding.
    #
    # On the 'java' platform a single conversion is attempted and, if it
    # raises, every byte outside printable US-ASCII, CR, LF and TAB is
    # replaced by '?'. Elsewhere the conversion is retried after each
    # Iconv::IllegalSequence, with the first rejected byte replaced by '?'.
    def self.iconv_convert(str, to, from)
      if RUBY_PLATFORM == 'java'
        begin
          Iconv.new(to, from).iconv(str)
        rescue
          str.gsub(%r{[^\r\n\t\x20-\x7e]}, '?')
        end
      else
        ic = Iconv.new(to, from)
        txtar = ""
        begin
          txtar += ic.iconv(str)
        rescue Iconv::IllegalSequence => e
          # Keep what was converted so far, swap the first rejected byte
          # for '?' and retry with the remainder.
          txtar += e.success
          str = '?' + e.failed[1, e.failed.length]
          retry
        rescue => e
          # Not every StandardError exposes #success; calling it blindly
          # would raise NoMethodError from inside the rescue clause.
          txtar += e.success.to_s if e.respond_to?(:success)
        end
        txtar
      end
    end

    private_class_method :encoding_or_utf8, :iconv_convert
  end
end
General Comments 0
You need to be logged in to leave comments.
Login now