##// END OF EJS Templates
Encoding::InvalidByteSequenceError may be raised even if encoding is valid (#12787)....
Jean-Philippe Lang -
r10948:45f870cb53ea
parent child
Show More
@@ -1,158 +1,160
if RUBY_VERSION < '1.9'
  require 'iconv'
end

module Redmine
  # Conversions between UTF-8 and other character encodings.
  #
  # When strings respond to +force_encoding+ the built-in transcoder is used;
  # otherwise the methods fall back to Iconv (required above for Ruby < 1.9).
  # Every public method works on a copy, so the string passed in is never
  # re-tagged or otherwise modified and frozen strings are accepted.
  module CodesetUtil

    # Returns a copy of +str+ tagged as UTF-8 in which every invalid byte
    # sequence has been replaced by '?'. Valid characters, including
    # non-ASCII ones, are preserved. Returns nil when +str+ is nil.
    def self.replace_invalid_utf8(str)
      return str if str.nil?
      str = str.dup
      return iconv_with_replacement('UTF-8', 'UTF-8', str) unless str.respond_to?(:force_encoding)

      str.force_encoding('UTF-8')
      unless str.valid_encoding?
        # Encoding UTF-8 -> UTF-8 is a no-op, so round-trip through another
        # Unicode encoding to make the transcoder drop the invalid bytes.
        # UTF-16LE can represent every valid character, so nothing else is lost.
        str = str.encode("UTF-16LE", :invalid => :replace,
                         :undef => :replace, :replace => '?').encode("UTF-8")
      end
      str
    end

    # Interprets the bytes of +str+ as +encoding+ and returns them converted
    # to UTF-8; a nil or blank +encoding+ is treated as UTF-8. Bytes that
    # cannot be converted become '?'. Returns nil when +str+ is nil.
    def self.to_utf8(str, encoding)
      return str if str.nil?
      str = str.dup
      str.force_encoding("ASCII-8BIT") if str.respond_to?(:force_encoding)
      if str.empty?
        str.force_encoding("UTF-8") if str.respond_to?(:force_encoding)
        return str
      end
      enc = encoding_or_utf8(encoding)
      return iconv_with_replacement('UTF-8', enc, str) unless str.respond_to?(:force_encoding)

      if enc.upcase != "UTF-8"
        str.force_encoding(enc)
        str.encode("UTF-8", :invalid => :replace,
                   :undef => :replace, :replace => '?')
      else
        replace_invalid_utf8(str)
      end
    end

    # Converts +str+ to UTF-8 by trying each encoding listed in
    # Setting.repositories_encodings, and tags the result as UTF-8.
    # Returns nil when +str+ is nil.
    def self.to_utf8_by_setting(str)
      return str if str.nil?
      str = to_utf8_by_setting_internal(str)
      str.force_encoding('UTF-8') if str.respond_to?(:force_encoding)
      str
    end

    # Does the work for to_utf8_by_setting. Empty and plain-ASCII input is
    # returned early (as a binary-tagged copy). Otherwise each configured
    # encoding is tried in order and the first one that converts cleanly
    # wins; if none does, invalid UTF-8 sequences are replaced by '?'.
    def self.to_utf8_by_setting_internal(str)
      return str if str.nil?
      str = str.dup
      str.force_encoding('ASCII-8BIT') if str.respond_to?(:force_encoding)
      return str if str.empty?
      return str if /\A[\r\n\t\x20-\x7e]*\Z/n.match(str) # for us-ascii

      encodings = Setting.repositories_encodings.split(',').map { |name| name.strip }
      encodings.each do |encoding|
        if str.respond_to?(:force_encoding)
          begin
            str.force_encoding(encoding)
            converted = str.encode('UTF-8')
            # Same-encoding conversion does not validate, so check the result.
            return converted if converted.valid_encoding?
          rescue EncodingError, ArgumentError
            # EncodingError covers invalid byte sequences and undefined
            # conversions; ArgumentError is raised for an unknown encoding
            # name. Either way, try the next encoding.
          end
        else
          begin
            return Iconv.conv('UTF-8', encoding, str)
          rescue Iconv::Failure
            # do nothing here and try the next encoding
          end
        end
      end

      str = replace_invalid_utf8(str)
      str.force_encoding('UTF-8') if str.respond_to?(:force_encoding)
      str
    end

    # Converts the UTF-8 string +str+ to +encoding+; a nil or blank
    # +encoding+ is treated as UTF-8. Characters that cannot be converted
    # become '?'. A nil +str+ is treated as the empty string.
    def self.from_utf8(str, encoding)
      str = (str || '').dup
      enc = encoding_or_utf8(encoding)
      return iconv_with_replacement(enc, 'UTF-8', str) unless str.respond_to?(:force_encoding)

      str.force_encoding('UTF-8')
      if enc.upcase != 'UTF-8'
        str.encode(enc, :invalid => :replace,
                   :undef => :replace, :replace => '?')
      else
        replace_invalid_utf8(str)
      end
    end

    # Returns +encoding+, or 'UTF-8' when it is nil, false or blank.
    def self.encoding_or_utf8(encoding)
      (!encoding || encoding.to_s.strip.empty?) ? 'UTF-8' : encoding
    end

    # Iconv-based conversion of +str+ from +from+ to +to+, used when strings
    # have no force_encoding. On JRuby any failure degrades to replacing
    # everything outside printable ASCII, CR, LF and TAB with '?'. Elsewhere
    # each illegal byte is swapped for '?' and the conversion resumes from
    # there.
    def self.iconv_with_replacement(to, from, str)
      if RUBY_PLATFORM == 'java'
        begin
          Iconv.new(to, from).iconv(str)
        rescue
          str.gsub(%r{[^\r\n\t\x20-\x7e]}, '?')
        end
      else
        ic = Iconv.new(to, from)
        converted = ""
        begin
          converted += ic.iconv(str)
        rescue Iconv::IllegalSequence => e
          # Keep what was converted so far, replace the offending byte and
          # retry with the remainder; the remainder shrinks on every pass.
          converted += e.success
          str = '?' + e.failed[1, e.failed.length]
          retry
        rescue => e
          converted += e.success
        end
        converted
      end
    end

    private_class_method :encoding_or_utf8, :iconv_with_replacement
  end
end
General Comments 0
You need to be logged in to leave comments. Login now