##// END OF EJS Templates
Encoding::InvalidByteSequenceError may be raised even if encoding is valid (#12787)....
Jean-Philippe Lang -
r10948:45f870cb53ea
parent child
Show More
@@ -1,158 +1,160
1 1 if RUBY_VERSION < '1.9'
2 2 require 'iconv'
3 3 end
4 4
module Redmine
  # Helpers for converting text to and from UTF-8.
  #
  # On Rubies whose String responds to +force_encoding+ (1.9+) the built-in
  # transcoding is used; otherwise the methods fall back to Iconv.
  #
  # NOTE: the methods call +force_encoding+ on the string they are given, so
  # the caller's string object may be re-tagged with a different encoding.
  module CodesetUtil

    # Returns +str+ as UTF-8 with every invalid byte sequence replaced by '?'.
    # Valid characters (including non-ASCII ones) are preserved.
    #
    # str - String or nil. nil is returned unchanged.
    def self.replace_invalid_utf8(str)
      return str if str.nil?
      if str.respond_to?(:force_encoding)
        str.force_encoding('UTF-8')
        if ! str.valid_encoding?
          # Round-trip through UTF-16LE: every valid UTF-8 character is
          # representable there, so only the invalid bytes become '?'.
          # (Going through US-ASCII would also wipe out all valid
          # non-ASCII characters.)
          str = str.encode("UTF-16LE", :invalid => :replace,
                           :undef => :replace, :replace => '?').encode("UTF-8")
        end
        str
      else
        iconv_with_replacement(str, 'UTF-8', 'UTF-8')
      end
    end

    # Converts +str+ from +encoding+ to UTF-8, replacing bytes that cannot be
    # converted with '?'.
    #
    # str      - String or nil. nil is returned unchanged; an empty string is
    #            returned tagged as UTF-8.
    # encoding - name of the source encoding; blank means UTF-8.
    def self.to_utf8(str, encoding)
      return str if str.nil?
      str.force_encoding("ASCII-8BIT") if str.respond_to?(:force_encoding)
      if str.empty?
        str.force_encoding("UTF-8") if str.respond_to?(:force_encoding)
        return str
      end
      enc = encoding.blank? ? "UTF-8" : encoding
      if str.respond_to?(:force_encoding)
        if enc.upcase != "UTF-8"
          str.force_encoding(enc)
          str.encode("UTF-8", :invalid => :replace,
                     :undef => :replace, :replace => '?')
        else
          # Already (supposed to be) UTF-8: only scrub the invalid bytes.
          replace_invalid_utf8(str)
        end
      else
        iconv_with_replacement(str, 'UTF-8', enc)
      end
    end

    # Converts +str+ to UTF-8 by trying the encodings listed in
    # Setting.repositories_encodings, and tags the result as UTF-8.
    #
    # str - String or nil. nil is returned unchanged.
    def self.to_utf8_by_setting(str)
      return str if str.nil?
      str = self.to_utf8_by_setting_internal(str)
      if str.respond_to?(:force_encoding)
        str.force_encoding('UTF-8')
      end
      str
    end

    # Tries each encoding of Setting.repositories_encodings (comma separated)
    # in order and returns the first conversion to UTF-8 that succeeds. If none
    # does, the string is returned with its invalid UTF-8 bytes replaced.
    #
    # Empty and pure US-ASCII strings are returned early, tagged ASCII-8BIT.
    def self.to_utf8_by_setting_internal(str)
      return str if str.nil?
      if str.respond_to?(:force_encoding)
        str.force_encoding('ASCII-8BIT')
      end
      return str if str.empty?
      return str if /\A[\r\n\t\x20-\x7e]*\Z/n.match(str) # for us-ascii
      if str.respond_to?(:force_encoding)
        str.force_encoding('UTF-8')
      end
      encodings = Setting.repositories_encodings.split(',').collect(&:strip)
      encodings.each do |encoding|
        if str.respond_to?(:force_encoding)
          begin
            str.force_encoding(encoding)
            utf8 = str.encode('UTF-8')
            # encode is a no-op when the string is already tagged UTF-8, so
            # the result must be validated explicitly before accepting it.
            return utf8 if utf8.valid_encoding?
          rescue ArgumentError, EncodingError
            # unknown encoding name or failed conversion:
            # do nothing here and try the next encoding
          end
        else
          begin
            return Iconv.conv('UTF-8', encoding, str)
          rescue Iconv::Failure
            # do nothing here and try the next encoding
          end
        end
      end
      str = self.replace_invalid_utf8(str)
      if str.respond_to?(:force_encoding)
        str.force_encoding('UTF-8')
      end
      str
    end

    # Converts the UTF-8 string +str+ to +encoding+, replacing characters that
    # cannot be converted with '?'.
    #
    # str      - String or nil. nil is treated as an empty string.
    # encoding - name of the target encoding; blank means UTF-8 (consistent
    #            with to_utf8), in which case only invalid bytes are replaced.
    def self.from_utf8(str, encoding)
      str ||= ''
      enc = encoding.blank? ? 'UTF-8' : encoding
      if str.respond_to?(:force_encoding)
        str.force_encoding('UTF-8')
        if enc.upcase != 'UTF-8'
          str.encode(enc, :invalid => :replace,
                     :undef => :replace, :replace => '?')
        else
          replace_invalid_utf8(str)
        end
      else
        iconv_with_replacement(str, enc, 'UTF-8')
      end
    end

    # Iconv-based conversion used when String has no +force_encoding+.
    #
    # On JRuby any failure falls back to replacing every character outside
    # printable ASCII / CR / LF / TAB with '?'. Elsewhere each illegal
    # sequence is replaced by a single '?' and the conversion is resumed
    # after it.
    def self.iconv_with_replacement(str, to, from)
      if RUBY_PLATFORM == 'java'
        begin
          Iconv.new(to, from).iconv(str)
        rescue
          str.gsub(%r{[^\r\n\t\x20-\x7e]}, '?')
        end
      else
        converter = Iconv.new(to, from)
        converted = ""
        begin
          converted += converter.iconv(str)
        rescue Iconv::IllegalSequence => e
          converted += e.success
          # Swap the first offending byte for '?' and retry with the rest;
          # the input shrinks on every retry, so this terminates.
          str = '?' + e.failed[1, e.failed.length]
          retry
        rescue Iconv::Failure => e
          # Any other Iconv failure: keep what was converted so far.
          converted += e.success
        end
        converted
      end
    end
    private_class_method :iconv_with_replacement
  end
end
General Comments 0
You need to be logged in to leave comments. Login now