##// END OF EJS Templates
scm: replace invalid UTF-8 sequences in comments instead of stripping them on Ruby 1.8...
Toshi MARUYAMA -
r5253:6536c53e0973
parent child
Show More
@@ -255,8 +255,8 class Changeset < ActiveRecord::Base
255 str.force_encoding("UTF-8") if str.respond_to?(:force_encoding)
255 str.force_encoding("UTF-8") if str.respond_to?(:force_encoding)
256 return str
256 return str
257 end
257 end
258 enc = encoding.blank? ? "UTF-8" : encoding
258 if str.respond_to?(:force_encoding)
259 if str.respond_to?(:force_encoding)
259 enc = encoding.blank? ? "UTF-8" : encoding
260 if enc != "UTF-8"
260 if enc != "UTF-8"
261 str.force_encoding(enc)
261 str.force_encoding(enc)
262 str = str.encode("UTF-8", :invalid => :replace,
262 str = str.encode("UTF-8", :invalid => :replace,
@@ -269,19 +269,18 class Changeset < ActiveRecord::Base
269 end
269 end
270 end
270 end
271 else
271 else
272 unless encoding.blank? || encoding == 'UTF-8'
272 ic = Iconv.new('UTF-8', enc)
273 begin
273 txtar = ""
274 str = Iconv.conv('UTF-8', encoding, str)
275 rescue Iconv::Failure
276 # do nothing here
277 end
278 end
279 # removes invalid UTF8 sequences
280 begin
274 begin
281 str = Iconv.conv('UTF-8//IGNORE', 'UTF-8', str + ' ')[0..-3]
275 txtar += ic.iconv(str)
282 rescue Iconv::InvalidEncoding
276 rescue Iconv::IllegalSequence
283 # "UTF-8//IGNORE" is not supported on some OS
277 txtar += $!.success
278 str = '?' + $!.failed[1,$!.failed.length]
279 retry
280 rescue
281 txtar += $!.success
284 end
282 end
283 str = txtar
285 end
284 end
286 str
285 str
287 end
286 end
@@ -21,7 +21,8 require File.expand_path('../../test_helper', __FILE__)
21
21
22 class ChangesetTest < ActiveSupport::TestCase
22 class ChangesetTest < ActiveSupport::TestCase
23 fixtures :projects, :repositories, :issues, :issue_statuses,
23 fixtures :projects, :repositories, :issues, :issue_statuses,
24 :changesets, :changes, :issue_categories, :enumerations, :custom_fields, :custom_values, :users, :members, :member_roles, :trackers
24 :changesets, :changes, :issue_categories, :enumerations,
25 :custom_fields, :custom_values, :users, :members, :member_roles, :trackers
25
26
26 def setup
27 def setup
27 end
28 end
@@ -250,29 +251,26 class ChangesetTest < ActiveSupport::TestCase
250 assert_equal str_utf8, c.comments
251 assert_equal str_utf8, c.comments
251 end
252 end
252
253
253 def test_invalid_utf8_sequences_in_comments_should_be_stripped
254 def test_invalid_utf8_sequences_in_comments_should_be_replaced_latin1
254 proj = Project.find(3)
255 proj = Project.find(3)
255 # str = File.read("#{RAILS_ROOT}/test/fixtures/encoding/iso-8859-1.txt")
256 # str = File.read("#{RAILS_ROOT}/test/fixtures/encoding/iso-8859-1.txt")
256 str = "Texte encod\xe9 en ISO-8859-1."
257 str = "Texte encod\xe9 en ISO-8859-1."
257 str.force_encoding("ASCII-8BIT") if str.respond_to?(:force_encoding)
258 str.force_encoding("ASCII-8BIT") if str.respond_to?(:force_encoding)
258 r = Repository::Bazaar.create!(
259 r = Repository::Bazaar.create!(
259 :project => proj, :url => '/tmp/test/bazaar',
260 :project => proj,
261 :url => '/tmp/test/bazaar',
260 :log_encoding => 'UTF-8' )
262 :log_encoding => 'UTF-8' )
261 assert r
263 assert r
262 c = Changeset.new(:repository => r,
264 c = Changeset.new(:repository => r,
263 :committed_on => Time.now,
265 :committed_on => Time.now,
264 :revision => '123',
266 :revision => '123',
265 :scmid => '12345',
267 :scmid => '12345',
266 :comments => str)
268 :comments => str)
267 assert( c.save )
269 assert( c.save )
268 if str.respond_to?(:force_encoding)
270 assert_equal "Texte encod? en ISO-8859-1.", c.comments
269 assert_equal "Texte encod? en ISO-8859-1.", c.comments
270 else
271 assert_equal "Texte encod en ISO-8859-1.", c.comments
272 end
273 end
271 end
274
272
275 def test_invalid_utf8_sequences_in_comments_should_be_stripped_ja_jis
273 def test_invalid_utf8_sequences_in_comments_should_be_replaced_ja_jis
276 proj = Project.find(3)
274 proj = Project.find(3)
277 str = "test\xb5\xfetest\xb5\xfe"
275 str = "test\xb5\xfetest\xb5\xfe"
278 if str.respond_to?(:force_encoding)
276 if str.respond_to?(:force_encoding)
@@ -280,7 +278,7 class ChangesetTest < ActiveSupport::TestCase
280 end
278 end
281 r = Repository::Bazaar.create!(
279 r = Repository::Bazaar.create!(
282 :project => proj,
280 :project => proj,
283 :url => '/tmp/test/bazaar',
281 :url => '/tmp/test/bazaar',
284 :log_encoding => 'ISO-2022-JP' )
282 :log_encoding => 'ISO-2022-JP' )
285 assert r
283 assert r
286 c = Changeset.new(:repository => r,
284 c = Changeset.new(:repository => r,
@@ -289,11 +287,7 class ChangesetTest < ActiveSupport::TestCase
289 :scmid => '12345',
287 :scmid => '12345',
290 :comments => str)
288 :comments => str)
291 assert( c.save )
289 assert( c.save )
292 if str.respond_to?(:force_encoding)
290 assert_equal "test??test??", c.comments
293 assert_equal "test??test??", c.comments
294 else
295 assert_equal "testtest", c.comments
296 end
297 end
291 end
298
292
299 def test_comments_should_be_converted_all_latin1_to_utf8
293 def test_comments_should_be_converted_all_latin1_to_utf8
General Comments 0
You need to be logged in to leave comments. Log in now