From 322d738d54e7a9e236999e31f76f9d31aa3c77eb Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Fri, 26 Dec 2014 13:42:52 -0600 Subject: [PATCH] Moving some lessons learned from travis and the differ in --- lib/rspec/support/encoded_string.rb | 39 ++++------------------- spec/rspec/support/encoded_string_spec.rb | 30 +++++++++++------ 2 files changed, 28 insertions(+), 41 deletions(-) diff --git a/lib/rspec/support/encoded_string.rb b/lib/rspec/support/encoded_string.rb index 966031c10..6531ba641 100644 --- a/lib/rspec/support/encoded_string.rb +++ b/lib/rspec/support/encoded_string.rb @@ -43,42 +43,17 @@ def to_s "U+%.4X=" % x.unpack("U") rescue ArgumentError x.each_codepoint.to_a.map { |cp| '\x' << cp.to_s(16).upcase }.join - # JRUBY: expected: "?\\xAE" - # got: "??" end end def matching_encoding(string) - string.encode(@encoding, - :invalid => :replace, - :replace => REPLACE[source_encoding], - :fallback => UNDEF_FALLBACK - ) - rescue Encoding::ConverterNotFoundError, ArgumentError - # 1.9.2 Encoding::ConverterNotFoundError: - # code converter not found (UTF-16LE to IBM737) - string.encode(@encoding, - :invalid => :replace, - :replace => '', - :undef => :replace - ) - rescue Encoding::UndefinedConversionError - # 1.9.2 Encoding::UndefinedConversionError: - # "\xAE" from ASCII-8BIT to UTF-8 - # 1.9.2 expected no Exception, got # - # - string.encode(@encoding, - :invalid => :replace, - :replace => '', - :undef => :replace - ) - rescue ArgumentError - # JRUBY, 1.9.3, 1.9.2 - # ArgumentError: - # invalid byte sequence in UTF-8 - string. - encode(::Encoding::UTF_16LE, :invalid => :replace, :undef => :replace, :replace => ''). - encode(@encoding) + # Converting it to a higher character set (UTF-16) and then + # back (to UTF-8) ensures that you will strip away invalid or undefined byte sequences. + string. + encode(::Encoding::UTF_16LE, :invalid => :replace, :undef => :replace, :replace => REPLACE[source_encoding]). 
+ encode(@encoding) + rescue Encoding::CompatibilityError + string.encode(@encoding, :invalid => :replace, :replace => REPLACE[source_encoding], :fallback => UNDEF_FALLBACK) end def detect_source_encoding(string) diff --git a/spec/rspec/support/encoded_string_spec.rb b/spec/rspec/support/encoded_string_spec.rb index a20032a66..dc62f6f39 100644 --- a/spec/rspec/support/encoded_string_spec.rb +++ b/spec/rspec/support/encoded_string_spec.rb @@ -26,19 +26,19 @@ module RSpec::Support it 'does nothing to an invalid byte sequence' do source_encoding = Encoding.find('UTF8-MAC') incompatible_encoding = Encoding.find('IBM737') - string = "\xEF hi I am not going to change".force_encoding(source_encoding) + string = "\xEF I am not going to changé".force_encoding(source_encoding) resulting_string = build_encoded_string(string, incompatible_encoding) - expect(resulting_string.to_s).to eq("\xEF hi I am not going to change".force_encoding(incompatible_encoding)) + expect(resulting_string.to_s).to eq("\xEF I am not going to changeé".force_encoding(incompatible_encoding)) end it 'does nothing to unconvertable characters' do source_encoding = Encoding.find('UTF-16LE') incompatible_encoding = Encoding.find('IBM737') - string = "\xEF hi I am not going to change".force_encoding(source_encoding) + string = "\xEF I am not going to change".force_encoding(source_encoding) resulting_string = build_encoded_string(string, incompatible_encoding) - expect(resulting_string.to_s).to eq("\xEF hi I am not going to change".force_encoding(incompatible_encoding)) + expect(resulting_string.to_s).to eq("\xEF I am not going to change".force_encoding(incompatible_encoding)) end else @@ -58,7 +58,7 @@ module RSpec::Support it 'replaces all bytes with a unicode representation in the format e.g. 
U+20EF=' do source_encoding = Encoding.find('UTF-16LE') incompatible_encoding = Encoding.find('IBM737') - string = "hi I am not going to work".force_encoding(source_encoding) + string = "\xEF hi I am not going to work".force_encoding(source_encoding) if defined?(JRUBY_VERSION) expect{ string.encode(incompatible_encoding) }.to raise_error(Encoding::InvalidByteSequenceError) # expected Encoding::UndefinedConversionError, got # with backtrace: @@ -67,7 +67,7 @@ module RSpec::Support end resulting_string = build_encoded_string(string, incompatible_encoding) - expect(resulting_string.to_s).to eq("U+6968=U+4920=U+6120=U+206D=U+6F6E=U+2074=U+6F67=U+6E69=U+2067=U+6F74=U+7720=U+726F=?") + expect(resulting_string.to_s).to eq("U+20EF=U+6968=U+4920=U+6120=U+206D=U+6F6E=U+2074=U+6F67=U+6E69=U+2067=U+6F74=U+7720=U+726F=?") end end end @@ -80,11 +80,23 @@ module RSpec::Support describe '#<<' do context 'with strings that can be converted to the target encoding' do it 'encodes and appends the string' do - valid_ascii_string = "abc".force_encoding("ASCII-8BIT") + valid_ascii_string = "abcdé".force_encoding("ASCII-8BIT") valid_unicode_string = utf_8_euro_symbol.force_encoding('UTF-8') resulting_string = build_encoded_string(valid_unicode_string, target_encoding) << valid_ascii_string - expect(resulting_string).to eq "#{utf_8_euro_symbol}abc".force_encoding('UTF-8') + expect(resulting_string).to eq "#{utf_8_euro_symbol}abcd??".force_encoding('UTF-8') + end + + it 'uses the default external encoding when the two strings have incompatible encodings' do + utf8_string = build_encoded_string("Tu avec carte {count} item has\n", "UTF-8") + utf8_incompatible_string = "Tu avec carté {count} itém has\n".encode('UTF-16LE') + + resulting_string = utf8_string << utf8_incompatible_string + expect(resulting_string).to eq(<<-EOS) +Tu avec carte {count} item has +Tu avec carté {count} itém has + EOS + expect(resulting_string.encoding).to eq(Encoding.default_external) end end @@ -96,7 +108,7 @@ module 
RSpec::Support expect{ valid_unicode_string.encode(target_encoding) << ascii_string }.to raise_error(Encoding::CompatibilityError) resulting_string = build_encoded_string(valid_unicode_string, target_encoding) << ascii_string - expect(resulting_string).to eq("#{utf_8_euro_symbol}\\xAE") + expect(resulting_string).to eq("#{utf_8_euro_symbol}?") end end end