diff --git a/lib/rspec/support/encoded_string.rb b/lib/rspec/support/encoded_string.rb
index 3783fc0b2..4b5a98ead 100644
--- a/lib/rspec/support/encoded_string.rb
+++ b/lib/rspec/support/encoded_string.rb
@@ -4,7 +4,6 @@ module Support
     class EncodedString
       # Ruby's default replacement string for is U+FFFD ("\xEF\xBF\xBD") for Unicode encoding forms
       # else is '?' ("\x3F")
-      MRI_UNICODE_UNKOWN_CHARACTER = "\xEF\xBF\xBD"
       REPLACE = "\x3F"
 
       def initialize(string, encoding=nil)
@@ -36,6 +35,24 @@ def to_s
 
       private
 
+      ENCODING_STRATEGY = {
+        :bad_bytes => {
+          :invalid => :replace,
+          # :undef => :nil,
+          :replace => REPLACE
+        },
+        :cannot_convert => {
+          # :invalid => :nil,
+          :undef => :replace,
+          :replace => REPLACE
+        },
+        :no_converter => {
+          :invalid => :replace,
+          # :undef => :nil,
+          :replace => REPLACE
+        }
+      }
+
       # Raised by Encoding and String methods:
       # Encoding::UndefinedConversionError:
       # when a transcoding operation fails
@@ -51,20 +68,19 @@ def to_s
       # Encoding::CompatibilityError
       #
       def matching_encoding(string)
-        string.encode(@encoding)
-      rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
-        normalize_missing(string.encode(@encoding, :invalid => :replace, :undef => :replace))
+        # Converting it to a higher character set (UTF-16) and then back (to UTF-8)
+        # ensures that we strip away invalid or undefined byte sequences
+        # => no need to rescue Encoding::InvalidByteSequenceError, ArgumentError
+        string.encode(::Encoding::UTF_16LE, ENCODING_STRATEGY[:bad_bytes]).
+          encode(@encoding)
+      rescue Encoding::UndefinedConversionError, Encoding::CompatibilityError
+        string.encode(@encoding, ENCODING_STRATEGY[:cannot_convert])
+      # Begin: Needed for 1.9.2
       rescue Encoding::ConverterNotFoundError
-        normalize_missing(string.force_encoding(@encoding).encode(:invalid => :replace))
+        string.force_encoding(@encoding).encode(ENCODING_STRATEGY[:no_converter])
       end
+      # End: Needed for 1.9.2
 
-      def normalize_missing(string)
-        if @encoding.to_s == "UTF-8"
-          string.gsub(MRI_UNICODE_UNKOWN_CHARACTER.force_encoding(@encoding), REPLACE)
-        else
-          string
-        end
-      end
 
       def detect_source_encoding(string)
         string.encoding
diff --git a/spec/rspec/support/encoded_string_spec.rb b/spec/rspec/support/encoded_string_spec.rb
index 2ea256d4d..78d8f64f4 100644
--- a/spec/rspec/support/encoded_string_spec.rb
+++ b/spec/rspec/support/encoded_string_spec.rb
@@ -155,7 +155,6 @@ module RSpec::Support
         end
 
         it 'replaces invalid bytes with the REPLACE string' do
-          pending 'but is currently failing'
           resulting_array = build_encoded_string(message_with_invalid_byte_sequence, utf8_encoding).split("\n")
           expected_array = ["? ? ? I have bad bytes"]
           expect(resulting_array).to eq(expected_array)