rspec · bf4 · Nov 20, 2014 · Jan 3, 2015 · Jan 3, 2015 · Jan 3, 2015
diff --git a/.travis.yml b/.travis.yml
@@ -22,19 +22,24 @@ rvm:
   - 2.2
   - ruby-head
   - ree
-  - jruby-18mode
-  - jruby
-  - jruby-head
   - rbx
 matrix:
   include:
     - rvm: jruby
-      env: JRUBY_OPTS='--2.0'
-  allow_failures:
+      env: JRUBY_OPTS='--server -Xcompile.invokedynamic=false -Xcompat.version=2.0'
     - rvm: jruby-head
+      env: JRUBY_OPTS='--server -Xcompile.invokedynamic=false'
+    # These two are temporary until https://github.com/travis-ci/travis-ci/issues/3067 is solved.
+    - rvm: jruby-18mode
+      env: JRUBY_OPTS='--server -Xcompile.invokedynamic=false'
+    - rvm: jruby
+      env: JRUBY_OPTS='--server -Xcompile.invokedynamic=false'
+  allow_failures:
     - rvm: ruby-head
     - rvm: rbx
     # These two are temporary until https://github.com/travis-ci/travis-ci/issues/3067 is solved.
     - rvm: jruby-18mode
+      env: JRUBY_OPTS='--server -Xcompile.invokedynamic=false'
     - rvm: jruby
+      env: JRUBY_OPTS='--server -Xcompile.invokedynamic=false'
   fast_finish: true
diff --git a/Gemfile b/Gemfile
@@ -14,7 +14,8 @@ branch = File.read(File.expand_path("../maintenance-branch", __FILE__)).chomp
 end
 
 ### dep for ci/coverage
-gem 'simplecov', '~> 0.8'
+gem 'simplecov', '~> 0.9'
+gem 'simplecov-html', :github => 'colszowka/simplecov-html'
 
 gem 'rubocop', "~> 0.23.0", :platform => [:ruby_19, :ruby_20, :ruby_21]
 

diff --git a/lib/rspec/support/differ.rb b/lib/rspec/support/differ.rb
@@ -7,8 +7,14 @@ module RSpec
   module Support
     # rubocop:disable ClassLength
     class Differ
+      if String.method_defined?(:encoding)
+        EMPTY_DIFF = EncodedString.new("", Encoding.default_external)
+      else
+        EMPTY_DIFF = EncodedString.new("")
+      end
+
       def diff(actual, expected)
-        diff = ""
+        diff = EMPTY_DIFF.dup
 
         if actual && expected
           if all_strings?(actual, expected)
@@ -25,12 +31,10 @@ def diff(actual, expected)
 
       # rubocop:disable MethodLength
       def diff_as_string(actual, expected)
-        @encoding = pick_encoding actual, expected
-
+        @encoding = EncodedString.pick_encoding(actual, expected)
         @actual   = EncodedString.new(actual, @encoding)
         @expected = EncodedString.new(expected, @encoding)
-
-        output = EncodedString.new("\n", @encoding)
+        output    = EncodedString.new("\n", @encoding)
 
         hunks.each_cons(2) do |prev_hunk, current_hunk|
           begin
@@ -47,8 +51,6 @@ def diff_as_string(actual, expected)
         finalize_output(output, hunks.last.diff(format_type).to_s) if hunks.last
 
         color_diff output
-      rescue Encoding::CompatibilityError
-        handle_encoding_errors
       end
       # rubocop:enable MethodLength
 
@@ -188,26 +190,6 @@ def object_to_string(object)
           PP.pp(object, "")
         end
       end
-
-      if String.method_defined?(:encoding)
-        def pick_encoding(source_a, source_b)
-          Encoding.compatible?(source_a, source_b) || Encoding.default_external
-        end
-      else
-        def pick_encoding(_source_a, _source_b)
-        end
-      end
-
-      def handle_encoding_errors
-        if @actual.source_encoding != @expected.source_encoding
-          "Could not produce a diff because the encoding of the actual string " \
-          "(#{@actual.source_encoding}) differs from the encoding of the expected " \
-          "string (#{@expected.source_encoding})"
-        else
-          "Could not produce a diff because of the encoding of the string " \
-          "(#{@expected.source_encoding})"
-        end
-      end
     end
     # rubocop:enable ClassLength
   end

diff --git a/lib/rspec/support/encoded_string.rb b/lib/rspec/support/encoded_string.rb
@@ -2,7 +2,19 @@ module RSpec
   module Support
     # @private
     class EncodedString
-      MRI_UNICODE_UNKOWN_CHARACTER = "\xEF\xBF\xBD"
+      if String.method_defined?(:encoding)
+        # see https://github.com/ruby/ruby/blob/ca24e581ba/encoding.c#L1191
+        def self.pick_encoding(source_a, source_b)
+          Encoding.compatible?(source_a, source_b) || Encoding.default_external
+        end
+      else
+        def self.pick_encoding(_source_a, _source_b)
+        end
+      end
+
+      # Ruby's default replacement string is U+FFFD ("\xEF\xBF\xBD") for Unicode encoding forms,
+      #   and '?' ("\x3F") for all other encodings.
+      REPLACE = "\x3F"
 
       def initialize(string, encoding=nil)
         @encoding = encoding
@@ -33,21 +45,52 @@ def to_s
 
         private
 
+        ENCODING_STRATEGY = {
+          :bad_bytes => {
+            :invalid => :replace,
+            # :undef   => :nil,
+            :replace => REPLACE
+          },
+          :cannot_convert => {
+            # :invalid => :nil,
+            :undef   => :replace,
+            :replace => REPLACE
+          },
+          :no_converter => {
+            :invalid => :replace,
+            # :undef   => :nil,
+            :replace => REPLACE
+          }
+        }
+
+        # Raised by Encoding and String methods:
+        #   Encoding::UndefinedConversionError:
+        #     when a transcoding operation fails
+        #     e.g. "\x80".encode('utf-8','ASCII-8BIT')
+        #   Encoding::InvalidByteSequenceError:
+        #     when the string being transcoded contains a byte invalid for either
+        #     the source or target encoding
+        #     e.g. "\x80".encode('utf-8','US-ASCII')
+        # Raised by transcoding methods:
+        #   Encoding::ConverterNotFoundError:
+        #     when a named encoding does not correspond with a known converter
+        #     e.g. 'abc'.force_encoding('utf-8').encode('foo')
+        # Encoding::CompatibilityError:
+        #   when the source encoding is incompatible with the target encoding
         def matching_encoding(string)
-          string.encode(@encoding)
-        rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
-          normalize_missing(string.encode(@encoding, :invalid => :replace, :undef => :replace))
+          encoding = EncodedString.pick_encoding(source_encoding, @encoding)
+          # Converting it to a higher character set (UTF-16) and then back (to the target encoding)
+          # ensures that we strip away invalid or undefined byte sequences
+          # => no need to rescue Encoding::InvalidByteSequenceError, ArgumentError
+          string.encode(::Encoding::UTF_16LE, ENCODING_STRATEGY[:bad_bytes]).
+            encode(encoding)
+        rescue Encoding::UndefinedConversionError, Encoding::CompatibilityError
+          string.encode(encoding, ENCODING_STRATEGY[:cannot_convert])
+        # Begin: Needed for 1.9.2
         rescue Encoding::ConverterNotFoundError
-          normalize_missing(string.force_encoding(@encoding).encode(:invalid => :replace))
-        end
-
-        def normalize_missing(string)
-          if @encoding.to_s == "UTF-8"
-            string.gsub(MRI_UNICODE_UNKOWN_CHARACTER.force_encoding(@encoding), "?")
-          else
-            string
-          end
+          string.force_encoding(encoding).encode(ENCODING_STRATEGY[:no_converter])
         end
+        # End: Needed for 1.9.2
 
         def detect_source_encoding(string)
           string.encoding

diff --git a/lib/rspec/support/spec.rb b/lib/rspec/support/spec.rb
@@ -1,5 +1,6 @@
 require 'rspec/support'
 RSpec::Support.require_rspec_support "spec/deprecation_helpers"
+RSpec::Support.require_rspec_support "spec/encoding_helpers"
 RSpec::Support.require_rspec_support "spec/with_isolated_stderr"
 RSpec::Support.require_rspec_support "spec/stderr_splitter"
 RSpec::Support.require_rspec_support "spec/formatting_support"
@@ -12,6 +13,7 @@
   c.include RSpecHelpers
   c.include RSpec::Support::WithIsolatedStdErr
   c.include RSpec::Support::FormattingSupport
+  c.include RSpec::Support::EncodingHelpers
 
   unless defined?(Debugger) # debugger causes warnings when used
     c.before do

diff --git a/lib/rspec/support/spec/encoding_helpers.rb b/lib/rspec/support/spec/encoding_helpers.rb
@@ -0,0 +1,61 @@
+module RSpec
+  module Support
+    module EncodingHelpers
+      module_function
+
+      # For undefined conversions, replace as "U+<codepoint>"
+      # e.g. '\xa0' becomes 'U+00A0'
+      # see https://github.com/ruby/ruby/blob/34fbf57aaa/test/ruby/test_transcode.rb#L2050
+      def safe_chr
+        # rubocop:disable Style/RescueModifier
+        @safe_chr ||= Hash.new { |h, x| h[x] = x.chr rescue ("U+%.4X" % [x]) }
+        # rubocop:enable Style/RescueModifier
+      end
+
+      if String.method_defined?(:encoding)
+
+        def safe_codepoints(str)
+          str.each_codepoint.map { |codepoint| safe_chr[codepoint] }
+        rescue ArgumentError
+          str.each_byte.map { |byte| safe_chr[byte] }
+        end
+
+        # Asserts that str1 has expected_encoding and that str1 and str2 consist of
+        # the same characters (as produced by safe_codepoints). On a mismatch, only
+        # the characters at differing positions are handed to the final expectation.
+        # rubocop:disable MethodLength
+        def expect_identical_string(str1, str2, expected_encoding=str1.encoding)
+          expect(str1.encoding).to eq(expected_encoding)
+          str1_bytes = safe_codepoints(str1)
+          str2_bytes = safe_codepoints(str2)
+          return if str1_bytes == str2_bytes
+          str1_differences = []
+          str2_differences = []
+          # Walk the longer sequence so that a missing tail on either side shows
+          # up as a difference (nil joins as "") instead of being skipped.
+          [str1_bytes.size, str2_bytes.size].max.times do |index|
+            str1_byte = str1_bytes[index]
+            str2_byte = str2_bytes[index]
+            next if str1_byte == str2_byte
+            str1_differences << str1_byte
+            str2_differences << str2_byte
+          end
+          expect(str1_differences.join).to eq(str2_differences.join)
+        end
+        # rubocop:enable MethodLength
+
+      else
+
+        def safe_codepoints(str)
+          str.split(//)
+        end
+
+        def expect_identical_string(str1, str2)
+          str1_bytes = safe_codepoints(str1)
+          str2_bytes = safe_codepoints(str2)
+          expect(str1_bytes).to eq(str2_bytes)
+        end
+      end
+    end
+  end
+end
diff --git a/lib/rspec/support/spec/in_sub_process.rb b/lib/rspec/support/spec/in_sub_process.rb
@@ -1,7 +1,7 @@
 module RSpec
   module Support
     module InSubProcess
-      if Process.respond_to?(:fork) && !(RUBY_PLATFORM == 'java' && RUBY_VERSION == '1.8.7')
+      if Process.respond_to?(:fork) && !(Ruby.jruby? && RUBY_VERSION == '1.8.7')
         # Useful as a way to isolate a global change to a subprocess.
 
         # rubocop:disable MethodLength
@@ -35,7 +35,7 @@ def in_sub_process(prevent_warnings=true)
           raise exception if exception
         end
       else
-        def in_sub_process
+        def in_sub_process(*)
           skip "This spec requires forking to work properly, " \
                "and your platform does not support forking"
         end

diff --git a/script/functions.sh b/script/functions.sh
@@ -7,6 +7,10 @@ source $SCRIPT_DIR/predicate_functions.sh
 
 # idea taken from: http://blog.headius.com/2010/03/jruby-startup-time-tips.html
 export JRUBY_OPTS="${JRUBY_OPTS} -X-C" # disable JIT since these processes are so short lived
+# Set the external encoding to UTF-8 in a 1.8.7-compatible way
+export LANG=en_US.UTF-8
+export LC_ALL=en_US.UTF-8
+
 SPECS_HAVE_RUN_FILE=specs.out
 MAINTENANCE_BRANCH=`cat maintenance-branch`
 
@@ -112,7 +116,7 @@ function check_documentation_coverage {
 }
 
 function check_style_and_lint {
-  echo "bin/rubucop lib"
+  echo "bin/rubocop lib"
   bin/rubocop lib
 }