diff --git a/CHANGELOG.rdoc b/CHANGELOG.rdoc index 070f0e29..d48128c0 100644 --- a/CHANGELOG.rdoc +++ b/CHANGELOG.rdoc @@ -51,6 +51,7 @@ * Blank cookie values are now skipped. GH #80 * Mechanize now adds a '.' to cookie domains if no '.' was sent. This is not allowed by RFC 2109 but does appear in RFC 2965. GH #86 + * file URIs are now read in binary mode. GH #83 === 1.0.0 diff --git a/lib/mechanize.rb b/lib/mechanize.rb index 9b63e860..8dc3808b 100644 --- a/lib/mechanize.rb +++ b/lib/mechanize.rb @@ -652,6 +652,8 @@ def response_parse response, body, uri def response_read response, request body = StringIO.new + body.set_encoding Encoding::BINARY, Encoding::BINARY if + body.respond_to? :set_encoding total = 0 response.read_body { |part| @@ -674,6 +676,8 @@ def response_read response, request end case response['Content-Encoding'] + when nil, 'none', '7bit', 'x-gzip' then + body.string when 'gzip' then Mechanize.log.debug('gunzip body') if Mechanize.log @@ -694,8 +698,6 @@ def response_read response, request zio.close if zio and not zio.closed? end end - when nil, 'none', '7bit', 'x-gzip' then - body.read else raise Mechanize::Error, "Unsupported Content-Encoding: #{response['Content-Encoding']}" diff --git a/lib/mechanize/file_response.rb b/lib/mechanize/file_response.rb index 08a0f7d6..54c9c703 100644 --- a/lib/mechanize/file_response.rb +++ b/lib/mechanize/file_response.rb @@ -11,7 +11,9 @@ def read_body if directory? yield dir_body else - yield File.read(@file_path) + open @file_path, 'rb' do |io| + yield io.read + end end else yield '' @@ -46,15 +48,21 @@ def get_fields(key) end private + def dir_body - '' + - Dir[::File.join(@file_path, '*')].map { |f| - "#{::File.basename(f)}" - }.join("\n") + '' + body = %w[] + body.concat Dir[File.join(@file_path, '*')].map { |f| + "#{File.basename(f)}" + } + body << %w[] + + body = body.join "\n" + body.force_encoding Encoding::BINARY if body.respond_to? :force_encoding + body end def directory? - ::File.directory?(@file_path) + File.directory?(@file_path) end end diff --git a/test/helper.rb b/test/helper.rb index 2b8a368f..689c4cba 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -3,6 +3,8 @@ require 'mechanize' require 'webrick/httputils' require 'servlets' +require 'tmpdir' +require 'tempfile' BASE_DIR = File.dirname(__FILE__) diff --git a/test/test_mechanize.rb b/test/test_mechanize.rb index 8ee32242..41672d5d 100644 --- a/test/test_mechanize.rb +++ b/test/test_mechanize.rb @@ -1,3 +1,5 @@ +# coding: utf-8 + require 'helper' class TestMechanize < Test::Unit::TestCase @@ -401,6 +403,26 @@ def @res.content_length() 4 end assert_equal 'Unsupported Content-Encoding: unknown', e.message end + def test_response_read_file + Tempfile.open 'pi.txt' do |tempfile| + tempfile.write "π\n" + tempfile.flush + tempfile.rewind + + uri = URI.parse "file://#{tempfile.path}" + req = Mechanize::FileRequest.new uri + res = Mechanize::FileResponse.new tempfile.path + + body = @agent.response_read res, req + + expected = "π\n" + expected.force_encoding Encoding::BINARY if expected.respond_to? :encoding + + assert_equal expected, body + assert_equal Encoding::BINARY, body.encoding if body.respond_to? :encoding + end + end + def test_response_read_no_body req = Net::HTTP::Options.new '/'