diff --git a/CHANGELOG.rdoc b/CHANGELOG.rdoc index 79dbde41..2e4612f2 100644 --- a/CHANGELOG.rdoc +++ b/CHANGELOG.rdoc @@ -13,6 +13,8 @@ for fine-grained parser choices. Parsers will be chosen based on exact match, simplified type or media type in that order. See Mechanize::PluggableParser#[]=. + * Added Mechanize#download which downloads a response body to an IO-like + object or filename. * Bug fixes * Fixed handling of a HEAD request with Accept-Encoding: gzip. Issue #198 diff --git a/lib/mechanize.rb b/lib/mechanize.rb index 69f1d5e0..41f1da4d 100644 --- a/lib/mechanize.rb +++ b/lib/mechanize.rb @@ -319,6 +319,44 @@ def click link end end + ## + # GETs +uri+ and writes it to +io_or_filename+ without recording the request + # in the history. If +io_or_filename+ does not respond to #write it will be + # used as a file name. +parameters+, +referer+ and +headers+ are used as in + # #get. Returns the fetched page; an IO passed by the caller is left open. + # + # By default, if the Content-type of the response matches a Mechanize::File + # or Mechanize::Page parser, the response body will be loaded into memory + # before being saved. See #pluggable_parser for details on changing this + # default. + + def download uri, io_or_filename, parameters = [], referer = nil, headers = {} + page = transact do + get uri, parameters, referer, headers + end + + io = if io_or_filename.respond_to? :write then + io_or_filename + else + File.open io_or_filename, 'wb' # not Kernel#open: no "|cmd" pipes + end + + case page + when Mechanize::File then + io.write page.body + else + body_io = page.body_io + + until body_io.eof? do + io.write body_io.read 16384 + end + end + + page + ensure + io.close if io and not io_or_filename.respond_to? 
:write + end + ## # DELETE +uri+ with +query_params+, and setting +headers+: # @@ -341,18 +379,20 @@ def get(uri, parameters = [], referer = nil, headers = {}) referer ||= if uri.to_s =~ %r{\Ahttps?://} - Page.new(nil, {'content-type'=>'text/html'}) + Page.new(nil, 'content-type' => 'text/html') else - current_page || Page.new(nil, {'content-type'=>'text/html'}) + current_page || Page.new(nil, 'content-type' => 'text/html') end # FIXME: Huge hack so that using a URI as a referer works. I need to # refactor everything to pass around URIs but still support # Mechanize::Page#base unless Mechanize::Parser === referer then - referer = referer.is_a?(String) ? - Page.new(URI.parse(referer), {'content-type' => 'text/html'}) : - Page.new(referer, {'content-type' => 'text/html'}) + referer = if referer.is_a?(String) then + Page.new URI(referer), 'content-type' => 'text/html' + else + Page.new referer, 'content-type' => 'text/html' + end end # fetch the page @@ -1062,7 +1102,6 @@ def parse uri, response, body content_type, = data.downcase.split ',', 2 unless data.nil? end - # Find our pluggable parser parser_klass = @pluggable_parser.parser content_type unless parser_klass <= Mechanize::Download then diff --git a/test/test_mechanize.rb b/test/test_mechanize.rb index b7fd17b3..1a94cc29 100644 --- a/test/test_mechanize.rb +++ b/test/test_mechanize.rb @@ -275,16 +275,45 @@ def test_delete_redirect assert_equal 'GET', page.header['X-Request-Method'] end - #def test_download - # Dir.mktmpdir do |dir| - # file = "#{dir}/download" - # open file, 'w' do |io| - # @mech.download 'http://example', io - # end - - # assert_equal 1, File.stat(file).size - # end - #end + def test_download + page = nil + + in_tmpdir do + open 'download', 'w' do |io| + page = @mech.download 'http://example', io + + refute io.closed? 
+ end + + assert_operator 1, :<=, File.stat('download').size + end + + assert_empty @mech.history + assert_kind_of Mechanize::Page, page + end + + def test_download_filename + page = nil + + in_tmpdir do + page = @mech.download 'http://example', 'download' + + assert_operator 1, :<=, File.stat('download').size + end + + assert_empty @mech.history + assert_kind_of Mechanize::Page, page + end + + def test_download_filename_error + in_tmpdir do + assert_raises Mechanize::UnauthorizedError do + @mech.download 'http://example/digest_auth', 'download' + end + + refute File.exist? 'download' + end + end def test_get uri = URI 'http://localhost'