Skip to content

Commit

Permalink
Handle offset-from-end ranges and make range handling more robust (#803)
Browse files Browse the repository at this point in the history
* Handle offset-from-end ranges and make range handling more robust

This PR adds support for offset-from-end ranges. An offset-from-end
range header looks like "Range: bytes=-10". Such a request returns the
last 10 bytes of the content.

The PR also updates other range responses to align with the real GCS
server. Specifically, the range handling code now detects when the start
of the range is beyond the content bounds and returns a 416 response. Most
other invalid ranges are ignored, except for a few cases where the bounds
are automatically adjusted.

All of the cases were verified against the GCS server using requests
like:
```
curl -D - https://storage.googleapis.com/marksandstrom-test/test.txt -H"Range: bytes=0-4"
```

Fixes #536.

* Remove underscore prefix from parseRange function

* Reformat comment

* Fix off by one error and add corresponding test

* Remove unused length variable in test

* Apply one nit from code review

Co-authored-by: fsouza <[email protected]>
  • Loading branch information
dnerdy and fsouza authored Jun 4, 2022
1 parent eb449bf commit 275f44a
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 40 deletions.
138 changes: 111 additions & 27 deletions fakestorage/object.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"errors"
"fmt"
"io"
"math"
"net/http"
"sort"
"strconv"
Expand Down Expand Up @@ -648,50 +647,135 @@ func (s *Server) downloadObject(w http.ResponseWriter, r *http.Request) {
}

status := http.StatusOK
ranged, start, lastByte, content := s.handleRange(obj, r)
if ranged {
ranged, start, lastByte, content, satisfiable := s.handleRange(obj, r)

if ranged && satisfiable {
status = http.StatusPartialContent
w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", start, lastByte, len(obj.Content)))
}
if obj.ContentType != "" {
w.Header().Set(contentTypeHeader, obj.ContentType)
}
w.Header().Set("Accept-Ranges", "bytes")
w.Header().Set("Content-Length", strconv.Itoa(len(content)))
w.Header().Set("X-Goog-Generation", strconv.FormatInt(obj.Generation, 10))
w.Header().Set("X-Goog-Hash", fmt.Sprintf("crc32c=%s,md5=%s", obj.Crc32c, obj.Md5Hash))
w.Header().Set("Last-Modified", obj.Updated.Format(http.TimeFormat))
if obj.ContentEncoding != "" {
w.Header().Set("Content-Encoding", obj.ContentEncoding)

if ranged && !satisfiable {
status = http.StatusRequestedRangeNotSatisfiable
content = []byte(fmt.Sprintf(`<?xml version='1.0' encoding='UTF-8'?>`+
`<Error><Code>InvalidRange</Code>`+
`<Message>The requested range cannot be satisfied.</Message>`+
`<Details>%s</Details></Error>`, r.Header.Get("Range")))
w.Header().Set(contentTypeHeader, "application/xml; charset=UTF-8")
} else {
if obj.ContentType != "" {
w.Header().Set(contentTypeHeader, obj.ContentType)
}
if obj.ContentEncoding != "" {
w.Header().Set("Content-Encoding", obj.ContentEncoding)
}
}

w.WriteHeader(status)
if r.Method == http.MethodGet {
w.Write(content)
}
}

func (s *Server) handleRange(obj Object, r *http.Request) (ranged bool, start int64, lastByte int64, content []byte) {
if reqRange := r.Header.Get("Range"); reqRange != "" {
parts := strings.SplitN(reqRange, "=", 2)
if len(parts) == 2 && parts[0] == "bytes" {
rangeParts := strings.SplitN(parts[1], "-", 2)
if len(rangeParts) == 2 {
start, _ = strconv.ParseInt(rangeParts[0], 10, 64)
var err error
var end int64
if end, err = strconv.ParseInt(rangeParts[1], 10, 64); err != nil {
end = int64(len(obj.Content))
} else if end != math.MaxInt64 {
end++
}
if end > int64(len(obj.Content)) {
end = int64(len(obj.Content))
}
return true, start, end - 1, obj.Content[start:end]
// handleRange applies the request's Range header to obj's content.
//
// Results:
//   - ranged reports whether the request is treated as a range request.
//   - start and lastByte are the inclusive bounds of the returned slice; they
//     are only meaningful when both ranged and satisfiable are true.
//   - content is the selected bytes, the whole object when the range is
//     ignored, or an empty slice when the range cannot be satisfied.
//   - satisfiable is true only when a range was selected and returned.
//
// GCS is lenient with bad ranges: the only case reported as unsatisfiable is
// a start at or beyond the end of the content. Every other invalid range is
// ignored and the full content is returned.
func (s *Server) handleRange(obj Object, r *http.Request) (ranged bool, start int64, lastByte int64, content []byte, satisfiable bool) {
	size := int64(len(obj.Content))

	first, last, parseErr := parseRange(r.Header.Get("Range"), size)
	if parseErr != nil {
		// Missing or malformed header: behave as if no range was requested.
		return false, 0, 0, obj.Content, false
	}

	// Start is past the content, e.g. with a length of 40:
	//   Range: bytes=50-60
	//   Range: bytes=50-
	// This IS a range request, but it cannot be satisfied, so no content is
	// returned.
	if first >= size {
		return true, 0, 0, []byte{}, false
	}

	// Backwards range, e.g. with a length of 40:
	//   Range: bytes=30-20
	// The range is ignored and everything is returned.
	if last < first {
		return false, 0, 0, obj.Content, false
	}

	// Usable range. Pull both ends inside the content, e.g. with a length
	// of 40:
	//   Range: bytes=-100  (suffix longer than the content)
	//   Range: bytes=0-100 (end past the content)
	if first < 0 {
		first = 0
	}
	if last >= size {
		last = size - 1
	}
	return true, first, last, obj.Content[first : last+1], true
}

// parseRange parses the value of a Range header and returns the start and end
// indices it selects in the content. The end index is inclusive.
//
// The unit MUST be "bytes", and exactly one range may be given. The following
// forms are parsed:
//
//	"bytes=40-50" (range with given start and end)
//	"bytes=40-"   (range to end of content)
//	"bytes=-40"   (suffix length, offset from end of content)
//
// contentLength is only used to resolve the suffix-length and range-to-end
// forms. The returned indices are NOT validated against the content bounds:
// start may be negative (suffix longer than the content) or beyond the
// content, and end may be beyond the content or smaller than start. Callers
// are expected to clamp or reject such values.
//
// A non-nil error is returned, with both indices set to 0, when the header has
// no "=", uses a unit other than "bytes", has an empty range spec, lists more
// than one range, lacks a "-", or contains a bound that is not an integer.
func parseRange(rangeHeaderValue string, contentLength int64) (start int64, end int64, err error) {
	// For information about the range header, see:
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Range
	// https://httpwg.org/specs/rfc7233.html#header.range
	// https://httpwg.org/specs/rfc7233.html#byte.ranges
	// https://httpwg.org/specs/rfc7233.html#status.416
	//
	// <unit>=<range spec>
	parts := strings.SplitN(rangeHeaderValue, "=", 2)
	if len(parts) != 2 {
		return 0, 0, fmt.Errorf("expecting `=` in range header, got: %s", rangeHeaderValue)
	}
	if parts[0] != "bytes" {
		return 0, 0, fmt.Errorf("invalid range unit, expecting `bytes`, got: %s", parts[0])
	}
	rangeSpec := parts[1]
	if len(rangeSpec) == 0 {
		return 0, 0, errors.New("empty range")
	}
	// A comma separates multiple ranges, which are not supported. Detect it
	// up front so the error names the real problem instead of reporting a
	// malformed bound.
	if strings.Contains(rangeSpec, ",") {
		return 0, 0, fmt.Errorf("only one range supported, got: %s", rangeSpec)
	}

	// Suffix length: the leading '-' is parsed as the sign, so the offset is
	// zero or negative and is added to the content length.
	if rangeSpec[0] == '-' {
		offsetFromEnd, parseErr := strconv.ParseInt(rangeSpec, 10, 64)
		if parseErr != nil {
			return 0, 0, fmt.Errorf("invalid suffix length, got: %s", rangeSpec)
		}
		return contentLength + offsetFromEnd, contentLength - 1, nil
	}

	rangeParts := strings.SplitN(rangeSpec, "-", 2)
	if len(rangeParts) != 2 {
		// SplitN only yields a single part when there is no '-' at all.
		return 0, 0, fmt.Errorf("expecting `-` in range spec, got: %s", rangeSpec)
	}
	start, err = strconv.ParseInt(rangeParts[0], 10, 64)
	if err != nil {
		return 0, 0, fmt.Errorf("invalid range start, got: %s", rangeParts[0])
	}
	if rangeParts[1] == "" {
		// Open-ended range: runs to the last byte of the content.
		return start, contentLength - 1, nil
	}
	end, err = strconv.ParseInt(rangeParts[1], 10, 64)
	if err != nil {
		return 0, 0, fmt.Errorf("invalid range end, got: %s", rangeParts[1])
	}
	return start, end, nil
}

func (s *Server) patchObject(r *http.Request) jsonResponse {
Expand Down
58 changes: 45 additions & 13 deletions fakestorage/object_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -381,34 +381,66 @@ func TestServerClientObjectRangeReader(t *testing.T) {

runServersTest(t, runServersOptions{objs: objs}, func(t *testing.T, server *Server) {
tests := []struct {
testCase string
offset int64
length int64
testCase string
offset int64
length int64
expectedData string
}{
{
"no length, just offset",
2,
44,
-1,
"my object",
},
{
"zero offset, length",
0,
10,
11,
"some really",
},
{
"offset and length",
4,
5,
11,
"really nice",
},
{
"negative offset",
-9,
-1,
"my object",
},
{
"negative offset before start",
-100,
-1,
content, // Returns all content
},
{
"length too long", // ok
44,
100,
"my object",
},
{
"length too long by exactly one",
44,
10,
"my object",
},
{
"zero range",
0,
0,
// Note: this case is handled by the GCS client, not the
// server; the client doesn't pass a range. It receives all the
// content, and then returns no content to the caller.
"",
},
}
for _, test := range tests {
test := test
t.Run(test.testCase, func(t *testing.T) {
length := test.length
if length == -1 {
length = int64(len(content)) - test.offset
}
expectedData := content[test.offset : test.offset+length]
client := server.Client()
objHandle := client.Bucket(bucketName).Object(objectName)
reader, err := objHandle.NewRangeReader(context.TODO(), test.offset, test.length)
Expand All @@ -420,8 +452,8 @@ func TestServerClientObjectRangeReader(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if string(data) != expectedData {
t.Errorf("wrong data returned\nwant %q\ngot %q", expectedData, string(data))
if string(data) != test.expectedData {
t.Errorf("wrong data returned\nwant %q\ngot %q", test.expectedData, string(data))
}
if ct := reader.Attrs.ContentType; ct != contentType {
t.Errorf("wrong content type\nwant %q\ngot %q", contentType, ct)
Expand Down

0 comments on commit 275f44a

Please sign in to comment.