Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SWIFT-1126 / SWIFT-1137 Add legacy extended JSON parsing, fix Date-related crashes #64

Merged
merged 6 commits into from
Mar 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Sources/SwiftBSON/BSON.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ public enum BSON {
case bool(Bool)

/// A BSON UTC datetime.
/// When serialized to actual BSON bytes, the `Date` must be representable by a 64-bit signed integer
/// of milliseconds since the epoch. If the `Date` cannot be represented in that manner (i.e. it is too far in the
/// future or too far in the past), it will be serialized as either the minimum or maximum possible `Date`,
/// whichever is closer.
/// - SeeAlso: https://docs.mongodb.com/manual/reference/bson-types/#date
case datetime(Date)

Expand Down
118 changes: 93 additions & 25 deletions Sources/SwiftBSON/BSONBinary.swift
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import ExtrasBase64
import ExtrasJSON
import Foundation
import NIO

Expand Down Expand Up @@ -146,6 +147,7 @@ public struct BSONBinary: Equatable, Hashable {

extension BSONBinary: BSONValue {
internal static let extJSONTypeWrapperKeys: [String] = ["$binary", "$uuid"]
internal static let extJSONLegacyTypeWrapperKeys: [String] = ["$type"]

/*
* Initializes a `Binary` from ExtendedJSON.
Expand Down Expand Up @@ -188,38 +190,104 @@ extension BSONBinary: BSONValue {
}
}

// canonical and relaxed extended JSON
guard let binary = try json.value.unwrapObject(withKey: "$binary", keyPath: keyPath) else {
guard case let .object(obj) = json.value, let binary = obj["$binary"] else {
return nil
}
guard
let (base64, subTypeInput) = try binary.unwrapObject(withKeys: "base64", "subType", keyPath: keyPath)
else {
throw Swift.DecodingError._extendedJSONError(
keyPath: keyPath,
debugDescription: "Missing \"base64\" or \"subType\" in \(binary)"
)
}
guard let base64Str = base64.stringValue else {
throw Swift.DecodingError._extendedJSONError(
keyPath: keyPath,
debugDescription: "Could not parse `base64` from \"\(base64)\", " +
"input must be a base64-encoded (with padding as =) payload as a string"
)
}
guard
let subTypeStr = subTypeInput.stringValue,
let subTypeInt = UInt8(subTypeStr, radix: 16),
let subType = Subtype(rawValue: subTypeInt)
else {

let subtype: Subtype
let base64Str: String

switch binary {
// extended JSON v2
case .object:
guard obj.count == 1 else {
throw Swift.DecodingError._extraKeysError(
keyPath: keyPath,
expectedKeys: ["$binary"],
allKeys: Set(obj.keys)
)
}
guard
let (base64, subTypeInput) = try binary.unwrapObject(withKeys: "base64", "subType", keyPath: keyPath)
else {
throw Swift.DecodingError._extendedJSONError(
keyPath: keyPath,
debugDescription: "Missing \"base64\" or \"subType\" in \(binary)"
)
}
guard let b64Str = base64.stringValue else {
throw Swift.DecodingError._extendedJSONError(
keyPath: keyPath,
debugDescription: "Could not parse `base64` from \"\(base64)\", " +
"input must be a base64-encoded (with padding as =) payload as a string"
)
}

guard
let subtypeString = subTypeInput.stringValue,
let subtypeInt = UInt8(subtypeString, radix: 16),
let s = Subtype(rawValue: subtypeInt)
else {
throw Swift.DecodingError._extendedJSONError(
keyPath: keyPath,
debugDescription: "Could not parse `SubType` from \"\(json)\", subtype must"
+ "be a BSON binary type as a one- or two-character hex string"
)
}

base64Str = b64Str
subtype = s
case let .string(base64):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this parses the following format:

{
    "$binary": <base64 string>,
    "$type": <hex string or number>
}

guard obj.count == 2 else {
throw Swift.DecodingError._extraKeysError(
keyPath: keyPath,
expectedKeys: ["$binary"],
allKeys: Set(obj.keys)
)
}

// extended JSON v1 (legacy)
guard let subtypeInput = obj["$type"] else {
throw Swift.DecodingError._extendedJSONError(
keyPath: keyPath,
debugDescription: "missing \"$type\" key in BSON binary legacy extended JSON representation"
)
}

let subtypeString: String
if let str = subtypeInput.stringValue {
subtypeString = str
} else if case let .number(n) = subtypeInput {
subtypeString = n
} else {
throw Swift.DecodingError._extendedJSONError(
keyPath: keyPath,
debugDescription: "expected \"$type\" to be a string or number, got \(subtypeInput) instead"
)
}

guard
let subtypeInt = UInt8(subtypeString, radix: 16),
let s = Subtype(rawValue: subtypeInt)
else {
throw Swift.DecodingError._extendedJSONError(
keyPath: keyPath,
debugDescription: "Could not parse `SubType` from \"\(json)\", subtype must be a BSON binary"
+ "type as a one-or-two character hex string or a number"
)
}

base64Str = base64
subtype = s
default:
throw Swift.DecodingError._extendedJSONError(
keyPath: keyPath,
debugDescription: "Could not parse `SubType` from \"\(subTypeInput)\", " +
"input must be a BSON binary type as a one- or two-character hex string"
debugDescription: "expected extended JSON object for \"$binary\", got \(binary) instead"
)
}

do {
self = try BSONBinary(base64: base64Str, subtype: subType)
self = try BSONBinary(base64: base64Str, subtype: subtype)
} catch {
throw Swift.DecodingError._extendedJSONError(
keyPath: keyPath,
Expand Down
17 changes: 17 additions & 0 deletions Sources/SwiftBSON/BSONEncoder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@ public class BSONEncoder {
case deferredToDate

/// Encode the `Date` as a BSON datetime object (default).
/// Throws an `EncodingError` if the `Date` is further away from January 1, 1970 than can be represented
/// by a 64-bit signed integer of milliseconds.
case bsonDateTime

/// Encode the `Date` as a 64-bit integer counting the number of milliseconds since January 1, 1970.
/// Throws an `EncodingError` if the `Date` is too far away from then to be represented this way.
case millisecondsSince1970

/// Encode the `Date` as a BSON double counting the number of seconds since January 1, 1970.
Expand Down Expand Up @@ -440,13 +443,27 @@ extension _BSONEncoder {

/// Returns the date as a `BSONValue`, or nil if no values were encoded by the custom encoder strategy.
fileprivate func boxDate(_ date: Date) throws -> BSONValue? {
func validateDate() throws {
guard date.isValidBSONDate() else {
throw EncodingError.invalidValue(
date,
EncodingError.Context(
codingPath: self.codingPath,
debugDescription: "Date must be representable as an Int64 number of milliseconds since epoch"
)
)
}
}

switch self.options.dateEncodingStrategy {
case .bsonDateTime:
try validateDate()
return date
case .deferredToDate:
try date.encode(to: self)
return self.storage.popContainer()
case .millisecondsSince1970:
try validateDate()
return date.msSinceEpoch
case .secondsSince1970:
return date.timeIntervalSince1970
Expand Down
14 changes: 14 additions & 0 deletions Sources/SwiftBSON/BSONError.swift
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,20 @@ extension DecodingError {
debugDescription: debugStart + debugDescription
))
}

internal static func _extraKeysError(
keyPath: [String],
expectedKeys: Set<String>,
allKeys: Set<String>
) -> DecodingError {
let extra = allKeys.subtracting(expectedKeys)

return Self._extendedJSONError(
keyPath: keyPath,
debugDescription: "Expected only the following keys, \(Array(expectedKeys)), instead got extra " +
"key(s): \(extra)"
)
}
}

/// Standardize the errors emitted from the BSON Iterator.
Expand Down
45 changes: 30 additions & 15 deletions Sources/SwiftBSON/BSONRegularExpression.swift
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ public struct BSONRegularExpression: Equatable, Hashable {

extension BSONRegularExpression: BSONValue {
internal static let extJSONTypeWrapperKeys: [String] = ["$regularExpression"]
internal static let extJSONLegacyTypeWrapperKeys: [String] = ["$regex", "$options"]

/*
* Initializes a `BSONRegularExpression` from ExtendedJSON.
Expand All @@ -81,22 +82,36 @@ extension BSONRegularExpression: BSONValue {
* - `DecodingError` if `json` is a partial match or is malformed.
*/
internal init?(fromExtJSON json: JSON, keyPath: [String]) throws {
// canonical and relaxed extended JSON
guard let value = try json.value.unwrapObject(withKey: "$regularExpression", keyPath: keyPath) else {
return nil
}
guard
let (pattern, options) = try value.unwrapObject(withKeys: "pattern", "options", keyPath: keyPath),
let patternStr = pattern.stringValue,
let optionsStr = options.stringValue
else {
throw DecodingError._extendedJSONError(
keyPath: keyPath,
debugDescription: "Could not parse `BSONRegularExpression` from \"\(value)\", " +
"\"pattern\" and \"options\" must be strings"
)
// canonical and relaxed extended JSON v2
if let regex = try json.value.unwrapObject(withKey: "$regularExpression", keyPath: keyPath) {
guard
let (pattern, options) = try regex.unwrapObject(withKeys: "pattern", "options", keyPath: keyPath),
let patternStr = pattern.stringValue,
let optionsStr = options.stringValue
else {
throw DecodingError._extendedJSONError(
keyPath: keyPath,
debugDescription: "Could not parse `BSONRegularExpression` from \"\(regex)\", " +
"\"pattern\" and \"options\" must be strings"
)
}
self = BSONRegularExpression(pattern: patternStr, options: optionsStr)
return
} else {
// legacy / v1 extended JSON
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this parses the following format:

{
    "$regex": <string>,
    "$options": <string>,
}

which is unfortunately, the exact same format as a query operator.

guard
let (pattern, options) = try? json.value.unwrapObject(withKeys: "$regex", "$options", keyPath: keyPath),
let patternStr = pattern.stringValue,
let optionsStr = options.stringValue
else {
// instead of a throwing an error here or as part of unwrapObject, we just return nil to avoid erroring
// when a $regex query operator is being parsed from extended JSON. See the
// "Regular expression as value of $regex query operator with $options" corpus test.
return nil
}
self = BSONRegularExpression(pattern: patternStr, options: optionsStr)
return
}
self = BSONRegularExpression(pattern: patternStr, options: optionsStr)
}

/// Converts this `BSONRegularExpression` to a corresponding `JSON` in relaxed extendedJSON format.
Expand Down
7 changes: 7 additions & 0 deletions Sources/SwiftBSON/BSONValue.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ internal protocol BSONValue: Codable {
/// for this `BSONValue`. (e.g. for Int32, this value is ["$numberInt"]).
static var extJSONTypeWrapperKeys: [String] { get }

/// The `$`-prefixed keys that indicate an object may be a legacy extended JSON object wrapper.
/// Because these keys can conflict with query operators (e.g. "$regex"), they are not always part of
/// an object wrapper and may sometimes be parsed as normal BSON.
static var extJSONLegacyTypeWrapperKeys: [String] { get }

/// Initializes a corresponding `BSON` from the provided `ByteBuffer`,
/// moving the buffer's readerIndex forward to the byte beyond the end
/// of this value.
Expand All @@ -35,6 +40,8 @@ internal protocol BSONValue: Codable {

/// Convenience extension to get static bsonType from an instance
extension BSONValue {
internal static var extJSONLegacyTypeWrapperKeys: [String] { [] }

internal var bsonType: BSONType {
Self.bsonType
}
Expand Down
31 changes: 30 additions & 1 deletion Sources/SwiftBSON/Date+BSONValue.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import NIO
extension Date: BSONValue {
internal static let extJSONTypeWrapperKeys: [String] = ["$date"]

/// The range of datetimes that can be represented in BSON.
private static let VALID_BSON_DATES: Range<Date> = Date(msSinceEpoch: Int64.min)..<Date(msSinceEpoch: Int64.max)

/*
* Initializes a `Date` from ExtendedJSON.
*
Expand Down Expand Up @@ -48,6 +51,15 @@ extension Date: BSONValue {
)
}
self = date
case let .number(ms):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

legacy allows for a regular number instead of a $numberLong wrapper.

// legacy extended JSON
guard let msInt64 = Int64(ms) else {
throw DecodingError._extendedJSONError(
keyPath: keyPath,
debugDescription: "Expected \(ms) to be valid Int64 representing milliseconds since epoch"
)
}
self = Date(msSinceEpoch: msInt64)
default:
throw DecodingError._extendedJSONError(
keyPath: keyPath,
Expand Down Expand Up @@ -87,7 +99,20 @@ extension Date: BSONValue {
internal var bson: BSON { .datetime(self) }

/// The number of milliseconds after the Unix epoch that this `Date` occurs.
internal var msSinceEpoch: Int64 { Int64((self.timeIntervalSince1970 * 1000.0).rounded()) }
/// If the date is further in the future than Int64.max milliseconds from the epoch,
/// Int64.max is returned to prevent a crash.
internal var msSinceEpoch: Int64 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perhaps we should mention this behavior somewhere more user-facing, maybe on the BSON.datetime case?

also, given that msSinceEpoch is only used internally, did you consider an alternative approach where we throw here instead? edit, ah I see why we can't do that from the tests.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good idea, done

// to prevent the application from crashing, we simply clamp the date to the range representable
// by an Int64 ms since epoch
guard self > Self.VALID_BSON_DATES.lowerBound else {
return Int64.min
}
guard self < Self.VALID_BSON_DATES.upperBound else {
return Int64.max
}

return Int64((self.timeIntervalSince1970 * 1000.0).rounded())
}

/// Initializes a new `Date` representing the instance `msSinceEpoch` milliseconds
/// since the Unix epoch.
Expand All @@ -105,4 +130,8 @@ extension Date: BSONValue {
internal func write(to buffer: inout ByteBuffer) {
buffer.writeInteger(self.msSinceEpoch, endianness: .little, as: Int64.self)
}

internal func isValidBSONDate() -> Bool {
Self.VALID_BSON_DATES.contains(self)
}
}
12 changes: 11 additions & 1 deletion Sources/SwiftBSON/ExtendedJSONDecoder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,15 @@ public class ExtendedJSONDecoder {
}()

/// A set of all the possible extendedJSON wrapper keys.
/// This does not include the legacy extended JSON wrapper keys.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we allow things like "$type" to be in a regular document (e.g. as part of a query operator), the legacy wrapper keys can't be used to determine errors like we do with the reserved ones, so we need to separate them out.

private static var wrapperKeySet: Set<String> = {
Set(ExtendedJSONDecoder.wrapperKeyMap.keys)
var keys: Set<String> = []
for t in BSON.allBSONTypes.values {
for k in t.extJSONTypeWrapperKeys {
keys.insert(k)
}
}
return keys
}()

/// A map from extended JSON wrapper keys (e.g. "$numberLong") to the BSON type(s) that they correspond to.
Expand All @@ -33,6 +40,9 @@ public class ExtendedJSONDecoder {
for k in t.extJSONTypeWrapperKeys {
map[k, default: []].append(t.self)
}
for k in t.extJSONLegacyTypeWrapperKeys {
map[k, default: []].append(t.self)
}
}
return map
}()
Expand Down
Loading