Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Cosmos] Bulk/Batch APIs with v1/v2 hashing in JS #10168

Merged
merged 31 commits into from
Jul 28, 2020
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
8b7f6b0
Adds v1 partitionKey hashing
zfoster Jun 30, 2020
a5e78ac
Fix comment
zfoster Jun 30, 2020
e066237
Adds murmurhash library and uses Bytes
zfoster Jun 30, 2020
b212ecf
Try es2020
zfoster Jun 30, 2020
60034c2
Use BigInt init everywhere
zfoster Jul 1, 2020
ea7a53f
Adds v2 hashing support
zfoster Jul 2, 2020
90aae54
Adds back Bulk operations for v1 and v2 containers
zfoster Jul 7, 2020
684772b
Replaces bigint with JSBI
zfoster Jul 9, 2020
6c03482
Try other keys
zfoster Jul 10, 2020
4cdef29
revert comments
zfoster Jul 11, 2020
61d41ce
Pushes logs to mess with number partitioning
zfoster Jul 14, 2020
18864b5
Remove pk from public API, fix test
zfoster Jul 16, 2020
68b1fe7
Correctly orders operations
zfoster Jul 17, 2020
5ef791c
Adds recursive bulk calls on 410 errors
zfoster Jul 20, 2020
c677566
Use BytePrefix everywhere
zfoster Jul 20, 2020
5e256ff
Fix hash prefix
zfoster Jul 20, 2020
0247870
Fixes {} case and errors on 410s
zfoster Jul 20, 2020
5c9841b
Merge branch 'master' into zf/bulk
zfoster Jul 21, 2020
0e6948f
Rush update and es6
zfoster Jul 21, 2020
7fe7dcb
Merge master and add declaration module
zfoster Jul 22, 2020
8025fcf
Add .d.ts murmurHash file
zfoster Jul 22, 2020
ec1d194
Adds murmurHash as ts file
zfoster Jul 22, 2020
d74f1e3
Fix converted murmurhash by removing vars
zfoster Jul 22, 2020
00d33b4
Exposes types publicly, makes partitionKeyRangeId consistent
zfoster Jul 23, 2020
03115d1
Adds 100 operation limit, removes lint issues
zfoster Jul 24, 2020
91bfcac
Adds 100 item limit, changelog, string docs
zfoster Jul 24, 2020
dd162ea
Fixes api diff
zfoster Jul 24, 2020
8e08212
Update sdk/cosmosdb/cosmos/CHANGELOG.md
zfoster Jul 27, 2020
efd1572
Remove partitionKey as header in top level
zfoster Jul 27, 2020
6842c09
Merge branch 'zf/bulk' of https://github.com/zfoster/azure-sdk-for-js…
zfoster Jul 27, 2020
dcaa388
Merge with master
zfoster Jul 27, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
983 changes: 494 additions & 489 deletions common/config/rush/pnpm-lock.yaml

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions sdk/cosmosdb/cosmos/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
"@types/debug": "^4.1.4",
"debug": "^4.1.1",
"fast-json-stable-stringify": "^2.0.0",
"jsbi": "^3.1.3",
"node-abort-controller": "^1.0.4",
"node-fetch": "^2.6.0",
"os-name": "^3.1.0",
Expand Down
18 changes: 18 additions & 0 deletions sdk/cosmosdb/cosmos/review/cosmos.api.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,14 @@ export class ChangeFeedResponse<T> {
export class ClientContext {
constructor(cosmosClientOptions: CosmosClientOptions, globalEndpointManager: GlobalEndpointManager);
// (undocumented)
bulk<T>({ body, path, resourceId, partitionKeyRange, options }: {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

partitionKeyRangeId for consistency

body: T;
path: string;
partitionKeyRange: string;
resourceId: string;
options?: RequestOptions;
}): Promise<Response<any>>;
// (undocumented)
clearSessionToken(path: string): void;
// (undocumented)
create<T, U = T>({ body, path, resourceType, resourceId, options, partitionKey }: {
Expand Down Expand Up @@ -330,6 +338,9 @@ export const Constants: {
EnableScriptLogging: string;
ScriptLogResults: string;
ALLOW_MULTIPLE_WRITES: string;
IsBatchRequest: string;
IsBatchAtomic: string;
ForceRefresh: string;
};
WritableLocations: string;
ReadableLocations: string;
Expand Down Expand Up @@ -759,6 +770,11 @@ export class ItemResponse<T extends ItemDefinition> extends ResourceResponse<T &
// @public
export class Items {
constructor(container: Container, clientContext: ClientContext);
// Warning: (ae-forgotten-export) The symbol "Operation" needs to be exported by the entry point index.d.ts

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please fix.

// Warning: (ae-forgotten-export) The symbol "OperationResponse" needs to be exported by the entry point index.d.ts
//
// (undocumented)
bulk(operations: Operation[], options?: RequestOptions): Promise<OperationResponse[]>;
changeFeed(partitionKey: string | number | boolean, changeFeedOptions?: ChangeFeedOptions): ChangeFeedIterator<any>;
changeFeed(changeFeedOptions?: ChangeFeedOptions): ChangeFeedIterator<any>;
changeFeed<T>(partitionKey: string | number | boolean, changeFeedOptions?: ChangeFeedOptions): ChangeFeedIterator<T>;
Expand Down Expand Up @@ -857,6 +873,8 @@ export class Offers {

// @public (undocumented)
export enum OperationType {
// (undocumented)
Batch = "batch",
// (undocumented)
Create = "create",
// (undocumented)
Expand Down
49 changes: 49 additions & 0 deletions sdk/cosmosdb/cosmos/src/ClientContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,55 @@ export class ClientContext {
return this.globalEndpointManager.getReadEndpoint();
}

/**
 * Posts a batch of item operations to a single partition key range.
 *
 * Builds a POST request context tagged with `OperationType.Batch` and
 * `ResourceType.item`, adds the batch-specific headers, applies the session
 * token, resolves the service endpoint and runs the request through the
 * plugin pipeline.
 *
 * @param body - Payload sent as the request body.
 * @param path - Request path; also used when capturing the session token.
 * @param resourceId - Resource id placed on the request context.
 * @param partitionKeyRange - Value sent in the partition-key-range-id header.
 * @param options - Request options; defaults to `{}`.
 * @returns The response produced by the request pipeline.
 * @throws Re-throws any pipeline error after passing it to `captureSessionToken`.
 */
public async bulk<T>({
  body,
  path,
  resourceId,
  partitionKeyRange,
  options = {}
}: {
  body: T;
  path: string;
  partitionKeyRange: string;
  resourceId: string;
  options?: RequestOptions;
}) {
  try {
    const request: RequestContext = {
      globalEndpointManager: this.globalEndpointManager,
      requestAgent: this.cosmosClientOptions.agent,
      connectionPolicy: this.connectionPolicy,
      method: HTTPMethod.post,
      client: this,
      operationType: OperationType.Batch,
      path,
      body,
      resourceType: ResourceType.item,
      resourceId,
      plugins: this.cosmosClientOptions.plugins,
      options
    };

    request.headers = await this.buildHeaders(request);
    // NOTE(review): a reviewer suggested sending the boolean `true` here; the
    // string "True" is kept so the header value on the wire does not change.
    request.headers[Constants.HttpHeaders.IsBatchRequest] = "True";
    request.headers[Constants.HttpHeaders.PartitionKeyRangeID] = partitionKeyRange;
    // The batch is explicitly flagged as non-atomic.
    request.headers[Constants.HttpHeaders.IsBatchAtomic] = false;

    this.applySessionToken(request);

    request.endpoint = await this.globalEndpointManager.resolveServiceEndpoint(
      request.resourceType,
      request.operationType
    );
    const response = await executePlugins(request, executeRequest, PluginOn.operation);
    this.captureSessionToken(undefined, path, OperationType.Batch, response.headers);
    return response;
  } catch (err) {
    // Report the same operation type as the request and the success path
    // (this previously passed OperationType.Upsert).
    this.captureSessionToken(err, path, OperationType.Batch, (err as ErrorResponse).headers);
    throw err;
  }
}

private captureSessionToken(
err: ErrorResponse,
path: string,
Expand Down
84 changes: 79 additions & 5 deletions sdk/cosmosdb/cosmos/src/client/Item/Items.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,20 @@ import { extractPartitionKey } from "../../extractPartitionKey";
import { FetchFunctionCallback, SqlQuerySpec } from "../../queryExecutionContext";
import { QueryIterator } from "../../queryIterator";
import { FeedOptions, RequestOptions } from "../../request";
import { Container } from "../Container";
import { Container, PartitionKeyRange } from "../Container";
import { Item } from "./Item";
import { ItemDefinition } from "./ItemDefinition";
import { ItemResponse } from "./ItemResponse";
import {
Batch,
isKeyInRange,
Operation,
getPartitionKeyToHash,
addPKToOperation,
OperationResponse,
} from "../../utils/batch";
import { hashV1PartitionKey } from "../../utils/hashing/v1";
import { hashV2PartitionKey } from "../../utils/hashing/v2";

/**
* @ignore
Expand All @@ -39,7 +49,7 @@ export class Items {
constructor(
public readonly container: Container,
private readonly clientContext: ClientContext
) { }
) {}

/**
* Queries all items.
Expand Down Expand Up @@ -85,7 +95,7 @@ export class Items {
resultFn: (result) => (result ? result.Documents : []),
query,
options: innerOptions,
partitionKey: options.partitionKey
partitionKey: options.partitionKey,
});
};

Expand Down Expand Up @@ -287,7 +297,7 @@ export class Items {
resourceType: ResourceType.item,
resourceId: id,
options,
partitionKey
partitionKey,
});

const ref = new Item(
Expand Down Expand Up @@ -356,7 +366,7 @@ export class Items {
resourceType: ResourceType.item,
resourceId: id,
options,
partitionKey
partitionKey,
});

const ref = new Item(
Expand All @@ -373,4 +383,68 @@ export class Items {
ref
);
}

/**
 * Executes a list of item operations as bulk requests.
 *
 * The container's partition key ranges and partition key definition are read
 * first. Each operation's partition key is hashed (v2 when the definition
 * reports version 2, v1 otherwise) and the operation is placed in the batch
 * whose [min, max) range contains the hash. One request is sent per non-empty
 * batch, all in parallel, and the per-operation responses are written back at
 * the index each operation had in `operations`.
 *
 * NOTE(review): per review feedback on this change, a single service request
 * can take 100 operations and be up to 2 MB. This method does not split or
 * limit batches to that size.
 *
 * @param operations - Operations to execute.
 * @param options - Request options forwarded to every batch request.
 * @returns Responses in the same order as `operations`.
 * @throws Error when no partition key range contains an operation's hashed key.
 * @throws Error when a batch request fails with code 410.
 * @throws The original error when a batch request fails for any other reason.
 */
public async bulk(
  operations: Operation[],
  options?: RequestOptions
): Promise<OperationResponse[]> {
  const {
    resources: partitionKeyRanges,
  } = await this.container.readPartitionKeyRanges().fetchAll();
  const { resource: definition } = await this.container.getPartitionKeyDefinition();
  const batches: Batch[] = partitionKeyRanges.map((keyRange: PartitionKeyRange) => {
    return {
      min: keyRange.minInclusive,
      max: keyRange.maxExclusive,
      rangeId: keyRange.id,
      indexes: [],
      operations: [],
    };
  });
  operations
    .map((operation) => addPKToOperation(operation, definition))
    .forEach((operation: Operation, index: number) => {
      const partitionProp = definition.paths[0].replace("/", "");
      const isV2 = definition.version && definition.version === 2;
      const toHashKey = getPartitionKeyToHash(operation, partitionProp);
      const hashed = isV2 ? hashV2PartitionKey(toHashKey) : hashV1PartitionKey(toHashKey);
      const batchForKey = batches.find((batch: Batch) => {
        return isKeyInRange(batch.min, batch.max, hashed);
      });
      // Fail with a descriptive message instead of an opaque TypeError when no
      // range contains the hashed key.
      if (!batchForKey) {
        throw new Error(
          `No partition key range contains the hashed partition key of the operation at index ${index}`
        );
      }
      batchForKey.operations.push(operation);
      // Remember the caller's position so responses can be re-ordered later.
      batchForKey.indexes.push(index);
    });

  const path = getPathFromLink(this.container.url, ResourceType.item);

  const orderedResponses: OperationResponse[] = [];
  await Promise.all(
    batches
      .filter((batch: Batch) => batch.operations.length)
      .map(async (batch: Batch) => {
        try {
          const response = await this.clientContext.bulk({
            body: batch.operations,
            partitionKeyRange: batch.rangeId,
            path,
            resourceId: this.container.url,
            options,
          });
          response.result.forEach((operationResponse: OperationResponse, index: number) => {
            orderedResponses[batch.indexes[index]] = operationResponse;
          });
        } catch (err) {
          // In the case of 410 errors, we need to recompute the partition key ranges
          // and redo the batch request, however, 410 errors occur for unsupported
          // partition key types as well since we don't support them, so for now we throw
          if (err.code === 410) {
            throw new Error(
              "Partition key error. Either the partitions have split or an operation has an unsupported partitionKey type"
            );
          }
          // Every other failure used to be swallowed here, which let the method
          // resolve with holes in the result array. Surface it to the caller.
          throw err;
        }
      })
  );
  return orderedResponses;
}
}
12 changes: 10 additions & 2 deletions sdk/cosmosdb/cosmos/src/common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,14 @@ export const Constants = {
ScriptLogResults: "x-ms-documentdb-script-log-results",

// Multi-Region Write
ALLOW_MULTIPLE_WRITES: "x-ms-cosmos-allow-tentative-writes"
ALLOW_MULTIPLE_WRITES: "x-ms-cosmos-allow-tentative-writes",

// Bulk/Batch header
IsBatchRequest: "x-ms-cosmos-is-batch-request",
IsBatchAtomic: "x-ms-cosmos-batch-atomic",

// Cache Refresh header
ForceRefresh: "x-ms-force-refresh"
},

// GlobalDB related constants
Expand Down Expand Up @@ -247,5 +254,6 @@ export enum OperationType {
Delete = "delete",
Read = "read",
Query = "query",
Execute = "execute"
Execute = "execute",
Batch = "batch"
}
90 changes: 90 additions & 0 deletions sdk/cosmosdb/cosmos/src/utils/batch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import { JSONObject } from "../queryExecutionContext";
import { extractPartitionKey } from "../extractPartitionKey";
import { PartitionKeyDefinition } from "../documents";

/**
 * Any single operation that can be placed in a bulk request.
 * Each member carries a distinct `operationType` string literal, which
 * identifies the variant.
 */
export type Operation =
| CreateOperation
| UpsertOperation
| ReadOperation
| DeleteOperation
| ReplaceOperation;

/**
 * A group of operations destined for one partition key range.
 */
export interface Batch {
// Lower bound of the range; treated as inclusive by isKeyInRange.
min: string;
// Upper bound of the range; treated as exclusive by isKeyInRange.
max: string;
// Identifier of the partition key range this batch is sent to.
rangeId: string;
// For each entry in `operations`, the position that operation had in the
// caller's original list (parallel array; used to restore response order).
indexes: number[];
// Operations whose hashed partition key fell inside [min, max).
operations: Operation[];
}

/**
 * Result reported for one operation of a bulk request.
 */
export interface OperationResponse {
// Status reported for this individual operation (presumably HTTP-style — confirm against the service docs).
statusCode: number;
// Charge reported for this individual operation (presumably in request units — TODO confirm).
requestCharge: number;
// Optional ETag returned for the operation, when present.
eTag?: string;
// Optional JSON body returned for the operation, when present.
resourceBody?: JSONObject;
}

/**
 * Tests whether `key` lies in the half-open interval [min, max).
 * Ordering is decided by `String.prototype.localeCompare`.
 *
 * @param min - Lower bound (inclusive).
 * @param max - Upper bound (exclusive).
 * @param key - Value to test.
 * @returns `true` when `min <= key < max` under `localeCompare` ordering.
 */
export function isKeyInRange(min: string, max: string, key: string) {
  // Below the inclusive lower bound: cannot be in range.
  if (key.localeCompare(min) < 0) {
    return false;
  }
  // Otherwise the key is in range only if it sorts strictly before `max`.
  return key.localeCompare(max) < 0;
}

/**
 * Fields shared by every bulk operation.
 */
interface OperationBase {
// Partition key as a string. addPKToOperation fills this with the
// JSON-stringified extracted key when it is absent on item-carrying operations.
partitionKey?: string;
// Presumably an ETag precondition (If-Match) — TODO confirm against the service docs.
ifMatch?: string;
// Presumably an ETag precondition (If-None-Match) — TODO confirm against the service docs.
ifNoneMatch?: string;
}

/**
 * Base for operations that carry an item body.
 */
type OperationWithItem = OperationBase & {
resourceBody: JSONObject;
};

/** Item-carrying operation tagged "Create". */
type CreateOperation = OperationWithItem & {
operationType: "Create";
};

/** Item-carrying operation tagged "Upsert". */
type UpsertOperation = OperationWithItem & {
operationType: "Upsert";
};

/** Operation tagged "Read"; carries an `id` and no item body. */
type ReadOperation = OperationBase & {
operationType: "Read";
id: string;
};

/** Operation tagged "Delete"; carries an `id` and no item body. */
type DeleteOperation = OperationBase & {
operationType: "Delete";
id: string;
};

/** Item-carrying operation tagged "Replace"; also carries an `id`. */
type ReplaceOperation = OperationWithItem & {
operationType: "Replace";
id: string;
};

/**
 * Type guard that reports whether an operation carries an item body.
 *
 * @param operation - Operation to inspect.
 * @returns `true` when `operation.resourceBody` is anything other than
 * `undefined`, narrowing the operation to Create, Upsert or Replace.
 */
export function hasResource(
  operation: Operation
): operation is CreateOperation | UpsertOperation | ReplaceOperation {
  const { resourceBody } = operation as OperationWithItem;
  return !(resourceBody === undefined);
}

/**
 * Picks the value that should be hashed to route an operation to a partition.
 *
 * - Operations with a `resourceBody` use `resourceBody[partitionProperty]`.
 * - Other operations use `operation.partitionKey` with every `[`, `]`, `"`
 *   and `'` character removed.
 * - The literal partition key `"[{}]"` is mapped to an empty object.
 *
 * @param operation - Operation to inspect.
 * @param partitionProperty - Name of the property on the item body that holds
 * the partition key.
 * @returns The value to hash. This is `undefined` when an item body lacks the
 * property, or when an operation without an item body has no `partitionKey`.
 */
export function getPartitionKeyToHash(operation: Operation, partitionProperty: string) {
  const rawKey = operation.partitionKey;
  const toHashKey = hasResource(operation)
    ? (operation.resourceBody as any)[partitionProperty]
    : typeof rawKey === "string"
    ? rawKey.replace(/[\[\]\"\']/g, "")
    : // `partitionKey` is optional: pass a missing (or non-string) value through
      // instead of crashing on `.replace`, mirroring the item-body branch, which
      // already yields `undefined` for a missing property.
      rawKey;
  // We check for empty object since replace will stringify the value
  // The second check avoids cases where the partitionKey value is actually the string '{}'
  if (toHashKey === "{}" && operation.partitionKey === "[{}]") {
    return {};
  }
  return toHashKey;
}

/**
 * Ensures an item-carrying operation has a `partitionKey`.
 *
 * When the operation has no (truthy) `partitionKey` and carries a
 * `resourceBody`, the key is extracted from the body using `definition` and
 * stored as a JSON string on a shallow copy. In every other case the operation
 * is returned as-is.
 *
 * @param operation - Operation to complete.
 * @param definition - Partition key definition used for extraction.
 * @returns The original operation, or a copy with `partitionKey` filled in.
 */
export function addPKToOperation(operation: Operation, definition: PartitionKeyDefinition) {
  if (!operation.partitionKey && hasResource(operation)) {
    const extractedKey = extractPartitionKey(operation.resourceBody, definition);
    const serializedKey = JSON.stringify(extractedKey);
    return { ...operation, partitionKey: serializedKey };
  }
  return operation;
}
Loading