-
Notifications
You must be signed in to change notification settings - Fork 4.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Source Salesforce: fix pagination in REST API streams #9151
Changes from 8 commits
6ba06b1
1a3f70c
46efe41
7b8dfd9
a1cdb36
c828d6f
bf66e4a
ffe8185
1d63cdb
4c23d6d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,20 +44,29 @@ def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: | |
def url_base(self) -> str: | ||
return self.sf_api.instance_url | ||
|
||
def path(self, **kwargs) -> str: | ||
def path(self, next_page_token: Mapping[str, Any] = None, **kwargs) -> str: | ||
if next_page_token: | ||
""" | ||
If `next_page_token` is set, subsequent requests use `nextRecordsUrl`. | ||
""" | ||
return next_page_token | ||
return f"/services/data/{self.sf_api.version}/queryAll" | ||
|
||
def next_page_token(self, response: requests.Response) -> str: | ||
response_data = response.json() | ||
if len(response_data["records"]) == self.page_size and self.primary_key and self.name not in UNSUPPORTED_FILTERING_STREAMS: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The main issue was here, |
||
return f"WHERE {self.primary_key} >= '{response_data['records'][-1][self.primary_key]}' " | ||
return response_data.get("nextRecordsUrl") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @vitaliizazmic |
||
|
||
def request_params( | ||
self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None | ||
) -> MutableMapping[str, Any]: | ||
""" | ||
Salesforce SOQL Query: https://developer.salesforce.com/docs/atlas.en-us.232.0.api_rest.meta/api_rest/dome_queryall.htm | ||
""" | ||
if next_page_token: | ||
""" | ||
If `next_page_token` is set, subsequent requests use `nextRecordsUrl`, and do not include any parameters. | ||
""" | ||
return {} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we are trying to get the 2nd and subsequent pages, we don't need to send any params; just use `nextRecordsUrl`. |
||
|
||
selected_properties = self.get_json_schema().get("properties", {}) | ||
|
||
|
@@ -70,11 +79,9 @@ def request_params( | |
} | ||
|
||
query = f"SELECT {','.join(selected_properties.keys())} FROM {self.name} " | ||
if next_page_token: | ||
query += next_page_token | ||
|
||
if self.primary_key and self.name not in UNSUPPORTED_FILTERING_STREAMS: | ||
query += f"ORDER BY {self.primary_key} ASC LIMIT {self.page_size}" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, in the REST API we don't need to LIMIT the result set, because LIMIT does not work for all streams. |
||
query += f"ORDER BY {self.primary_key} ASC" | ||
|
||
return {"q": query} | ||
|
||
|
@@ -259,6 +266,32 @@ def next_page_token(self, last_record: dict) -> str: | |
if self.primary_key and self.name not in UNSUPPORTED_FILTERING_STREAMS: | ||
return f"WHERE {self.primary_key} >= '{last_record[self.primary_key]}' " | ||
|
||
def request_params( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Before the change, |
||
self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None | ||
) -> MutableMapping[str, Any]: | ||
""" | ||
Salesforce SOQL Query: https://developer.salesforce.com/docs/atlas.en-us.232.0.api_rest.meta/api_rest/dome_queryall.htm | ||
""" | ||
|
||
selected_properties = self.get_json_schema().get("properties", {}) | ||
|
||
# Salesforce BULK API currently does not support loading fields with data type base64 and compound data | ||
if self.sf_api.api_type == "BULK": | ||
selected_properties = { | ||
key: value | ||
for key, value in selected_properties.items() | ||
if value.get("format") != "base64" and "object" not in value["type"] | ||
} | ||
|
||
query = f"SELECT {','.join(selected_properties.keys())} FROM {self.name} " | ||
if next_page_token: | ||
query += next_page_token | ||
|
||
if self.primary_key and self.name not in UNSUPPORTED_FILTERING_STREAMS: | ||
query += f"ORDER BY {self.primary_key} ASC LIMIT {self.page_size}" | ||
|
||
return {"q": query} | ||
|
||
def read_records( | ||
self, | ||
sync_mode: SyncMode, | ||
|
@@ -305,14 +338,15 @@ def format_start_date(start_date: Optional[str]) -> Optional[str]: | |
if start_date: | ||
return pendulum.parse(start_date).strftime("%Y-%m-%dT%H:%M:%SZ") | ||
|
||
def next_page_token(self, response: requests.Response) -> str: | ||
response_data = response.json() | ||
if len(response_data["records"]) == self.page_size and self.name not in UNSUPPORTED_FILTERING_STREAMS: | ||
return response_data["records"][-1][self.cursor_field] | ||
|
||
def request_params( | ||
self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None | ||
) -> MutableMapping[str, Any]: | ||
if next_page_token: | ||
""" | ||
If `next_page_token` is set, subsequent requests use `nextRecordsUrl`, and do not include any parameters. | ||
""" | ||
return {} | ||
|
||
selected_properties = self.get_json_schema().get("properties", {}) | ||
|
||
# Salesforce BULK API currently does not support loading fields with data type base64 and compound data | ||
|
@@ -324,13 +358,13 @@ def request_params( | |
} | ||
|
||
stream_date = stream_state.get(self.cursor_field) | ||
start_date = next_page_token or stream_date or self.start_date | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't need |
||
start_date = stream_date or self.start_date | ||
|
||
query = f"SELECT {','.join(selected_properties.keys())} FROM {self.name} " | ||
if start_date: | ||
query += f"WHERE {self.cursor_field} >= {start_date} " | ||
if self.name not in UNSUPPORTED_FILTERING_STREAMS: | ||
query += f"ORDER BY {self.cursor_field} ASC LIMIT {self.page_size}" | ||
query += f"ORDER BY {self.cursor_field} ASC" | ||
return {"q": query} | ||
|
||
@property | ||
|
@@ -352,3 +386,26 @@ class BulkIncrementalSalesforceStream(BulkSalesforceStream, IncrementalSalesforc | |
def next_page_token(self, last_record: dict) -> str: | ||
if self.name not in UNSUPPORTED_FILTERING_STREAMS: | ||
return last_record[self.cursor_field] | ||
|
||
def request_params( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Before the change, this method was inherited from |
||
self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None | ||
) -> MutableMapping[str, Any]: | ||
selected_properties = self.get_json_schema().get("properties", {}) | ||
|
||
# Salesforce BULK API currently does not support loading fields with data type base64 and compound data | ||
if self.sf_api.api_type == "BULK": | ||
selected_properties = { | ||
key: value | ||
for key, value in selected_properties.items() | ||
if value.get("format") != "base64" and "object" not in value["type"] | ||
} | ||
|
||
stream_date = stream_state.get(self.cursor_field) | ||
start_date = next_page_token or stream_date or self.start_date | ||
|
||
query = f"SELECT {','.join(selected_properties.keys())} FROM {self.name} " | ||
if start_date: | ||
query += f"WHERE {self.cursor_field} >= {start_date} " | ||
if self.name not in UNSUPPORTED_FILTERING_STREAMS: | ||
query += f"ORDER BY {self.cursor_field} ASC LIMIT {self.page_size}" | ||
return {"q": query} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`nextRecordsUrl` is a relative URL, so we can return it here instead of the original URL.