Skip to content

Commit

Permalink
Updated samples: SharePoint utility to fetch drives. Drive and Folder…
Browse files Browse the repository at this point in the history
… Path input
  • Loading branch information
Raj725 committed Jul 19, 2024
1 parent 53e2848 commit fa7cc34
Show file tree
Hide file tree
Showing 8 changed files with 311 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ OPENAI_API_KEY=<YOUR OPENAI API KEY>
O365_CLIENT_ID=<YOUR APPLICATION (CLIENT) ID>
O365_CLIENT_SECRET=<YOUR CLIENT SECRET>
O365_TENANT_ID=<YOUR TENANT ID>
SHAREPOINT_SITE_URL=<YOUR SHAREPOINT SITE URL>

# Pebblo configuration
PEBBLO_CLOUD_URL=<PEBBLO CLOUD URL>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ OPENAI_API_KEY=<YOUR OPENAI API KEY>
O365_CLIENT_ID=<YOUR APPLICATION (CLIENT) ID>
O365_CLIENT_SECRET=<YOUR CLIENT SECRET>
O365_TENANT_ID=<YOUR TENANT ID>
SHAREPOINT_SITE_URL=<YOUR SHAREPOINT SITE URL>

# Postgres configuration
PG_CONNECTION_STRING = "postgresql://<USERNAME>:<PASSWORD>@<HOST>:<PORT>/<DATABASE-NAME>"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,88 @@ def get_access_token(self):
else:
return response.json()["access_token"]

@staticmethod
def format_site_url(site_url: str):
"""
Formats the site URL to include the colon(:) in the URL as required by the Microsoft Graph API.
Example:
1. Default site URL:
input: https://<tenant-name>.sharepoint.com/
output: tenant.sharepoint.com
2. Custom site URL:
input: https://<tenant-name>.sharepoint.com/sites/<site-name>
output: tenant.sharepoint.com:/sites/<site-name>
:param site_url: The original SharePoint site URL.
:return: The formatted site URL with a colon after the tenant domain.
"""

# Check if the site URL contains the "/sites/" substring and format the URL accordingly
if "/sites/" in site_url:
parts = site_url.split("/sites/")
if parts[0].endswith(":"):
# If the URL already contains a colon, use the URL as is
formatted_url = site_url
else:
# Add a colon after the tenant domain
formatted_url = f"{parts[0]}:/sites/{parts[1]}"
else:
formatted_url = site_url

# Remove the https:// prefix from the site URL
formatted_url = formatted_url.replace("https://", "")
return formatted_url

def get_site_id(self, site_url):
    """
    Retrieve the ID of a SharePoint site using the Microsoft Graph API.

    Parameters:
        site_url (str): The URL of the SharePoint site.
    Returns:
        str: The ID of the SharePoint site, or None when the request fails
        or the response carries no "id" field.
    """
    # Format the site URL into the "<host>:/sites/<name>" form
    site_path = self.format_site_url(site_url)
    # Build URL to request site ID
    full_url = f"https://graph.microsoft.com/v1.0/sites/{site_path}"
    try:
        response = requests.get(
            full_url, headers={"Authorization": f"Bearer {self.access_token}"}
        )
        # requests does not raise for 4xx/5xx on its own; without this an
        # error payload would silently turn into a None site ID.
        response.raise_for_status()
        return response.json().get("id")
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on older requests versions.
        print(
            f"Error while retrieving site ID from Microsoft Graph API, Error: {e}"
        )
        return None

def get_drive_id(self, site_id):
    """
    Retrieve the IDs and names of all drives associated with a SharePoint site.

    Parameters:
        site_id (str): The ID of the SharePoint site.
    Returns:
        list: A list of dictionaries. Each dictionary represents a drive on the SharePoint site.
        Each dictionary contains the following keys:
            - 'id': The ID of the drive.
            - 'name': The name of the drive.
        An empty list is returned when the request fails.
    """
    drives_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
    try:
        # self.headers is presumably the authorization header set prepared
        # elsewhere in the class - confirm against __init__.
        response = requests.get(drives_url, headers=self.headers)
        # Without raise_for_status() an HTTP error never becomes an
        # exception, so the handler below could not fire for 4xx/5xx.
        response.raise_for_status()
        drives = response.json().get("value", [])
    except (requests.exceptions.RequestException, ValueError) as e:
        # RequestException also covers connection failures and timeouts;
        # ValueError covers a non-JSON body on older requests versions.
        print(
            f"Error while retrieving document library ID from Microsoft Graph API, Error: {e}"
        )
        return []

    return [{"id": drive["id"], "name": drive["name"]} for drive in drives]


# Running this module directly is a no-op; the guard body does nothing.
if __name__ == "__main__":
    pass
Original file line number Diff line number Diff line change
Expand Up @@ -158,11 +158,52 @@ def ask(
return self.retrieval_chain.invoke(chain_input.dict())


def select_drive(drives: list) -> tuple:
    """
    Select a SharePoint drive from the available drives.

    With a single drive it is returned directly. With several, the drives
    are listed and the user is prompted for a 1-based index; an empty,
    non-numeric, or out-of-range answer selects the default drive (the one
    named "Documents" if present, otherwise the first).

    :param drives: List of dicts, each with "id" and "name" keys.
    :return: Tuple ``(drive_id, drive_name)`` of the selected drive.
    :raises SystemExit: With exit code 1 when ``drives`` is empty.
    """
    if not drives:
        print("No drives found for the site. Exiting ...")
        # Same effect as exit(1) without relying on the site-injected helper.
        raise SystemExit(1)

    if len(drives) == 1:
        only_drive = drives[0]
        return only_drive.get("id"), only_drive.get("name")

    # Select "Documents" as a default drive
    def_drive_idx = next(
        (
            idx
            for idx, drive in enumerate(drives)
            if drive.get("name") == "Documents"
        ),
        0,
    )

    print("Available drives on the site:")
    for position, drive in enumerate(drives, start=1):
        print(f"\t{position}. {drive.get('name')}")

    # Prompt user for drive index
    answer = input(f"Enter drive index (default={def_drive_idx + 1}): ").strip()
    try:
        _drive_idx = int(answer) - 1 if answer else def_drive_idx
    except ValueError:
        # Non-numeric input is treated like any other invalid index instead
        # of crashing the script.
        _drive_idx = -1

    # Validate drive index and select default drive if invalid
    if not 0 <= _drive_idx < len(drives):
        print("Error. Invalid drive index! Selecting the default drive ...")
        _drive_idx = def_drive_idx

    chosen = drives[_drive_idx]
    return chosen.get("id"), chosen.get("name")


if __name__ == "__main__":
input_collection_name = "identity-enabled-rag-sharepoint"
_client_id = os.environ.get("O365_CLIENT_ID")
_client_secret = os.environ.get("O365_CLIENT_SECRET")
_tenant_id = os.environ.get("O365_TENANT_ID")
_site_url = os.environ.get("SHAREPOINT_SITE_URL")

print("Please enter the app details to authenticate with Microsoft Graph API ...")
app_client_id = input(f"App client id ({_client_id}): ") or _client_id
Expand All @@ -171,12 +212,38 @@ def ask(
)
tenant_id = input(f"Tenant id ({_tenant_id}): ") or _tenant_id

print("\nPlease enter drive id for loading data...")
drive_id = input("Drive id : ")
print("\nInitializing SharepointADHelper ...")
sharepoint_helper = SharepointADHelper(
client_id=app_client_id,
client_secret=app_client_secret,
tenant_id=tenant_id,
)
print("SharepointADHelper initialized.")

site_url = (
input(f"\nEnter Sharepoint Site URL (default={_site_url}): ") or _site_url
)
if not site_url:
print("\nSite URL is required. Exiting ...")
exit(1)

# Get SharePoint Site ID using URL
site_id = sharepoint_helper.get_site_id(site_url)
print(f"Derived Site Id: {site_id}\n")

# Get drive info using site id
print("Fetching drive info ...")
drive_info = sharepoint_helper.get_drive_id(site_id)
drive_id, drive_name = select_drive(drive_info)
print(f"\nSharePoint Drive name: {drive_name}, Drive Id: {drive_id}")

# Enter Folder path
folder_path = input("\nEnter folder path (default='/document'): ") or "/document"

# Initialize PebbloSafeRAG app
rag_app = PebbloSafeRAG(
drive_id=drive_id,
folder_path="/document",
folder_path=folder_path,
collection_name=input_collection_name,
)

Expand All @@ -202,11 +269,9 @@ def ask(

prompt = input("Please provide the prompt : ")

authorized_identities = SharepointADHelper(
client_id=app_client_id,
client_secret=app_client_secret,
tenant_id=tenant_id,
).get_authorized_identities(end_user_email_address)
authorized_identities = sharepoint_helper.get_authorized_identities(
end_user_email_address
)

response = rag_app.ask(
prompt,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ OPENAI_API_KEY=<YOUR OPENAI API KEY>
O365_CLIENT_ID=<YOUR APPLICATION (CLIENT) ID>
O365_CLIENT_SECRET=<YOUR CLIENT SECRET>
O365_TENANT_ID=<YOUR TENANT ID>
SHAREPOINT_SITE_URL=<YOUR SHAREPOINT SITE URL>

# Pebblo configuration
PEBBLO_CLOUD_URL=<PEBBLO CLOUD URL>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ OPENAI_API_KEY=<YOUR OPENAI API KEY>
O365_CLIENT_ID=<YOUR APPLICATION (CLIENT) ID>
O365_CLIENT_SECRET=<YOUR CLIENT SECRET>
O365_TENANT_ID=<YOUR TENANT ID>
SHAREPOINT_SITE_URL=<YOUR SHAREPOINT SITE URL>

# Pebblo Cloud configuration (optional)
PEBBLO_CLOUD_URL=<PEBBLO CLOUD URL>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,88 @@ def get_access_token(self):
else:
return response.json()["access_token"]

@staticmethod
def format_site_url(site_url: str):
"""
Formats the site URL to include the colon(:) in the URL as required by the Microsoft Graph API.
Example:
1. Default site URL:
input: https://<tenant-name>.sharepoint.com/
output: tenant.sharepoint.com
2. Custom site URL:
input: https://<tenant-name>.sharepoint.com/sites/<site-name>
output: tenant.sharepoint.com:/sites/<site-name>
:param site_url: The original SharePoint site URL.
:return: The formatted site URL with a colon after the tenant domain.
"""

# Check if the site URL contains the "/sites/" substring and format the URL accordingly
if "/sites/" in site_url:
parts = site_url.split("/sites/")
if parts[0].endswith(":"):
# If the URL already contains a colon, use the URL as is
formatted_url = site_url
else:
# Add a colon after the tenant domain
formatted_url = f"{parts[0]}:/sites/{parts[1]}"
else:
formatted_url = site_url

# Remove the https:// prefix from the site URL
formatted_url = formatted_url.replace("https://", "")
return formatted_url

def get_site_id(self, site_url):
    """
    Retrieve the ID of a SharePoint site using the Microsoft Graph API.

    Parameters:
        site_url (str): The URL of the SharePoint site.
    Returns:
        str: The ID of the SharePoint site, or None when the request fails
        or the response carries no "id" field.
    """
    # Format the site URL into the "<host>:/sites/<name>" form
    site_path = self.format_site_url(site_url)
    # Build URL to request site ID
    full_url = f"https://graph.microsoft.com/v1.0/sites/{site_path}"
    try:
        response = requests.get(
            full_url, headers={"Authorization": f"Bearer {self.access_token}"}
        )
        # requests does not raise for 4xx/5xx on its own; without this an
        # error payload would silently turn into a None site ID.
        response.raise_for_status()
        return response.json().get("id")
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on older requests versions.
        print(
            f"Error while retrieving site ID from Microsoft Graph API, Error: {e}"
        )
        return None

def get_drive_id(self, site_id):
    """
    Retrieve the IDs and names of all drives associated with a SharePoint site.

    Parameters:
        site_id (str): The ID of the SharePoint site.
    Returns:
        list: A list of dictionaries. Each dictionary represents a drive on the SharePoint site.
        Each dictionary contains the following keys:
            - 'id': The ID of the drive.
            - 'name': The name of the drive.
        An empty list is returned when the request fails.
    """
    drives_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
    try:
        # self.headers is presumably the authorization header set prepared
        # elsewhere in the class - confirm against __init__.
        response = requests.get(drives_url, headers=self.headers)
        # Without raise_for_status() an HTTP error never becomes an
        # exception, so the handler below could not fire for 4xx/5xx.
        response.raise_for_status()
        drives = response.json().get("value", [])
    except (requests.exceptions.RequestException, ValueError) as e:
        # RequestException also covers connection failures and timeouts;
        # ValueError covers a non-JSON body on older requests versions.
        print(
            f"Error while retrieving document library ID from Microsoft Graph API, Error: {e}"
        )
        return []

    return [{"id": drive["id"], "name": drive["name"]} for drive in drives]


# Running this module directly is a no-op; the guard body does nothing.
if __name__ == "__main__":
    pass
Loading

0 comments on commit fa7cc34

Please sign in to comment.