diff --git a/daft/io/_iceberg.py b/daft/io/_iceberg.py
index 0d3b1af102..c936779d3d 100644
--- a/daft/io/_iceberg.py
+++ b/daft/io/_iceberg.py
@@ -70,6 +70,33 @@ def read_iceberg(
pyiceberg_table: "PyIcebergTable",
io_config: Optional["IOConfig"] = None,
) -> DataFrame:
+ """Create a DataFrame from an Iceberg table
+
+ Example:
+ >>> import pyiceberg
+ >>>
+ >>> pyiceberg_table = pyiceberg.Table(...)
+ >>> df = daft.read_iceberg(pyiceberg_table)
+ >>>
+ >>> # Filters on this dataframe can now be pushed into
+ >>> # the read operation from Iceberg
+ >>> df = df.where(df["foo"] > 5)
+ >>> df.show()
+
+ .. NOTE::
+ This function requires the use of `PyIceberg `_, which is the Apache Iceberg's
+ official project for Python.
+
+ Args:
+ pyiceberg_table: Iceberg table created using the PyIceberg library
+ io_config: A custom IOConfig to use when accessing Iceberg object storage data. Defaults to None.
+
+ Returns:
+ DataFrame: a DataFrame with the schema converted from the specified Iceberg table
+ """
from daft.iceberg.iceberg_scan import IcebergScanOperator
io_config = (
diff --git a/docs/source/api_docs/creation.rst b/docs/source/api_docs/creation.rst
index e6e821a3a7..033095db97 100644
--- a/docs/source/api_docs/creation.rst
+++ b/docs/source/api_docs/creation.rst
@@ -20,71 +20,80 @@ Python Objects
from_pylist
from_pydict
-Arrow
-~~~~~
+Files
+-----
+
+.. _df-io-files:
+
+Parquet
+~~~~~~~
+
+.. _daft-read-parquet:
.. autosummary::
:nosignatures:
:toctree: doc_gen/io_functions
+ read_parquet
+
+CSV
+~~~
.. autosummary::
:nosignatures:
:toctree: doc_gen/io_functions
- from_arrow
+ read_csv
-Pandas
-~~~~~~
+JSON
+~~~~
.. autosummary::
:nosignatures:
:toctree: doc_gen/io_functions
- from_pandas
+ read_json
-File Paths
-~~~~~~~~~~
+Data Catalogs
+-------------
+
+Apache Iceberg
+~~~~~~~~~~~~~~
.. autosummary::
:nosignatures:
:toctree: doc_gen/io_functions
- from_glob_path
+ read_iceberg
-Files
------
-
-.. _df-io-files:
+Arrow
+~~~~~
-Parquet
-~~~~~~~
-.. _daft-read-parquet:
.. autosummary::
:nosignatures:
:toctree: doc_gen/io_functions
- read_parquet
+ from_arrow
-CSV
-~~~
+Pandas
+~~~~~~
.. autosummary::
:nosignatures:
:toctree: doc_gen/io_functions
- read_csv
+ from_pandas
-JSON
-~~~~
+File Paths
+~~~~~~~~~~
.. autosummary::
:nosignatures:
:toctree: doc_gen/io_functions
- read_json
+ from_glob_path
Integrations
------------
diff --git a/docs/source/user_guide/basic_concepts/read-and-write.rst b/docs/source/user_guide/basic_concepts/read-and-write.rst
index 5828cb847a..e4528cd5d8 100644
--- a/docs/source/user_guide/basic_concepts/read-and-write.rst
+++ b/docs/source/user_guide/basic_concepts/read-and-write.rst
@@ -34,6 +34,11 @@ Daft supports file paths to a single file, a directory of files, and wildcards.
To learn more about each of these constructors, as well as the options that they support, consult the API documentation on :ref:`creating DataFrames from files <df-io-files>`.
+From Data Catalogs
+^^^^^^^^^^^^^^^^^^
+
+If you use catalogs such as Apache Iceberg or Hive, you may wish to consult our user guide on integrations with Data Catalogs: :doc:`Daft integration with Data Catalogs <../integrations/data_catalogs>`.
+
From File Paths
^^^^^^^^^^^^^^^
diff --git a/docs/source/user_guide/index.rst b/docs/source/user_guide/index.rst
index ffc722ab1b..d8e719f518 100644
--- a/docs/source/user_guide/index.rst
+++ b/docs/source/user_guide/index.rst
@@ -9,6 +9,7 @@ Daft User Guide
basic_concepts
daft_in_depth
poweruser
+ integrations
tutorials
Welcome to **Daft**!
@@ -61,6 +62,11 @@ Core Daft concepts all Daft users will find useful to understand deeply.
Become a true Daft Poweruser! This section explores advanced topics to help you configure Daft for specific application environments, improve reliability and optimize for performance.
+:doc:`Integrations <integrations>`
+**********************************
+
+Learn how to use Daft's integrations with other technologies such as Ray Datasets or Apache Iceberg.
+
:doc:`Tutorials <tutorials>`
****************************
diff --git a/docs/source/user_guide/integrations.rst b/docs/source/user_guide/integrations.rst
new file mode 100644
index 0000000000..4a53a31a07
--- /dev/null
+++ b/docs/source/user_guide/integrations.rst
@@ -0,0 +1,6 @@
+Integrations
+============
+
+.. toctree::
+
+ integrations/data_catalogs
diff --git a/docs/source/user_guide/integrations/data_catalogs.rst b/docs/source/user_guide/integrations/data_catalogs.rst
new file mode 100644
index 0000000000..3dee9f8c06
--- /dev/null
+++ b/docs/source/user_guide/integrations/data_catalogs.rst
@@ -0,0 +1,71 @@
+Data Catalogs
+=============
+
+**Data Catalogs** are services that provide access to **Tables** of data. **Tables** are powerful abstractions for large datasets in storage, providing many benefits over naively storing data as just a bunch of CSV/Parquet files.
+
+There are many different **Table Formats** employed by Data Catalogs. These table formats differ in implementation and capabilities, but often provide advantages such as:
+
+1. **Schema:** what data do these files contain?
+2. **Partitioning Specification:** how is the data organized?
+3. **Statistics/Metadata:** how many rows does each file contain, and what are the min/max values of each file's columns?
+4. **ACID compliance:** updates to the table are atomic
+
+.. NOTE::
+ The names of Table Formats and their Data Catalogs are often used interchangeably.
+
+ For example, "Apache Iceberg" often refers to both the Data Catalog and its Table Format.
+
+ You can retrieve an **Apache Iceberg Table** from an **Apache Iceberg REST Data Catalog**.
+
+ However, some Data Catalogs allow for many different underlying Table Formats. For example, you can request either an **Apache Iceberg Table** or a **Hive Table** from an **AWS Glue Data Catalog**.
+
+Why use Data Catalogs?
+----------------------
+
+Daft can effectively leverage the statistics and metadata provided by these Data Catalogs' Tables to dramatically speed up queries.
+
+This is accomplished by techniques such as:
+
+1. **Partition pruning:** skip files whose partition values cannot match filter predicates (see the sketch below)
+2. **Schema retrieval:** convert the schema provided by the Data Catalog into a Daft schema instead of sampling a schema from the data
+3. **Metadata execution:** utilize metadata such as row counts to read the bare minimum amount of data necessary from storage
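+
+For example, here is a minimal sketch of partition pruning in action when reading an Iceberg table. It assumes a ``pyiceberg_table`` already loaded through PyIceberg (see the Apache Iceberg section below for how to obtain one) whose table is partitioned on an illustrative column ``foo``:
+
+.. code:: python
+
+    import daft
+
+    df = daft.read_iceberg(pyiceberg_table)
+
+    # This filter is pushed into the Iceberg read: files in partitions
+    # whose values cannot satisfy the predicate are never read from storage
+    df = df.where(df["foo"] > 5)
+    df.show()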
+
+Data Catalog Integrations
+-------------------------
+
+Apache Iceberg
+^^^^^^^^^^^^^^
+
+Apache Iceberg is an open-source table format originally developed at Netflix for large-scale analytical datasets.
+
+To read from the Apache Iceberg table format, use the :func:`daft.read_iceberg` function.
+
+We integrate closely with `PyIceberg <https://py.iceberg.apache.org/>`_ (the official Python implementation of Apache Iceberg) and support creating Daft DataFrames directly from PyIceberg's Table objects.
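+
+Here is a minimal end-to-end sketch; the catalog name and table identifier are illustrative and will depend on how your Iceberg catalog is configured:
+
+.. code:: python
+
+    import daft
+    from pyiceberg.catalog import load_catalog
+
+    # Load a table from a PyIceberg catalog (names are illustrative)
+    catalog = load_catalog("my_catalog")
+    pyiceberg_table = catalog.load_table("my_namespace.my_table")
+
+    # Create a Daft DataFrame with the schema converted from the Iceberg table
+    df = daft.read_iceberg(pyiceberg_table)
+    df.show()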