jump-dev · odow · Sep 10, 2024 · Sep 10, 2024 · Sep 10, 2024 · Sep 10, 2024
diff --git a/Project.toml b/Project.toml
@@ -9,32 +9,36 @@ MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
 MutableArithmetics = "d8a4904e-b15c-11e9-3269-09a3773c0cb0"
 OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
-Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [weakdeps]
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 
 [extensions]
+JuMPDataFramesExt = "DataFrames"
 JuMPDimensionalDataExt = "DimensionalData"
 
 [compat]
+DataFrames = "1"
 DimensionalData = "0.24, 0.25, 0.26.2, 0.27"
 LinearAlgebra = "<0.0.1, 1.6"
 MacroTools = "0.5"
 MathOptInterface = "1.25.2"
 MutableArithmetics = "1.1"
 OrderedCollections = "1"
-Printf = "<0.0.1, 1.6"
 PrecompileTools = "1"
+Printf = "<0.0.1, 1.6"
 SparseArrays = "<0.0.1, 1.6"
 Test = "<0.0.1, 1.6"
 julia = "1.6"
 
 [extras]
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["DimensionalData", "Test"]
+test = ["DataFrames", "DimensionalData", "Test"]
diff --git a/docs/make.jl b/docs/make.jl
@@ -195,6 +195,7 @@ for (solver, data) in TOML.parsefile(joinpath(@__DIR__, "packages.toml"))
 end
 push!(
     _LIST_OF_EXTENSIONS,
+    "JuliaData/DataFrames.jl" => "extensions/DataFrames.md",
     "rafaqz/DimensionalData.jl" => "extensions/DimensionalData.md",
 )
 

diff --git a/docs/src/extensions/DataFrames.md b/docs/src/extensions/DataFrames.md
@@ -0,0 +1,153 @@
+# DataFrames.jl
+
+[DataFrames.jl](https://github.com/JuliaData/DataFrames.jl) provides tools for
+working with in-memory tabular data in Julia.
+
+!!! compat
+    Using the DataFrames extension with JuMP requires Julia v1.9 or later.
+
+The DataFrames extension in JuMP lets you construct a `DataFrames.DataFrame` as
+a container in the JuMP macros.
+
+## License
+
+DataFrames.jl is licensed under the [MIT license](https://github.com/JuliaData/DataFrames.jl/blob/main/LICENSE.md).
+
+## Installation
+
+Install DataFrames using `Pkg.add`:
+
+```julia
+import Pkg
+Pkg.add("DataFrames")
+```
+
+## Use with JuMP
+
+Activate the extension by loading both JuMP and DataFrames:
+
+```jldoctest ext_data_frames
+julia> using JuMP, DataFrames
+```
+
+Then, pass `container = DataFrames.DataFrame` in the [`@variable`](@ref),
+[`@constraint`](@ref), or [`@expression`](@ref) macros:
+
+```jldoctest ext_data_frames
+julia> model = Model();
+
+julia> @variable(
+           model,
+           x[i = 2:4, j = ["a", "b"]] >= i,
+           container = DataFrames.DataFrame,
+       )
+6×3 DataFrame
+ Row │ i      j       value
+     │ Int64  String  GenericV…
+─────┼──────────────────────────
+   1 │     2  a       x[2,a]
+   2 │     3  a       x[3,a]
+   3 │     4  a       x[4,a]
+   4 │     2  b       x[2,b]
+   5 │     3  b       x[3,b]
+   6 │     4  b       x[4,b]
+```
+
+Here `x` is a `DataFrames.DataFrame` object, so operations use the
+DataFrames syntax:
+
+```jldoctest ext_data_frames
+julia> x[x.j .== "a", [:i, :value]]
+3×2 DataFrame
+ Row │ i      value
+     │ Int64  GenericV…
+─────┼──────────────────
+   1 │     2  x[2,a]
+   2 │     3  x[3,a]
+   3 │     4  x[4,a]
+
+julia> DataFrames.unstack(x, :i, :j, :value)
+3×3 DataFrame
+ Row │ i      a           b
+     │ Int64  GenericV…?  GenericV…?
+─────┼───────────────────────────────
+   1 │     2  x[2,a]      x[2,b]
+   2 │     3  x[3,a]      x[3,b]
+   3 │     4  x[4,a]      x[4,b]
+```
+
+You can use `container = DataFrames.DataFrame` in the [`@expression`](@ref)
+macro:
+
+```jldoctest ext_data_frames
+julia> @expression(
+           model,
+           expr[j = ["a", "b"]],
+           sum(x[x.j .== j, :value]),
+           container = DataFrames.DataFrame,
+       )
+2×2 DataFrame
+ Row │ j       value
+     │ String  AffExpr
+─────┼──────────────────────────────────
+   1 │ a       x[2,a] + x[3,a] + x[4,a]
+   2 │ b       x[2,b] + x[3,b] + x[4,b]
+```
+
+and in [`@constraint`](@ref):
+
+```jldoctest ext_data_frames
+julia> @constraint(
+           model,
+           [j = ["a", "b"]],
+           sum(x[x.j .== j, :value]) <= 1,
+           container = DataFrames.DataFrame,
+       )
+2×2 DataFrame
+ Row │ j       value
+     │ String  Constrai…
+─────┼──────────────────────────────────────
+   1 │ a       x[2,a] + x[3,a] + x[4,a] ≤ 1
+   2 │ b       x[2,b] + x[3,b] + x[4,b] ≤ 1
+```
+
+### DataFrame-native syntax
+
+While you can use indexing in JuMP's `@expression` and `@constraint` macros, it
+may be more convenient to use the DataFrames.jl split-apply-combine framework. For
+example, `expr` can be equivalently written as:
+
+```jldoctest ext_data_frames
+julia> expr2 = model[:expr2] = DataFrames.combine(
+           DataFrames.groupby(x, :j),
+           :value => sum => :value,
+       )
+2×2 DataFrame
+ Row │ j       value
+     │ String  AffExpr
+─────┼──────────────────────────────────
+   1 │ a       x[2,a] + x[3,a] + x[4,a]
+   2 │ b       x[2,b] + x[3,b] + x[4,b]
+```
+
+and the constraint could be written as:
+
+```jldoctest ext_data_frames
+julia> df_constraint(v) = @constraint(model, sum(v) <= 1);
+
+julia> DataFrames.combine(
+           DataFrames.groupby(x, :j),
+           :value => df_constraint => :value,
+       )
+2×2 DataFrame
+ Row │ j       value
+     │ String  Constrai…
+─────┼──────────────────────────────────────
+   1 │ a       x[2,a] + x[3,a] + x[4,a] ≤ 1
+   2 │ b       x[2,b] + x[3,b] + x[4,b] ≤ 1
+```
+
+## Documentation
+
+See the [DataFrames.jl documentation](https://dataframes.juliadata.org/stable/)
+for more details on the syntax and features of `DataFrames.DataFrame`.
diff --git a/docs/src/tutorials/linear/multi.jl b/docs/src/tutorials/linear/multi.jl
@@ -23,11 +23,10 @@ using JuMP
 import DataFrames
 import HiGHS
 import SQLite
+import SQLite: DBInterface
 import Tables
 import Test
 
-const DBInterface = SQLite.DBInterface
-
 # ## Formulation
 
 # The multi-commondity flow problem is a simple extension of
@@ -119,58 +118,82 @@ products =
 
 model = Model(HiGHS.Optimizer)
 set_silent(model)
-@variable(model, x[origins, destinations, products] >= 0)
+@variable(
+    model,
+    x[origin in origins, destination in destinations, product in products] >= 0,
+    container = DataFrames.DataFrame,
+)
 
 # One approach when working with databases is to extract all of the data into a
-# Julia datastructure. For example, let's pull the cost table into a DataFrame
-# and then construct our objective by iterating over the rows of the DataFrame:
+# Julia data structure. For example, let's pull the cost table into a DataFrame:
 
 cost = DBInterface.execute(db, "SELECT * FROM cost") |> DataFrames.DataFrame
-@objective(
-    model,
-    Max,
-    sum(r.cost * x[r.origin, r.destination, r.product] for r in eachrow(cost)),
-);
 
-# If we don't want to use a DataFrame, we can use a `Tables.rowtable` instead:
+# and then join it with the decision variables:
 
-supply = DBInterface.execute(db, "SELECT * FROM supply") |> Tables.rowtable
-for r in supply
-    @constraint(model, sum(x[r.origin, :, r.product]) <= r.supply)
+function natural_join(left, right)
+    on_names = intersect(names(left), names(right))  # names common to both tables
+    return DataFrames.innerjoin(left, right; on = on_names)
 end
 
-# Another approach is to execute the query, and then to iterate through the rows
-# of the query using `Tables.rows`:
+cost_x = natural_join(cost, x)
+
+# We've defined a new function, `natural_join`, to simplify the process of
+# joining two DataFrames. This function acts like the `NATURAL JOIN` statement in
+# SQL.
 
-demand = DBInterface.execute(db, "SELECT * FROM demand")
-for r in Tables.rows(demand)
-    @constraint(model, sum(x[:, r.destination, r.product]) == r.demand)
+# Our objective is the inner product of two columns:
+
+@objective(model, Max, cost_x.cost' * cost_x.value);
+
+# The supply constraint is more complicated. A useful utility is a function that
+# sums the `.value` column after grouping on a set of columns:
+
+function sum_value_by(df, cols)
+    gdf = DataFrames.groupby(df, cols)  # group the rows of `df` by `cols`
+    return DataFrames.combine(gdf, :value => sum => :value)  # per-group sum of `:value`
 end
 
-# !!! warning
-#     Iterating through the rows of a query result works by incrementing a
-#     cursor inside the database. As a consequence, you cannot call
-#     `Tables.rows` twice on the same query result.
+# Here it is in action:
+
+sum_value_by(x, [:origin, :product])
+
+# The constraint that the supply must be less than or equal to a capacity can
+# now be written as:
+
+supply = natural_join(
+    DBInterface.execute(db, "SELECT * FROM supply") |> DataFrames.DataFrame,
+    sum_value_by(x, [:origin, :product]),
+)
+@constraint(model, supply.value .<= supply.supply);
+
+# The demand constraint can be written similarly:
+
+demand = natural_join(
+    DBInterface.execute(db, "SELECT * FROM demand") |> DataFrames.DataFrame,
+    sum_value_by(x, [:destination, :product]),
+)
+@constraint(model, demand.value .== demand.demand);
 
 # The SQLite queries can be arbitrarily complex. For example, here's a query
 # which builds every possible origin-destination pair:
 
-od_pairs = DBInterface.execute(
-    db,
-    """
-    SELECT a.location as 'origin',
-           b.location as 'destination'
-    FROM locations a
-    INNER JOIN locations b
-    ON a.type = 'origin' AND b.type = 'destination'
-    """,
-)
+od_pairs =
+    DBInterface.execute(
+        db,
+        """
+        SELECT a.location as 'origin',
+            b.location as 'destination'
+        FROM locations a
+        INNER JOIN locations b
+        ON a.type = 'origin' AND b.type = 'destination'
+        """,
+    ) |> DataFrames.DataFrame
 
 # With a constraint that we cannot send more than 625 units between each pair:
 
-for r in Tables.rows(od_pairs)
-    @constraint(model, sum(x[r.origin, r.destination, :]) <= 625)
-end
+od = natural_join(od_pairs, sum_value_by(x, [:origin, :destination]))
+@constraint(model, od.value .<= 625);
 
 # ## Solution
 
@@ -181,12 +204,7 @@ Test.@test is_solved_and_feasible(model)
 Test.@test objective_value(model) == 225_700.0      #src
 solution_summary(model)
 
-# and print the solution:
+# and obtain the solution:
 
-begin
-    println("         ", join(products, ' '))
-    for o in origins, d in destinations
-        v = lpad.([round(Int, value(x[o, d, p])) for p in products], 5)
-        println(o, " ", d, " ", join(replace.(v, "   0" => "  . "), " "))
-    end
-end
+x.value = value.(x.value)
+x[x.value.>0, :]
diff --git a/ext/JuMPDataFramesExt.jl b/ext/JuMPDataFramesExt.jl
@@ -0,0 +1,23 @@
+#  Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors
+#  This Source Code Form is subject to the terms of the Mozilla Public
+#  License, v. 2.0. If a copy of the MPL was not distributed with this
+#  file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+module JuMPDataFramesExt
+
+import DataFrames
+import JuMP
+
+function JuMP.Containers.container(  # build a DataFrame with one row per index
+    f::Function,  # called as `f(arg...)` for each element `arg` of `indices`
+    indices,  # iterable of index tuples; collected and flattened below
+    ::Type{DataFrames.DataFrame},  # dispatch tag only; the value is unused
+    names::AbstractVector,  # used as the NamedTuple field names of each row
+)
+    rows = vec(collect(indices))  # materialize the indices as a flat vector
+    df = DataFrames.DataFrame(NamedTuple{tuple(names...)}(arg) for arg in rows)
+    df.value = [f(arg...) for arg in rows]  # NOTE(review): would replace an index named `value` — confirm
+    return df
+end
+
+end #module