Report physical disks through the inventory system #5145

Merged 17 commits on Mar 13, 2024. Changes shown from 7 commits.
10 changes: 10 additions & 0 deletions clients/sled-agent-client/src/lib.rs
@@ -165,6 +165,16 @@ impl omicron_common::api::external::ClientError for types::Error {
}
}

impl From<types::DiskIdentity> for omicron_common::disk::DiskIdentity {
fn from(identity: types::DiskIdentity) -> Self {
Self {
vendor: identity.vendor,
serial: identity.serial,
model: identity.model,
}
}
}

impl From<omicron_common::api::internal::nexus::InstanceRuntimeState>
for types::InstanceRuntimeState
{
16 changes: 15 additions & 1 deletion common/src/disk.rs
@@ -4,8 +4,22 @@

//! Disk related types shared among crates

use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

/// Uniquely identifies a disk.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)]
#[derive(
Debug,
Clone,
PartialEq,
Eq,
Hash,
Ord,
PartialOrd,
Serialize,
Deserialize,
JsonSchema,
)]
pub struct DiskIdentity {
pub vendor: String,
pub serial: String,
49 changes: 46 additions & 3 deletions nexus/db-model/src/inventory.rs
@@ -7,10 +7,11 @@
use crate::omicron_zone_config::{OmicronZone, OmicronZoneNic};
use crate::schema::{
hw_baseboard_id, inv_caboose, inv_collection, inv_collection_error,
inv_omicron_zone, inv_omicron_zone_nic, inv_root_of_trust,
inv_root_of_trust_page, inv_service_processor, inv_sled_agent,
inv_sled_omicron_zones, sw_caboose, sw_root_of_trust_page,
inv_omicron_zone, inv_omicron_zone_nic, inv_physical_disk,
inv_root_of_trust, inv_root_of_trust_page, inv_service_processor,
inv_sled_agent, inv_sled_omicron_zones, sw_caboose, sw_root_of_trust_page,
};
use crate::PhysicalDiskKind;
use crate::{
impl_enum_type, ipv6, ByteCount, Generation, MacAddr, Name, SqlU16, SqlU32,
SqlU8,
@@ -652,6 +653,48 @@ impl InvSledAgent {
}
}

/// See [`nexus_types::inventory::PhysicalDisk`].
#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
#[diesel(table_name = inv_physical_disk)]
pub struct InvPhysicalDisk {
pub inv_collection_id: Uuid,
pub sled_id: Uuid,
pub vendor: String,
pub model: String,
pub serial: String,
pub variant: PhysicalDiskKind,
}

impl InvPhysicalDisk {
pub fn new(
inv_collection_id: Uuid,
sled_id: Uuid,
disk: nexus_types::inventory::PhysicalDisk,
) -> Self {
Self {
inv_collection_id,
sled_id,
vendor: disk.identity.vendor,
model: disk.identity.model,
serial: disk.identity.serial,
variant: disk.variant.into(),
}
}
}

impl From<InvPhysicalDisk> for nexus_types::inventory::PhysicalDisk {
fn from(disk: InvPhysicalDisk) -> Self {
Self {
identity: omicron_common::disk::DiskIdentity {
vendor: disk.vendor,
serial: disk.serial,
model: disk.model,
},
variant: disk.variant.into(),
}
}
}

/// See [`nexus_types::inventory::OmicronZonesFound`].
#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
#[diesel(table_name = inv_sled_omicron_zones)]
15 changes: 14 additions & 1 deletion nexus/db-model/src/schema.rs
@@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion;
///
/// This should be updated whenever the schema is changed. For more details,
/// refer to: schema/crdb/README.adoc
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(37, 0, 1);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(38, 0, 0);

table! {
disk (id) {
@@ -1358,6 +1358,19 @@
}
}

table! {
inv_physical_disk (inv_collection_id, sled_id, vendor, model, serial) {
inv_collection_id -> Uuid,
sled_id -> Uuid,

vendor -> Text,
model -> Text,
serial -> Text,

variant -> crate::PhysicalDiskKindEnum,
}
}

table! {
inv_sled_omicron_zones (inv_collection_id, sled_id) {
inv_collection_id -> Uuid,
144 changes: 144 additions & 0 deletions nexus/db-queries/src/db/datastore/inventory.rs
@@ -39,6 +39,7 @@ use nexus_db_model::InvCollection;
use nexus_db_model::InvCollectionError;
use nexus_db_model::InvOmicronZone;
use nexus_db_model::InvOmicronZoneNic;
use nexus_db_model::InvPhysicalDisk;
use nexus_db_model::InvRootOfTrust;
use nexus_db_model::InvRotPage;
use nexus_db_model::InvServiceProcessor;
@@ -125,6 +126,25 @@ impl DataStore {
))
})
.collect::<Result<Vec<_>, Error>>()?;
// Pull disks out of all sled agents
let disks: Vec<_> = collection
.sled_agents
.iter()
.flat_map(|(sled_id, sled_agent)| {
sled_agent
.disks
.iter()
.map(|disk| {
InvPhysicalDisk::new(
collection_id,
*sled_id,
disk.clone(),
)
})
.collect::<Vec<_>>()
})
.collect();
Contributor:

Tiny nit: I think you can drop this collect() (and the corresponding : Vec<_> type hint), make this mut, and then remove the disks.into_iter() call on line 659.

Collaborator (author):

This causes the following compilation error:

error: implementation of `FnOnce` is not general enough
  --> nexus/src/app/background/inventory_collection.rs:58:9
   |
58 | /         async {
59 | |             match inventory_activate(
60 | |                 opctx,
61 | |                 &self.datastore,
...  |
88 | |         }
89 | |         .boxed()
   | |________________^ implementation of `FnOnce` is not general enough
   |
   = note: closure with signature `fn(&'0 nexus_types::inventory::PhysicalDisk) -> InvPhysicalDisk` must implement `FnOnce<(&'1 nexus_types::inventory::PhysicalDisk,)>`, for any two lifetimes `'0` and `'1`...
   = note: ...but it actually implements `FnOnce<(&nexus_types::inventory::PhysicalDisk,)>`

I believe disks.into_iter() is called within a transaction, which is where the FnOnce comes from?

I think I'm going to keep this pattern of "collect it into a Vec, pass that Vec to the FnOnce" for the moment, because the lifetime complexity doesn't seem worth trying to optimize right now.

Contributor:

Grrrr, this is very annoying. I checked that diff against nexus-db-queries and it was fine, but didn't check whether it caused compilation errors down the line. That feels like a bug - if my function signature hasn't changed, why can implementation changes cause client breakage?

Contributor (@jgallagher, Mar 12, 2024):

Thanks to @cbiffle, who found we're hitting rust-lang/rust#99492 here; this ought to work. I found a couple of weird-looking workarounds using hints from that issue:

@@ -127,15 +127,10 @@ impl DataStore {
             })
             .collect::<Result<Vec<_>, Error>>()?;
         // Pull disks out of all sled agents
-        let disks: Vec<_> = collection
-            .sled_agents
-            .iter()
-            .flat_map(|(sled_id, sled_agent)| {
-                sled_agent.disks.iter().map(|disk| {
-                    InvPhysicalDisk::new(collection_id, *sled_id, disk.clone())
-                })
-            })
-            .collect();
+        let mut disks =
+            collection.sled_agents.iter().flat_map(|(sled_id, sled_agent)| {
+                std::iter::repeat(sled_id).zip(sled_agent.disks.iter())
+            });

         // Partition the sled agents into those with an associated baseboard id
         // and those without one.  We handle these pretty differently.
@@ -656,10 +651,18 @@ impl DataStore {
                 use db::schema::inv_physical_disk::dsl;

                 let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap();
-                let mut disks = disks.into_iter();
                 loop {
-                    let some_disks =
-                        disks.by_ref().take(batch_size).collect::<Vec<_>>();
+                    let some_disks = disks
+                        .by_ref()
+                        .take(batch_size)
+                        .map(|(&sled_id, disk)| {
+                            InvPhysicalDisk::new(
+                                collection_id,
+                                sled_id,
+                                disk.clone(),
+                            )
+                        })
+                        .collect::<Vec<_>>();
                     if some_disks.is_empty() {
                         break;
                     }

or if we don't like collection_id floating around, it can go in the repeat too:

@@ -127,15 +127,11 @@ impl DataStore {
             })
             .collect::<Result<Vec<_>, Error>>()?;
         // Pull disks out of all sled agents
-        let disks: Vec<_> = collection
-            .sled_agents
-            .iter()
-            .flat_map(|(sled_id, sled_agent)| {
-                sled_agent.disks.iter().map(|disk| {
-                    InvPhysicalDisk::new(collection_id, *sled_id, disk.clone())
-                })
-            })
-            .collect();
+        let mut disks =
+            collection.sled_agents.iter().flat_map(|(sled_id, sled_agent)| {
+                std::iter::repeat((collection_id, *sled_id))
+                    .zip(sled_agent.disks.iter())
+            });

         // Partition the sled agents into those with an associated baseboard id
         // and those without one.  We handle these pretty differently.
@@ -656,10 +652,18 @@ impl DataStore {
                 use db::schema::inv_physical_disk::dsl;

                 let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap();
-                let mut disks = disks.into_iter();
                 loop {
-                    let some_disks =
-                        disks.by_ref().take(batch_size).collect::<Vec<_>>();
+                    let some_disks = disks
+                        .by_ref()
+                        .take(batch_size)
+                        .map(|((collection_id, sled_id), disk)| {
+                            InvPhysicalDisk::new(
+                                collection_id,
+                                sled_id,
+                                disk.clone(),
+                            )
+                        })
+                        .collect::<Vec<_>>();
                     if some_disks.is_empty() {
                         break;
                     }

I don't know that either of these is worth it just to save the intermediate .collect(), but at least we (kind of) understand it.

Contributor:

Yeah, I ran into this yesterday. It's a pretty bad bug in rustc.

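For readers following along, here is a standalone sketch of the pattern the thread settles on — flatten each sled's disks into an owned Vec up front and hand that Vec to the transaction, so no borrowing iterator has to satisfy the higher-ranked FnOnce bound that trips rust-lang/rust#99492. The types are simplified stand-ins (u128 for Uuid, hand-rolled structs), not the real omicron/nexus types.

// Standalone sketch with hypothetical stand-in types.
use std::collections::BTreeMap;

type Uuid = u128; // stand-in for uuid::Uuid

#[derive(Clone, Debug)]
struct PhysicalDisk {
    vendor: String,
    serial: String,
    model: String,
}

#[derive(Clone, Debug)]
struct InvPhysicalDisk {
    inv_collection_id: Uuid,
    sled_id: Uuid,
    vendor: String,
    serial: String,
    model: String,
}

// Flattening eagerly means the later async transaction closure only ever
// moves owned rows; nothing borrowed from the collection escapes.
fn flatten_disks(
    collection_id: Uuid,
    sled_agents: &BTreeMap<Uuid, Vec<PhysicalDisk>>,
) -> Vec<InvPhysicalDisk> {
    sled_agents
        .iter()
        .flat_map(|(sled_id, disks)| {
            disks
                .iter()
                .map(|disk| InvPhysicalDisk {
                    inv_collection_id: collection_id,
                    sled_id: *sled_id,
                    vendor: disk.vendor.clone(),
                    serial: disk.serial.clone(),
                    model: disk.model.clone(),
                })
                .collect::<Vec<_>>()
        })
        .collect()
}

fn main() {
    let mut sleds = BTreeMap::new();
    sleds.insert(
        1,
        vec![PhysicalDisk {
            vendor: "macrohard".into(),
            serial: "XXIV".into(),
            model: "box".into(),
        }],
    );
    // The owned rows can now be moved into an async transaction closure.
    let rows = flatten_disks(42, &sleds);
    assert_eq!(rows.len(), 1);
}
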

// Partition the sled agents into those with an associated baseboard id
// and those without one. We handle these pretty differently.
let (sled_agents_baseboards, sled_agents_no_baseboards): (
@@ -639,6 +659,20 @@ impl DataStore {
}
}

// Insert rows for all the physical disks we found.
{
use db::schema::inv_physical_disk::dsl;

let mut iter =
disks.chunks(SQL_BATCH_SIZE.get().try_into().unwrap());
Collaborator:

In general I like the idea of inserting in batches. But this is all going into one transaction so I'm not sure it's any better in that case.

Collaborator (author):

The batch size is still fairly large (1,000), so it's unlikely we'll be looping much on a single rack or two anyway. But I could see this blowing past the size of a "single SQL statement" if we consider it for larger multi-rack cases?
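
To make the batching trade-off concrete, here is a minimal sketch of the chunking pattern using plain slices; insert_rows is a hypothetical stand-in for the Diesel insert against the connection.

// Minimal sketch of the chunked-insert pattern; insert_rows is hypothetical.
const SQL_BATCH_SIZE: usize = 1000;

fn insert_rows(batch: &[String]) {
    // Stand-in for diesel::insert_into(dsl::inv_physical_disk)
    //     .values(batch.to_vec()).execute_async(&conn).await
    println!("INSERT of {} rows", batch.len());
}

fn main() {
    let disks: Vec<String> = (0..2500).map(|i| format!("disk-{i}")).collect();

    // `chunks` caps each INSERT at SQL_BATCH_SIZE rows, so a large inventory
    // never produces an oversized single statement, even though every batch
    // still runs inside the same transaction.
    for some_disks in disks.chunks(SQL_BATCH_SIZE) {
        insert_rows(some_disks);
    }
}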

while let Some(some_disks) = iter.next() {
let _ = diesel::insert_into(dsl::inv_physical_disk)
.values(some_disks.to_vec())
.execute_async(&conn)
.await?;
}
}

// Insert rows for the sled agents that we found. In practice, we'd
// expect these to all have baseboards (if using Oxide hardware) or
// none have baseboards (if not).
@@ -1029,6 +1063,7 @@
ncabooses,
nrot_pages,
nsled_agents,
nphysical_disks,
nsled_agent_zones,
nzones,
nnics,
@@ -1100,6 +1135,17 @@
.await?
};

// Remove rows for physical disks found.
let nphysical_disks = {
use db::schema::inv_physical_disk::dsl;
diesel::delete(
dsl::inv_physical_disk
.filter(dsl::inv_collection_id.eq(collection_id)),
)
.execute_async(&conn)
.await?
};

// Remove rows associated with Omicron zones
let nsled_agent_zones = {
use db::schema::inv_sled_omicron_zones::dsl;
@@ -1149,6 +1195,7 @@
ncabooses,
nrot_pages,
nsled_agents,
nphysical_disks,
nsled_agent_zones,
nzones,
nnics,
@@ -1171,6 +1218,7 @@
"ncabooses" => ncabooses,
"nrot_pages" => nrot_pages,
"nsled_agents" => nsled_agents,
"nphysical_disks" => nphysical_disks,
"nsled_agent_zones" => nsled_agent_zones,
"nzones" => nzones,
"nnics" => nnics,
@@ -1391,6 +1439,91 @@ impl DataStore {
rows
};

// Mapping of "Sled ID" -> "All disks reported by that sled"
let physical_disks: BTreeMap<
Uuid,
Vec<nexus_types::inventory::PhysicalDisk>,
> = {
use db::schema::inv_physical_disk::dsl;

let mut disks = BTreeMap::<
Uuid,
Vec<nexus_types::inventory::PhysicalDisk>,
>::new();
let mut paginator = Paginator::new(batch_size);
while let Some(p) = paginator.next() {
// The primary key of physical disks is more than two columns,
// which complicates a bit of the pagination query. This
// code below is effectively an implementation of
// "paginated_multicolumn" for these four columns (sled_id,
// vendor, model, serial).
type Marker = (Uuid, String, String, String);

let pagparams = &p.current_pagparams();
let mut query = dsl::inv_physical_disk
.into_boxed()
.limit(pagparams.limit.get().into())
.filter(dsl::inv_collection_id.eq(id));
let marker = pagparams.marker.map(|m: &Marker| m.clone());
if let Some((sled_id, vendor, serial, model)) = marker {
query = query.filter(
dsl::inv_collection_id
.eq(id)
.and(dsl::sled_id.eq(sled_id))
.and(dsl::vendor.eq(vendor.clone()))
.and(dsl::model.eq(model.clone()))
.and(dsl::serial.gt(serial.clone())),
);
query = query.or_filter(
dsl::inv_collection_id
.eq(id)
.and(dsl::sled_id.eq(sled_id))
.and(dsl::vendor.eq(vendor.clone()))
.and(dsl::model.gt(model.clone())),
);
query = query.or_filter(
dsl::inv_collection_id
.eq(id)
.and(dsl::sled_id.eq(sled_id))
.and(dsl::vendor.gt(vendor.clone())),
);
query = query.or_filter(
dsl::inv_collection_id
.eq(id)
.and(dsl::sled_id.gt(sled_id)),
);
}
query = query
.order_by(dsl::inv_collection_id.asc())
.then_order_by(dsl::sled_id.asc())
.then_order_by(dsl::vendor.asc())
.then_order_by(dsl::model.asc())
.then_order_by(dsl::serial.asc());

let batch: Vec<_> = query
.select(InvPhysicalDisk::as_select())
.load_async(&*conn)
.await
.map_err(|e| {
public_error_from_diesel(e, ErrorHandler::Server)
})?;
paginator = p.found_batch(&batch, &|row| {
(
row.sled_id,
row.vendor.clone(),
row.serial.clone(),
row.model.clone(),
)
});

for disk in batch {
disks.entry(disk.sled_id).or_default().push(disk.into());
}
}

disks
};

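An aside on the pagination block above: the chain of OR-filters is just tuple-lexicographic comparison on (sled_id, vendor, model, serial), matching the ORDER BY. A standalone sketch of the equivalent predicate, with u64 standing in for Uuid:

// Sketch of the keyset-pagination predicate encoded by the OR-filters above.
type Key = (u64, String, String, String); // (sled_id, vendor, model, serial)

fn after_marker(row: &Key, marker: &Key) -> bool {
    // Equivalent to the chained filters:
    //   (sled_id = m AND vendor = m AND model = m AND serial > m)
    //   OR (sled_id = m AND vendor = m AND model > m)
    //   OR (sled_id = m AND vendor > m)
    //   OR (sled_id > m)
    // which is exactly tuple-lexicographic "greater than":
    row > marker
}

fn main() {
    let marker: Key = (1, "v0".into(), "m0".into(), "s0".into());
    assert!(after_marker(&(1, "v0".into(), "m0".into(), "s1".into()), &marker));
    assert!(after_marker(&(1, "v0".into(), "m1".into(), "a".into()), &marker));
    assert!(after_marker(&(2, "a".into(), "a".into(), "a".into()), &marker));
    assert!(!after_marker(&(1, "v0".into(), "m0".into(), "s0".into()), &marker));
}
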
// Collect the unique baseboard ids referenced by SPs, RoTs, and Sled
// Agents.
let baseboard_id_ids: BTreeSet<_> = sps
@@ -1494,6 +1627,10 @@
),
usable_physical_ram: s.usable_physical_ram.into(),
reservoir_size: s.reservoir_size.into(),
disks: physical_disks
.get(&sled_id)
.map(|disks| disks.to_vec())
.unwrap_or_default(),
};
Ok((sled_id, sled_agent))
})
@@ -1933,6 +2070,7 @@ mod test {
use nexus_types::inventory::RotPageWhich;
use omicron_common::api::external::Error;
use omicron_test_utils::dev;
use pretty_assertions::assert_eq;
use std::num::NonZeroU32;

struct CollectionCounts {
@@ -2384,6 +2522,12 @@
.await
.unwrap();
assert_eq!(0, count);
let count = schema::inv_physical_disk::dsl::inv_physical_disk
.select(diesel::dsl::count_star())
.first_async::<i64>(&conn)
.await
.unwrap();
assert_eq!(0, count);
let count =
schema::inv_sled_omicron_zones::dsl::inv_sled_omicron_zones
.select(diesel::dsl::count_star())
1 change: 1 addition & 0 deletions nexus/deployment/src/blueprint_builder.rs
@@ -806,6 +806,7 @@ pub mod test {
sled_id,
usable_hardware_threads: 10,
usable_physical_ram: ByteCount::from(1024 * 1024),
disks: vec![],
},
)
.unwrap();
6 changes: 6 additions & 0 deletions nexus/inventory/src/builder.rs
@@ -458,6 +458,7 @@ impl CollectionBuilder {
reservoir_size: inventory.reservoir_size,
time_collected: now_db_precision(),
sled_id,
disks: inventory.disks.into_iter().map(|d| d.into()).collect(),
};

if let Some(previous) = self.sleds.get(&sled_id) {
@@ -907,6 +908,11 @@ mod test {
let sled1_bb = sled1_agent.baseboard_id.as_ref().unwrap();
assert_eq!(sled1_bb.part_number, "model1");
assert_eq!(sled1_bb.serial_number, "s1");
assert_eq!(sled1_agent.disks.len(), 4);
assert_eq!(sled1_agent.disks[0].identity.vendor, "macrohard");
assert_eq!(sled1_agent.disks[0].identity.model, "box");
assert_eq!(sled1_agent.disks[0].identity.serial, "XXIV");

let sled4_agent = &collection.sled_agents[&sled_agent_id_extra];
let sled4_bb = sled4_agent.baseboard_id.as_ref().unwrap();
assert_eq!(sled4_bb.serial_number, "s4");