Skip to content

Commit

Permalink
Merge pull request #2110 from icristescu/expose_gc_commit
Browse files Browse the repository at this point in the history
Expose to the user the commit_key of the latest GC
  • Loading branch information
Ioana Cristescu authored Oct 12, 2022
2 parents 46e46e0 + e9292da commit af984bb
Show file tree
Hide file tree
Showing 16 changed files with 255 additions and 70 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,14 @@
- **irmin-pack**
- Add `Irmin_pack_unix.Stats.Latest_gc` which is now the parameter of GC's
`finished` callback (#2089, @Ngoguey42)
- Add `Gc.oldest_live_commit` which returns the key of the commit on which the
latest gc was called. (#2110, @icristescu)

### Changed

- **irmin-pack**
- Upgraded on-disk format to version 4. (#2110, @icristescu)

### Fixed

## 3.4.2 (2022-10-06)
Expand Down
57 changes: 48 additions & 9 deletions src/irmin-pack/unix/control_file.ml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ include Control_file_intf
(** [Payload_v3] extended with a binary decoder. No encoder is defined in this
module. *)
module Plv3 = struct
include Payload_v3

(* Unstaged once, when the module is initialised, rather than on every call. *)
let of_bin_string = Irmin.Type.of_bin_string t |> Irmin.Type.unstage
end

(** [Payload_v4] extended with a binary decoder and a binary encoder. *)
module Plv4 = struct
include Payload_v4

(* Both functions are unstaged once, when the module is initialised, rather
than on every call. *)
let of_bin_string = Irmin.Type.of_bin_string t |> Irmin.Type.unstage
let to_bin_string = Irmin.Type.to_bin_string t |> Irmin.Type.unstage
end
Expand All @@ -29,10 +35,11 @@ module Version = Irmin_pack.Version
module Data (Io : Io.S) = struct
(** Type of what's encoded in the control file. The variant tag is encoded as
a [Version.t]. *)
type t = V3 of Plv3.t
type t = V3 of Plv3.t | V4 of Plv4.t

let to_bin_string = function
| V3 payload -> Version.to_bin `V3 ^ Plv3.to_bin_string payload
| V3 _ -> assert false
| V4 payload -> Version.to_bin `V4 ^ Plv4.to_bin_string payload

let of_bin_string s =
let open Result_syntax in
Expand All @@ -46,13 +53,18 @@ module Data (Io : Io.S) = struct
| None -> Error (`Unknown_major_pack_version left)
| Some `V3 when len > Io.page_size -> Error `Corrupted_control_file
| Some `V3 -> Ok `V3
| Some `V4 -> Ok `V4
| Some (`V1 | `V2) -> assert false
in
match version with
| `V3 -> (
match Plv3.of_bin_string right with
| Ok x -> Ok (V3 x)
| Error _ -> Error `Corrupted_control_file)
| `V4 -> (
match Plv4.of_bin_string right with
| Ok x -> Ok (V4 x)
| Error _ -> Error `Corrupted_control_file)
end

module Make (Io : Io.S) = struct
Expand All @@ -61,12 +73,38 @@ module Make (Io : Io.S) = struct

type t = { io : Io.t; mutable payload : Latest_payload.t }

(** [upgrade_v3_to_v4 pl] converts the [`V3] payload [pl] into a [`V4] payload.

[dict_end_poff] and [suffix_end_poff] are copied unchanged, the status is
mapped to its [`V4] counterpart and [upgraded_from_v3_to_v4] is set to
[true]. Fails with [assert false] if the status of [pl] is one of the
provisional [T*] tags. *)
let upgrade_v3_to_v4 (pl : Payload_v3.t) : Payload_v4.t =
let status =
match pl.status with
| From_v1_v2_post_upgrade x -> Payload_v4.From_v1_v2_post_upgrade x
| From_v3_no_gc_yet -> No_gc_yet
| From_v3_used_non_minimal_indexing_strategy ->
Used_non_minimal_indexing_strategy
| From_v3_gced x ->
Gced
{
suffix_start_offset = x.suffix_start_offset;
generation = x.generation;
(* The new field is filled with the suffix start offset, presumably
because the [`V3] payload carries no dedicated GC target offset.
NOTE(review): confirm that this offset is the start of a commit
entry, as [latest_gc_target_offset] is meant to designate the commit
on which the GC was called. *)
latest_gc_target_offset = x.suffix_start_offset;
}
| T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13 | T14
| T15 ->
(* Unreachable *)
assert false
in
{
dict_end_poff = pl.dict_end_poff;
suffix_end_poff = pl.suffix_end_poff;
status;
upgraded_from_v3_to_v4 = true;
}

let write io payload =
let s = Data.(to_bin_string (V3 payload)) in
let s = Data.(to_bin_string (V4 payload)) in

(* The data must fit inside a single page for atomic updates of the file.
This is only true for some file systems. This system will have to be
reworked for [V3]. *)
reworked for [V4]. *)
assert (String.length s <= Io.page_size);

Io.write_string io ~off:Int63.zero s
Expand All @@ -80,7 +118,10 @@ module Make (Io : Io.S) = struct
If [string] is larger than a page, it either means that the file is
corrupted or that the major version is not supported. Either way it will
be handled by [Data.of_bin_string]. *)
Data.of_bin_string string
let+ payload = Data.of_bin_string string in
match payload with
| V4 payload -> payload
| V3 payload -> upgrade_v3_to_v4 payload

let create_rw ~path ~overwrite payload =
let open Result_syntax in
Expand All @@ -91,8 +132,7 @@ module Make (Io : Io.S) = struct
let open_ ~path ~readonly =
let open Result_syntax in
let* io = Io.open_ ~path ~readonly in
let+ data = read io in
let payload = match data with Data.V3 payload -> payload in
let+ payload = read io in
{ io; payload }

let close t = Io.close t.io
Expand All @@ -103,8 +143,7 @@ module Make (Io : Io.S) = struct
let open Result_syntax in
if not @@ Io.readonly t.io then Error `Rw_not_allowed
else
let+ data = read t.io in
let payload = match data with Data.V3 payload -> payload in
let+ payload = read t.io in
t.payload <- payload

let set_payload t payload =
Expand Down
62 changes: 60 additions & 2 deletions src/irmin-pack/unix/control_file_intf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,64 @@ module Payload_v3 = struct
extensions *)
end

module Latest_payload = Payload_v3
module Payload_v4 = struct
type gced = {
suffix_start_offset : int63;
generation : int;
latest_gc_target_offset : int63;
}
[@@deriving irmin]
(** Similar to [from_v3_gced]. [latest_gc_target_offset] is the offset of the
commit on which the latest gc was called. *)

(** [From_v1_v2_post_upgrade] is similar to
[Payload_v3.From_v1_v2_post_upgrade].

[No_gc_yet] corresponds to a pack store that was created using [`V3] or
above. It never underwent a GC.

[Used_non_minimal_indexing_strategy] corresponds to a pack store that was
created using [`V3] or above. It never underwent a GC and it will never be
possible to GC it because entries were pushed using a non-minimal indexing
strategy.

[Gced] is a [`V3] or [`V4] store that was GCed at least once.

The [T*] tags are provisional tags that the binary decoder is aware of and
that may in the future be used to add features to this payload. *)
type status =
| From_v1_v2_post_upgrade of Payload_v3.from_v1_v2_post_upgrade
| No_gc_yet
| Used_non_minimal_indexing_strategy
| Gced of gced
| T1
| T2
| T3
| T4
| T5
| T6
| T7
| T8
| T9
| T10
| T11
| T12
| T13
| T14
| T15
[@@deriving irmin]

type t = {
dict_end_poff : int63;
suffix_end_poff : int63;
status : status;
upgraded_from_v3_to_v4 : bool;
}
[@@deriving irmin]
(** Similar to the [`V3] payload. [upgraded_from_v3_to_v4] records whether the
payload was obtained by upgrading a [`V3] payload. *)
end

module Latest_payload = Payload_v4

module type S = sig
(** Abstraction for irmin-pack's control file.
Expand Down Expand Up @@ -192,8 +249,9 @@ module type S = sig
end

module type Sigs = sig
module Latest_payload = Payload_v3
module Latest_payload = Payload_v4
module Payload_v3 = Payload_v3
module Payload_v4 = Payload_v4

module type S = S

Expand Down
6 changes: 3 additions & 3 deletions src/irmin-pack/unix/dispatcher.ml
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,13 @@ module Make (Fm : File_manager.S with module Io = Io.Unix) :
let suffix_start_offset t =
let pl = Control.payload (Fm.control t.fm) in
match pl.status with
| Payload.From_v1_v2_post_upgrade _
| From_v3_used_non_minimal_indexing_strategy | From_v3_no_gc_yet ->
| Payload.From_v1_v2_post_upgrade _ | Used_non_minimal_indexing_strategy
| No_gc_yet ->
Int63.zero
| T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13 | T14
| T15 ->
assert false
| From_v3_gced { suffix_start_offset; _ } -> suffix_start_offset
| Gced { suffix_start_offset; _ } -> suffix_start_offset

(* The suffix only knows the real offsets; it is in the dispatcher that global
offsets are translated into real ones (i.e. in prefix or suffix offsets). *)
Expand Down
36 changes: 27 additions & 9 deletions src/irmin-pack/unix/ext.ml
Original file line number Diff line number Diff line change
Expand Up @@ -250,14 +250,6 @@ module Maker (Config : Conf.S) = struct
(Irmin.Type.to_string XKey.t commit_key))
| Some (k, _kind) -> Ok k)
in
let offset =
let state : _ Pack_key.state = Pack_key.inspect commit_key in
match state with
| Direct x ->
let len = x.length |> Int63.of_int in
Int63.Syntax.(x.offset + len)
| Indexed _ -> assert false
in
let root = Conf.root t.config in
let* () =
if not (File_manager.gc_allowed t.fm) then Error `Gc_disallowed
Expand All @@ -266,7 +258,7 @@ module Maker (Config : Conf.S) = struct
let current_generation = File_manager.generation t.fm in
let next_generation = current_generation + 1 in
let gc =
Gc.v ~root ~generation:next_generation ~unlink ~offset
Gc.v ~root ~generation:next_generation ~unlink
~dispatcher:t.dispatcher ~fm:t.fm ~contents:t.contents
~node:t.node ~commit:t.commit commit_key
in
Expand Down Expand Up @@ -318,6 +310,31 @@ module Maker (Config : Conf.S) = struct
| Some { use_auto_finalisation = true; _ } ->
let* _ = finalise_exn ~wait:false t in
Lwt.return_unit

(** [latest_gc_target t] is the key of the commit recorded in the control file
as the target of the latest GC.

Returns [None] when the status is [From_v1_v2_post_upgrade],
[Used_non_minimal_indexing_strategy] or [No_gc_yet], and [Some key] when it
is [Gced]. Fails with [assert false] on the provisional [T*] tags, or if the
entry read at the recorded offset has no length in its prefix. *)
let latest_gc_target t =
let pl = Control.payload (File_manager.control t.fm) in
match pl.status with
| From_v1_v2_post_upgrade _ | Used_non_minimal_indexing_strategy
| No_gc_yet ->
None
| T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13
| T14 | T15 ->
assert false
| Gced { latest_gc_target_offset = offset; _ } -> (
(* The control file only stores an offset: the hash and the length needed
to build a direct key are read back from the entry prefix found at that
offset. *)
let entry =
Commit.CA.read_and_decode_entry_prefix ~off:offset
t.dispatcher
in
match Commit.CA.Entry_prefix.total_entry_length entry with
| None ->
(* Commits on which this operation is supported have a
length in their header. *)
assert false
| Some length ->
let key =
Pack_key.v_direct ~offset ~length ~hash:entry.hash
in
Some key)
end

let batch t f =
Expand Down Expand Up @@ -573,6 +590,7 @@ module Maker (Config : Conf.S) = struct
let is_allowed repo = File_manager.gc_allowed repo.X.Repo.fm
let cancel repo = X.Repo.Gc.cancel repo
let latest_gc_target = X.Repo.Gc.latest_gc_target
end
module Traverse_pack_file = Traverse_pack_file.Make (struct
Expand Down
Loading

0 comments on commit af984bb

Please sign in to comment.