Skip to content

Commit

Permalink
feat: support syncing to PTP clocks
Browse files Browse the repository at this point in the history
Also abstract away from NTP types.

Signed-off-by: Dmitry Sharshakov <[email protected]>
Signed-off-by: Andrey Smirnov <[email protected]>
  • Loading branch information
dsseng authored and smira committed Mar 21, 2024
1 parent 7d43c9a commit 84ec8c1
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 39 deletions.
12 changes: 11 additions & 1 deletion hack/release.toml
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,20 @@ This is mostly implemented for extension services that log to syslog.
"""

[notes.ntp]
title = "NTP"
title = "Time Sync"
description = """\
Default NTP server was updated to be `time.cloudflare.com` instead of `pool.ntp.org`.
Default server is only used if the user does not specify any NTP servers in the configuration.
Talos Linux can now sync to PTP devices (e.g. provided by the hypervisor) skipping the network time servers.
In order to activate PTP sync, set `machine.time.servers` to the PTP device name (e.g. `/dev/ptp0`):
```yaml
machine:
time:
servers:
- /dev/ptp0
```
"""

[make_deps]
Expand Down
126 changes: 99 additions & 27 deletions internal/pkg/ntp/ntp.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ import (
"fmt"
"math/bits"
"net"
"os"
"reflect"
"slices"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -52,6 +54,13 @@ type Syncer struct {
AdjustTime AdjustTimeFunc
}

// Measurement is a struct containing correction data based on a time request.
//
// It is the common result type returned by both queryNTP and queryPTP, so the
// sync loop in Run does not depend on which kind of time source was queried.
type Measurement struct {
// ClockOffset is the correction handed to adjustTime. queryNTP copies it
// from the NTP response; queryPTP computes it as the device time minus the
// local time.
ClockOffset time.Duration
// Leap is the leap indicator handed to adjustTime. queryNTP copies it from
// the NTP response; queryPTP always reports 0.
Leap ntp.LeapIndicator
// Spike marks a measurement that Run must not apply to the clock. queryNTP
// sets it from isSpike when the response validates; queryPTP always
// reports false.
Spike bool
}

// NewSyncer creates new Syncer with default configuration.
func NewSyncer(logger *zap.Logger, timeServers []string) *Syncer {
syncer := &Syncer{
Expand Down Expand Up @@ -169,9 +178,8 @@ func (syncer *Syncer) Run(ctx context.Context) {
}

spike := false

if resp != nil && resp.Validate() == nil {
spike = syncer.isSpike(resp)
if resp != nil {
spike = resp.Spike
}

switch {
Expand All @@ -181,17 +189,15 @@ func (syncer *Syncer) Run(ctx context.Context) {
case pollInterval == 0:
// first sync
pollInterval = syncer.MinPoll
case err != nil:
// error encountered, don't change the poll interval
case !spike && absDuration(resp.ClockOffset) > ExpectedAccuracy:
// huge offset, retry sync with minimum interval
pollInterval = syncer.MinPoll
case absDuration(resp.ClockOffset) < ExpectedAccuracy*100/25: // *0.25
case absDuration(resp.ClockOffset) < ExpectedAccuracy*25/100: // *0.25
// clock offset is within 25% of expected accuracy, increase poll interval
if pollInterval < syncer.MaxPoll {
pollInterval *= 2
}
case spike || absDuration(resp.ClockOffset) > ExpectedAccuracy*100/75: // *0.75
case spike || absDuration(resp.ClockOffset) > ExpectedAccuracy*75/100: // *0.75
// spike was detected or clock offset is too large, decrease poll interval
if pollInterval > syncer.MinPoll {
pollInterval /= 2
Expand All @@ -209,7 +215,7 @@ func (syncer *Syncer) Run(ctx context.Context) {
zap.Bool("spike", spike),
)

if resp != nil && resp.Validate() == nil && !spike {
if resp != nil && !spike {
err = syncer.adjustTime(resp.ClockOffset, resp.Leap, lastSyncServer, pollInterval)

if err == nil {
Expand All @@ -234,14 +240,14 @@ func (syncer *Syncer) Run(ctx context.Context) {
}
}

func (syncer *Syncer) query(ctx context.Context) (lastSyncServer string, resp *ntp.Response, err error) {
func (syncer *Syncer) query(ctx context.Context) (lastSyncServer string, measurement *Measurement, err error) {
lastSyncServer = syncer.getLastSyncServer()
failedServer := ""

if lastSyncServer != "" {
resp, err = syncer.queryServer(lastSyncServer)
measurement, err = syncer.queryServer(lastSyncServer)
if err != nil {
syncer.logger.Error(fmt.Sprintf("ntp query error with server %q", lastSyncServer), zap.Error(err))
syncer.logger.Error(fmt.Sprintf("time query error with server %q", lastSyncServer), zap.Error(err))

failedServer = lastSyncServer
lastSyncServer = ""
Expand All @@ -254,7 +260,7 @@ func (syncer *Syncer) query(ctx context.Context) (lastSyncServer string, resp *n

serverList, err = syncer.resolveServers(ctx)
if err != nil {
return lastSyncServer, resp, err
return lastSyncServer, measurement, err
}

for _, server := range serverList {
Expand All @@ -265,15 +271,15 @@ func (syncer *Syncer) query(ctx context.Context) (lastSyncServer string, resp *n

select {
case <-ctx.Done():
return lastSyncServer, resp, ctx.Err()
return lastSyncServer, measurement, ctx.Err()
case <-syncer.restartSyncCh:
return lastSyncServer, resp, nil
return lastSyncServer, measurement, nil
default:
}

resp, err = syncer.queryServer(server)
measurement, err = syncer.queryServer(server)
if err != nil {
syncer.logger.Error(fmt.Sprintf("ntp query error with server %q", server), zap.Error(err))
syncer.logger.Error(fmt.Sprintf("time query error with server %q", server), zap.Error(err))
err = nil
} else {
syncer.setLastSyncServer(server)
Expand All @@ -284,20 +290,28 @@ func (syncer *Syncer) query(ctx context.Context) (lastSyncServer string, resp *n
}
}

return lastSyncServer, resp, err
return lastSyncServer, measurement, err
}

// isPTPDevice reports whether server is written as a device path (anything
// starting with "/dev/") rather than as a network host name or address.
func (syncer *Syncer) isPTPDevice(server string) bool {
	const devicePathPrefix = "/dev/"

	_, isDevice := strings.CutPrefix(server, devicePathPrefix)

	return isDevice
}

func (syncer *Syncer) resolveServers(ctx context.Context) ([]string, error) {
var serverList []string

for _, server := range syncer.getTimeServers() {
ips, err := net.LookupIP(server)
if err != nil {
syncer.logger.Warn(fmt.Sprintf("failed looking up %q, ignored", server), zap.Error(err))
}
if syncer.isPTPDevice(server) {
serverList = append(serverList, server)
} else {
ips, err := net.LookupIP(server)
if err != nil {
syncer.logger.Warn(fmt.Sprintf("failed looking up %q, ignored", server), zap.Error(err))
}

for _, ip := range ips {
serverList = append(serverList, ip.String())
for _, ip := range ips {
serverList = append(serverList, ip.String())
}
}

select {
Expand All @@ -310,7 +324,57 @@ func (syncer *Syncer) resolveServers(ctx context.Context) ([]string, error) {
return serverList, nil
}

func (syncer *Syncer) queryServer(server string) (*ntp.Response, error) {
// queryServer takes one measurement from the given time source, picking the
// query method from the form of the name: device paths are read as PTP clocks,
// everything else is queried over NTP.
func (syncer *Syncer) queryServer(server string) (*Measurement, error) {
	switch {
	case syncer.isPTPDevice(server):
		return syncer.queryPTP(server)
	default:
		return syncer.queryNTP(server)
	}
}

// queryPTP reads the current time from the PTP hardware clock exposed as the
// character device at path server (e.g. /dev/ptp0) and reports how far it is
// from the local clock.
//
// The returned ClockOffset is the device time minus the local wall-clock time,
// so it is positive when the local clock is behind the device. Leap and Spike
// are always reported as 0 and false: nothing in the device reading is used to
// derive a leap indicator or to detect spikes.
//
// An error is returned if the device cannot be opened or its clock cannot be
// read; in that case the measurement is nil.
func (syncer *Syncer) queryPTP(server string) (*Measurement, error) {
	phc, err := os.Open(server)
	if err != nil {
		// The *os.PathError returned by os.Open already names the device.
		return nil, err
	}

	defer phc.Close() //nolint:errcheck

	// From clock_gettime(2):
	//
	// Using the appropriate macros, open file descriptors may be converted into clock IDs and passed to clock_gettime(), clock_settime(), and clock_adjtime(2). The
	// following example shows how to convert a file descriptor into a dynamic clock ID.
	//
	// #define CLOCKFD 3
	// #define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD)
	const clockFD = 3

	clockid := int32(clockFD | (^phc.Fd() << 3))

	var ts unix.Timespec

	if err = unix.ClockGettime(clockid, &ts); err != nil {
		// A bare errno (e.g. "invalid argument") says nothing about which step failed.
		return nil, fmt.Errorf("reading PTP clock: %w", err)
	}

	// time.Until(t) is t minus time.Now(), i.e. device time minus local time.
	offset := time.Until(time.Unix(ts.Sec, ts.Nsec))

	syncer.logger.Debug("PTP clock",
		zap.Duration("clock_offset", offset),
		zap.Int64("sec", ts.Sec),
		zap.Int64("nsec", ts.Nsec),
		zap.String("device", server),
	)

	return &Measurement{
		ClockOffset: offset,
		Leap:        0,
		Spike:       false,
	}, nil
}

func (syncer *Syncer) queryNTP(server string) (*Measurement, error) {
resp, err := syncer.NTPQuery(server)
if err != nil {
return nil, err
Expand All @@ -327,11 +391,19 @@ func (syncer *Syncer) queryServer(server string) (*ntp.Response, error) {
zap.Duration("root_distance", resp.RootDistance),
)

if err = resp.Validate(); err != nil {
return resp, err
validationError := resp.Validate()

measurement := &Measurement{
ClockOffset: resp.ClockOffset,
Leap: resp.Leap,
Spike: false,
}

if validationError == nil {
measurement.Spike = syncer.isSpike(resp)
}

return resp, err
return measurement, validationError
}

// log2i returns 0 for v == 0 and v == 1.
Expand Down
6 changes: 3 additions & 3 deletions pkg/machinery/config/schemas/config.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -3132,9 +3132,9 @@
},
"type": "array",
"title": "servers",
"description": "Specifies time (NTP) servers to use for setting the system time.\nDefaults to time.cloudflare.com.\n",
"markdownDescription": "Specifies time (NTP) servers to use for setting the system time.\nDefaults to `time.cloudflare.com`.",
"x-intellij-html-description": "\u003cp\u003eSpecifies time (NTP) servers to use for setting the system time.\nDefaults to \u003ccode\u003etime.cloudflare.com\u003c/code\u003e.\u003c/p\u003e\n"
"description": "Specifies time (NTP) servers to use for setting the system time.\nDefaults to time.cloudflare.com.\n\nTalos can also sync to the PTP time source (e.g. provided by the hypervisor),\nprovide the path to the PTP device as “/dev/ptp0” or “/dev/ptp_kvm”.\n",
"markdownDescription": "Specifies time (NTP) servers to use for setting the system time.\nDefaults to `time.cloudflare.com`.\n\nTalos can also sync to the PTP time source (e.g. provided by the hypervisor),\nprovide the path to the PTP device as \"/dev/ptp0\" or \"/dev/ptp_kvm\".",
"x-intellij-html-description": "\u003cp\u003eSpecifies time (NTP) servers to use for setting the system time.\nDefaults to \u003ccode\u003etime.cloudflare.com\u003c/code\u003e.\u003c/p\u003e\n\n\u003cp\u003eTalos can also sync to the PTP time source (e.g. provided by the hypervisor),\nprovide the path to the PTP device as \u0026ldquo;/dev/ptp0\u0026rdquo; or \u0026ldquo;/dev/ptp_kvm\u0026rdquo;.\u003c/p\u003e\n"
},
"bootTimeout": {
"type": "string",
Expand Down
3 changes: 3 additions & 0 deletions pkg/machinery/config/types/v1alpha1/v1alpha1_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,9 @@ type TimeConfig struct {
// description: |
// Specifies time (NTP) servers to use for setting the system time.
// Defaults to `time.cloudflare.com`.
//
// Talos can also sync to a PTP time source (e.g. one provided by the hypervisor):
// provide the path to the PTP device, such as "/dev/ptp0" or "/dev/ptp_kvm".
TimeServers []string `yaml:"servers,omitempty"`
// description: |
// Specifies the timeout when the node time is considered to be in sync unlocking the boot sequence.
Expand Down
4 changes: 2 additions & 2 deletions pkg/machinery/config/types/v1alpha1/v1alpha1_types_doc.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ env:
|`time` |<a href="#Config.machine.time">TimeConfig</a> |Used to configure the machine's time settings. <details><summary>Show example(s)</summary>{{< highlight yaml >}}
time:
disabled: false # Indicates if the time service is disabled for the machine.
# Specifies time (NTP) servers to use for setting the system time.
# Specifies time (NTP) servers to use for setting the system time.
servers:
- time.cloudflare.com
bootTimeout: 2m0s # Specifies the timeout when the node time is considered to be in sync unlocking the boot sequence.
Expand Down Expand Up @@ -1984,7 +1984,7 @@ TimeConfig represents the options for configuring time on a machine.
machine:
time:
disabled: false # Indicates if the time service is disabled for the machine.
# Specifies time (NTP) servers to use for setting the system time.
# Specifies time (NTP) servers to use for setting the system time.
servers:
- time.cloudflare.com
bootTimeout: 2m0s # Specifies the timeout when the node time is considered to be in sync unlocking the boot sequence.
Expand All @@ -1994,7 +1994,7 @@ machine:
| Field | Type | Description | Value(s) |
|-------|------|-------------|----------|
|`disabled` |bool |<details><summary>Indicates if the time service is disabled for the machine.</summary>Defaults to `false`.</details> | |
|`servers` |[]string |<details><summary>Specifies time (NTP) servers to use for setting the system time.</summary>Defaults to `time.cloudflare.com`.</details> | |
|`servers` |[]string |<details><summary>Specifies time (NTP) servers to use for setting the system time.</summary>Defaults to `time.cloudflare.com`.<br /><br />Talos can also sync to the PTP time source (e.g. provided by the hypervisor),<br />provide the path to the PTP device as "/dev/ptp0" or "/dev/ptp_kvm".</details> | |
|`bootTimeout` |Duration |<details><summary>Specifies the timeout when the node time is considered to be in sync unlocking the boot sequence.</summary>NTP sync will be still running in the background.<br />Defaults to "infinity" (waiting forever for time sync)</details> | |


Expand Down
6 changes: 3 additions & 3 deletions website/content/v1.7/schemas/config.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -3132,9 +3132,9 @@
},
"type": "array",
"title": "servers",
"description": "Specifies time (NTP) servers to use for setting the system time.\nDefaults to time.cloudflare.com.\n",
"markdownDescription": "Specifies time (NTP) servers to use for setting the system time.\nDefaults to `time.cloudflare.com`.",
"x-intellij-html-description": "\u003cp\u003eSpecifies time (NTP) servers to use for setting the system time.\nDefaults to \u003ccode\u003etime.cloudflare.com\u003c/code\u003e.\u003c/p\u003e\n"
"description": "Specifies time (NTP) servers to use for setting the system time.\nDefaults to time.cloudflare.com.\n\nTalos can also sync to the PTP time source (e.g. provided by the hypervisor),\nprovide the path to the PTP device as “/dev/ptp0” or “/dev/ptp_kvm”.\n",
"markdownDescription": "Specifies time (NTP) servers to use for setting the system time.\nDefaults to `time.cloudflare.com`.\n\nTalos can also sync to the PTP time source (e.g. provided by the hypervisor),\nprovide the path to the PTP device as \"/dev/ptp0\" or \"/dev/ptp_kvm\".",
"x-intellij-html-description": "\u003cp\u003eSpecifies time (NTP) servers to use for setting the system time.\nDefaults to \u003ccode\u003etime.cloudflare.com\u003c/code\u003e.\u003c/p\u003e\n\n\u003cp\u003eTalos can also sync to the PTP time source (e.g. provided by the hypervisor),\nprovide the path to the PTP device as \u0026ldquo;/dev/ptp0\u0026rdquo; or \u0026ldquo;/dev/ptp_kvm\u0026rdquo;.\u003c/p\u003e\n"
},
"bootTimeout": {
"type": "string",
Expand Down

0 comments on commit 84ec8c1

Please sign in to comment.