-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathvirtio_vmmci.c
406 lines (340 loc) · 10.5 KB
/
virtio_vmmci.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
/*
* Implementation of an OpenBSD VMM control interface for Linux guests
* running under an OpenBSD host.
*
* Copyright 2020 Dave Voutila
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/reboot.h>
#include <linux/rtc.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/time64.h>
#include <linux/timekeeping.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include "virtio_vmmci.h"
/*
 * Global debug level. Adjust the default by giving this variable an
 * initializer, or change it at runtime through the "debug" kernel module
 * parameter. See README.md for details.
 *
 * File-scope statics start out as zero, so the default level is 0.
 */
static int debug;
/*
 * Setter for the "debug" module parameter.
 *
 * val: textual value supplied for the parameter.
 * kp:  parameter descriptor, forwarded untouched to param_set_int().
 *
 * Returns -EINVAL when val does not parse as a base-10 int or parses to a
 * negative number; otherwise returns whatever param_set_int() returns.
 */
static int set_debug(const char *val, const struct kernel_param *kp)
{
	int level = 0;

	/* Validate first; the actual store is delegated to param_set_int(). */
	if (kstrtoint(val, 10, &level) != 0)
		return -EINVAL;
	if (level < 0)
		return -EINVAL;

	return param_set_int(val, kp);
}
/*
 * Getter for the "debug" module parameter.
 *
 * buffer: destination for the formatted value.
 * kp:     parameter descriptor (unused; the value is read from the debug
 *         variable directly).
 *
 * Writes the current debug level followed by a newline and returns the
 * number of characters written, NOT counting the terminating NUL. The
 * return value is the length of the text handed back to the reader, so
 * counting the NUL would leak a stray '\0' byte into the output.
 */
static int get_debug(char *buffer, const struct kernel_param *kp)
{
	return snprintf(buffer, 1024, "%d\n", debug);
}
/* Callbacks backing the "debug" module parameter: stores go through
* set_debug() and reads go through get_debug().
*/
static const struct kernel_param_ops debug_param_ops = {
.set = set_debug,
.get = get_debug,
};
/* Declare "debug" as a module parameter using the callbacks above, with the
* debug variable as its argument and permission bits 0664.
*/
module_param_cb(debug, &debug_param_ops, &debug, 0664);
/*
 * Most recently measured clock drift, split into whole seconds and
 * nanoseconds so no floating point is needed. Both values are written by
 * monitor_work_func() and published read-only through drift_table.
 * File-scope objects start out as zero.
 */
int drift_sec;
int drift_nsec;

/* Handle returned by sysctl registration; used to unregister on removal. */
static struct ctl_table_header *vmmci_table_header;
/*
 * sysctl entries exposing the drift globals. Both are read-only (0444)
 * ints handled by proc_dointvec. The trailing empty entry terminates the
 * table.
 */
static struct ctl_table drift_table[] = {
	{
		.procname	= "drift_sec",
		.data		= &drift_sec,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "drift_nsec",
		.data		= &drift_nsec,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{ },
};
#if LINUX_VERSION_CODE < KERNEL_VERSION(6,6,0)
/*
* Parent "vmmci" sysctl entry whose child table is drift_table. Only built
* for kernels older than 6.6.
*
* Removed in (presumably the interface this form relies on -- confirm):
* https://github.com/torvalds/linux/commit/2f2665c13af4895b26761107c2f637c2f112d8e9
*
* NOTE(review): this is a single struct, not an array ending in an empty
* entry like drift_table. Confirm that register_sysctl_table() does not walk
* past it looking for a terminating entry; if it does, this needs to become
* a two-element array together with the call site in vmmci_probe().
*/
static struct ctl_table vmmci_table = {
.procname = "vmmci",
.child = drift_table,
};
#endif
/* Define our basic commands and structs for our device including the
* virtio feature tables.
*/
/*
 * Command codes read from the VMMCI_CONFIG_COMMAND register in
 * vmmci_changed(). Values are spelled out because they are compared against
 * what the device reports.
 */
enum vmmci_cmd {
	VMMCI_NONE	= 0,	/* nothing requested */
	VMMCI_SHUTDOWN	= 1,	/* host asks the guest to power off */
	VMMCI_REBOOT	= 2,	/* host asks the guest to reboot */
	VMMCI_SYNCRTC	= 3,	/* host asks the guest to resync its clock */
};
/* Per-device driver state, allocated in vmmci_probe() and stored in
* vdev->priv.
*/
struct virtio_vmmci {
/* Back-pointer to the virtio device this state belongs to. */
struct virtio_device *vdev;
/* Used for monitoring clock drift. monitor_work runs monitor_work_func()
* on the dedicated monitor_wq workqueue and re-queues itself there.
*/
struct workqueue_struct *monitor_wq;
struct delayed_work monitor_work;
/* Used for synchronizing the clock. Runs sync_work_func(); queued with
* schedule_work() from vmmci_changed() when VMMCI_SYNCRTC is received.
*/
struct work_struct sync_work;
};
/*
 * Devices this driver binds to: the vmmci device id from any vendor. The
 * zeroed entry ends the list.
 */
static struct virtio_device_id id_table[] = {
	{
		.device	= VIRTIO_ID_VMMCI,
		.vendor	= VIRTIO_DEV_ANY_ID,
	},
	{ 0 },
};
/* Feature bits this driver advertises through the driver's feature_table. */
static unsigned int features[] = {
	VMMCI_F_TIMESYNC,
	VMMCI_F_ACK,
	VMMCI_F_SYNCRTC,
};
/* Synchronizes the system time to the hardware clock (rtc). Uses a process
* similar to the one performed by the kernel at startup, as defined in
* the Linux kernel source file drivers/rtc/hctosys.c, minus the 32-bit
* and non-amd64 specific handling.
*/
#ifdef VMMCI_RTC_DEVICE
/*
 * Step the system clock to the time reported by the rtc device named by
 * VMMCI_RTC_DEVICE.
 *
 * Returns 0 on success, -ENODEV when the rtc device cannot be opened, or
 * the error returned by rtc_read_time() / do_settimeofday*().
 */
static int sync_system_time(void)
{
	struct rtc_device *rtc;
	struct rtc_time hw_tm;
	int rc;
	/* Start at half a second past whatever whole second the rtc reports. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,17,0)
	struct timespec time = {
#else
	struct timespec64 time = {
#endif
		.tv_nsec = NSEC_PER_SEC >> 1,
	};

	/* The hardware clock should be the emulated mc146818 clock device. */
	rtc = rtc_class_open(VMMCI_RTC_DEVICE);
	if (rtc == NULL) {
		printk(KERN_ERR "vmmci unable to open rtc device\n");
		return -ENODEV;
	}

	/*
	 * Reading the rtc should be equivalent to fetching the host time from
	 * the vmmci config registers, only simpler.
	 */
	rc = rtc_read_time(rtc, &hw_tm);
	if (rc) {
		printk(KERN_ERR "vmmci failed to read the hardware clock\n");
		goto out_close;
	}

	/*
	 * Stepping the clock with do_settimeofday*() should be safe: it is
	 * similar to OpenBSD's tc_setclock, which steps the system clock while
	 * triggering any alarms/timeouts that should fire.
	 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,17,0)
	rtc_tm_to_time(&hw_tm, &time.tv_sec);
	rc = do_settimeofday(&time);
#else
	time.tv_sec = rtc_tm_to_time64(&hw_tm);
	rc = do_settimeofday64(&time);
#endif
	if (rc) {
		printk(KERN_ERR "vmmci failed to set system clock to rtc!\n");
	} else {
		log("set system clock to %d-%02d-%02d %02d:%02d:%02d UTC\n",
		    hw_tm.tm_year + 1900, hw_tm.tm_mon + 1, hw_tm.tm_mday,
		    hw_tm.tm_hour, hw_tm.tm_min, hw_tm.tm_sec);
	}

out_close:
	/* Presumably drops the reference taken by rtc_class_open(). */
	rtc_class_close(rtc);
	return rc;
}
#else
static int sync_system_time(void)
{
debug("no known rtc device available");
return -1;
}
#endif
/*
 * Work handler for sync_work: runs sync_system_time() and reports the
 * outcome at debug level.
 *
 * work: the work item being run (unused).
 */
static void sync_work_func(struct work_struct *work)
{
	int rc;

	/* Newline-terminated like the other messages so lines do not run together. */
	debug("starting clock synchronization...\n");

	rc = sync_system_time();
	if (rc)
		debug("clock synchronization failed (%d)\n", rc);
	else
		debug("finished clock synchronization!\n");
}
/*
 * Periodic work handler that measures guest/host clock drift.
 *
 * Reads the host clock (seconds and microseconds) from the vmmci config
 * space, samples the guest's real-time clock, and stores host - guest in the
 * drift_sec/drift_nsec globals, which are exposed through sysctl. The
 * measurement is also logged at debug level. Before returning, the work
 * re-queues itself on the monitor workqueue with a DELAY_20s delay.
 *
 * work: the work_struct embedded in virtio_vmmci.monitor_work.
 */
static void monitor_work_func(struct work_struct *work)
{
	struct virtio_vmmci *vmmci;
	struct timespec64 host, guest, diff;
	s64 sec = 0, usec = 0;

	debug("measuring clock drift...\n");

	/*
	 * work points at the work_struct inside our delayed_work; naming the
	 * nested member lets container_of() do the arithmetic without a cast
	 * that depends on struct layout.
	 */
	vmmci = container_of(work, struct virtio_vmmci, monitor_work.work);

	vmmci->vdev->config->get(vmmci->vdev, VMMCI_CONFIG_TIME_SEC, &sec, sizeof(sec));
	vmmci->vdev->config->get(vmmci->vdev, VMMCI_CONFIG_TIME_USEC, &usec, sizeof(usec));

#if LINUX_VERSION_CODE < KERNEL_VERSION(5,0,0)
	getnstimeofday64(&guest);
#else
	ktime_get_real_ts64(&guest);
#endif

	debug("host clock: %lld.%09lld, guest clock: " TIME_FMT "\n",
	      sec, usec * NSEC_PER_USEC, guest.tv_sec, guest.tv_nsec);

	host.tv_sec = sec;
	host.tv_nsec = (long) usec * NSEC_PER_USEC;

	diff = timespec64_sub(host, guest);

	/*
	 * XXX: the drift globals are updated without locking, and tv_sec is
	 * narrowed from 64 bits to int here.
	 */
	drift_sec = diff.tv_sec;
	drift_nsec = diff.tv_nsec;

	debug("current clock drift: " TIME_FMT " seconds\n", diff.tv_sec, diff.tv_nsec);

	queue_delayed_work(vmmci->monitor_wq, &vmmci->monitor_work, DELAY_20s);
	debug("drift measurement routine finished\n");
}
/*
 * Probe callback: set up driver state for a newly bound vmmci device.
 *
 * Allocates the per-device state and stores it in vdev->priv, logs which
 * features the device offers, creates the monitor workqueue and queues the
 * first drift measurement (DELAY_1s), initializes the clock-sync work item,
 * and registers the drift sysctl entries.
 *
 * Returns 0 on success or -ENOMEM if the state or the workqueue cannot be
 * allocated. On failure nothing stays allocated and vdev->priv is NULL.
 */
static int vmmci_probe(struct virtio_device *vdev)
{
	struct virtio_vmmci *vmmci;

	debug("initializing vmmci device\n");
	debug("HZ: %d\n", HZ);

	vdev->priv = vmmci = kzalloc(sizeof(*vmmci), GFP_KERNEL);
	if (!vmmci) {
		printk(KERN_ERR "vmmci_probe: failed to alloc vmmci struct\n");
		return -ENOMEM;
	}
	vmmci->vdev = vdev;

	if (virtio_has_feature(vdev, VMMCI_F_TIMESYNC))
		debug("...found feature TIMESYNC\n");
	if (virtio_has_feature(vdev, VMMCI_F_ACK))
		debug("...found feature ACK\n");
	if (virtio_has_feature(vdev, VMMCI_F_SYNCRTC))
		debug("...found feature SYNCRTC\n");

	/* wire up routine clock drift monitoring */
	vmmci->monitor_wq = create_singlethread_workqueue(QNAME_MONITOR);
	if (vmmci->monitor_wq == NULL) {
		printk(KERN_ERR "vmmci_probe: failed to alloc monitoring workqueue\n");
		/* Don't leak the state or leave a dangling priv pointer. */
		vdev->priv = NULL;
		kfree(vmmci);
		return -ENOMEM;
	}

	/* Initialize both work items before anything gets queued. */
	INIT_WORK(&vmmci->sync_work, sync_work_func);
	INIT_DELAYED_WORK(&vmmci->monitor_work, monitor_work_func);
	queue_delayed_work(vmmci->monitor_wq, &vmmci->monitor_work, DELAY_1s);

#if LINUX_VERSION_CODE < KERNEL_VERSION(6,6,0)
	vmmci_table_header = register_sysctl_table(&vmmci_table);
#else
	vmmci_table_header = register_sysctl_sz("vmmci", drift_table, 2);
#endif
	/* The sysctl entries are informational only; carry on without them. */
	if (vmmci_table_header == NULL)
		printk(KERN_WARNING "vmmci_probe: failed to register sysctl table\n");

	log("started VMM Control Interface driver\n");
	return 0;
}
/*
 * Remove callback: tear down everything vmmci_probe() set up.
 *
 * Stops the drift monitor and the clock-sync work, destroys the monitor
 * workqueue, resets the device, frees the per-device state and unregisters
 * the sysctl entries.
 */
static void vmmci_remove(struct virtio_device *vdev)
{
	struct virtio_vmmci *vmmci = vdev->priv;

	debug("removing device\n");

	/*
	 * monitor_work re-queues itself, so it must be cancelled with the
	 * _sync variant: that waits for a running instance and prevents it
	 * from re-arming. A plain cancel followed by a flush can leave a
	 * freshly re-armed timer behind that fires after the workqueue and
	 * vmmci have been freed.
	 */
	cancel_delayed_work_sync(&vmmci->monitor_work);
	destroy_workqueue(vmmci->monitor_wq);
	cancel_work_sync(&vmmci->sync_work);
	debug("cancelled, flushed, and destroyed work queues\n");

	vdev->config->reset(vdev);
	debug("reset device\n");

	kfree(vmmci);
	unregister_sysctl_table(vmmci_table_header);
	log("removed device\n");
}
/*
 * Config-changed callback: read the command register and act on it.
 *
 * VMMCI_SHUTDOWN and VMMCI_REBOOT trigger an orderly poweroff/reboot,
 * VMMCI_SYNCRTC schedules sync_work, VMMCI_NONE is ignored, and anything
 * else is logged as invalid. Any command other than VMMCI_NONE is written
 * back to the command register as an acknowledgement when the device
 * offers VMMCI_F_ACK.
 */
static void vmmci_changed(struct virtio_device *vdev)
{
	struct virtio_vmmci *vmmci = vdev->priv;
	s32 cmd = 0;

	debug("reading command register...\n");
	vdev->config->get(vdev, VMMCI_CONFIG_COMMAND, &cmd, sizeof(cmd));

	switch (cmd) {
	case VMMCI_NONE:
		debug("VMMCI_NONE received\n");
		break;
	case VMMCI_SHUTDOWN:
		log("shutdown requested by host!\n");
		orderly_poweroff(false);
		break;
	case VMMCI_REBOOT:
		log("reboot requested by host!\n");
		orderly_reboot();
		break;
	case VMMCI_SYNCRTC:
		log("clock sync requested by host\n");
		schedule_work(&vmmci->sync_work);
		break;
	default:
		printk(KERN_ERR "invalid command received: 0x%04x\n", cmd);
		break;
	}

	/*
	 * VMMCI_F_ACK is used as a feature bit number elsewhere in this file
	 * (the features[] table and virtio_has_feature() in vmmci_probe()),
	 * so test it the same way rather than AND-ing it against the raw
	 * feature bitmap as if it were a mask.
	 */
	if (cmd != VMMCI_NONE && virtio_has_feature(vdev, VMMCI_F_ACK)) {
		vdev->config->set(vdev, VMMCI_CONFIG_COMMAND, &cmd, sizeof(cmd));
		debug("...acknowledged command %d\n", cmd);
	}
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)
/*
 * Validate callback (wired to .validate on kernels >= 4.10). Currently a
 * stub: it only emits a debug message and reports success.
 */
static int vmmci_validate(struct virtio_device *vdev)
{
	/* Newline-terminated to match the freeze/restore stubs. */
	debug("not implemented\n");
	return 0;
}
#endif
#ifdef CONFIG_PM_SLEEP
/* Freeze callback (wired to .freeze when CONFIG_PM_SLEEP is set). Currently
* a stub: it only emits a debug message and reports success.
*/
static int vmmci_freeze(struct virtio_device *vdev)
{
debug("not implemented\n");
return 0;
}
/* Restore callback (wired to .restore when CONFIG_PM_SLEEP is set).
* Currently a stub: it only emits a debug message and reports success.
*/
static int vmmci_restore(struct virtio_device *vdev)
{
debug("not implemented\n");
return 0;
}
#endif
/* Driver description handed to module_virtio_driver(): the feature and
* device id tables plus the callbacks defined in this file.
*/
static struct virtio_driver virtio_vmmci_driver = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,
/* vmmci_validate() is only defined for kernels >= 4.10. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)
.validate = vmmci_validate,
#endif
.probe = vmmci_probe,
.remove = vmmci_remove,
.config_changed = vmmci_changed,
/* The freeze/restore stubs only exist when CONFIG_PM_SLEEP is set. */
#ifdef CONFIG_PM_SLEEP
.freeze = vmmci_freeze,
.restore = vmmci_restore,
#endif
};
/* Generate the module init/exit boilerplate for virtio_vmmci_driver. */
module_virtio_driver(virtio_vmmci_driver);
/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("OpenBSD VMM Control Interface");
MODULE_AUTHOR("Dave Voutila <[email protected]>");
/* Soft dependency: ask for virtio_pci_obsd to be loaded before this module. */
MODULE_SOFTDEP("pre: virtio_pci_obsd");