| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
 | /*
 * kvm trace
 *
 * It is designed to allow debugging traces of kvm to be generated
 * on UP / SMP machines.  Each trace entry can be timestamped so that
 * it's possible to reconstruct a chronological record of trace events.
 * The implementation refers to blktrace kernel support.
 *
 * Copyright (c) 2008 Intel Corporation
 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
 *
 * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
 *
 * Date:    Feb 2008
 */
#include <linux/module.h>
#include <linux/relay.h>
#include <linux/debugfs.h>
#include <linux/ktime.h>
#include <linux/kvm_host.h>
/*
 * Values stored in kvm_trace.trace_state.
 *
 * RUNNING: the only state in which kvm_add_trace() emits records.
 * PAUSE:   set by kvm_trace_pause(); records are dropped by the probe.
 * CLEARUP: set by kvm_trace_cleanup() while the session is torn down.
 */
#define KVM_TRACE_STATE_RUNNING 	(1 << 0)
#define KVM_TRACE_STATE_PAUSE 		(1 << 1)
#define KVM_TRACE_STATE_CLEARUP 	(1 << 2)
/* State of one tracing session, allocated by do_kvm_trace_enable(). */
struct kvm_trace {
	/* one of the KVM_TRACE_STATE_* values */
	int trace_state;
	/* relay channel the trace records are written to */
	struct rchan *rchan;
	/* debugfs "lost_records" file exposing lost_records */
	struct dentry *lost_file;
	/* incremented each time a full relay buffer is encountered */
	atomic_t lost_records;
};
/*
 * The tracing session used by the probes and the ioctl handlers.
 * NULL until do_kvm_trace_enable() has installed a session.
 */
static struct kvm_trace *kvm_trace;
/* Description of one marker probe registered while tracing is enabled. */
struct kvm_trace_probe {
	/* marker name passed to marker_probe_register() */
	const char *name;
	/* marker format string passed to marker_probe_register() */
	const char *format;
	/* non-zero: records from this probe carry a timestamp */
	u32 timestamp_in;
	/* handler invoked for the marker */
	marker_probe_func *probe_func;
};
/*
 * calc_rec_size - number of bytes of a trace record to emit.
 * @timestamp: non-zero when the record carries a timestamp
 * @extra:     size in bytes of the extra data words
 *
 * Returns the header size plus @extra, plus the timestamp field size when
 * @timestamp is set.
 */
static inline int calc_rec_size(int timestamp, int extra)
{
	int total = KVM_TRC_HEAD_SIZE;

	total += extra;
	if (timestamp)
		total += KVM_TRC_CYCLE_SIZE;

	return total;
}
/*
 * kvm_add_trace - marker probe handler that emits one trace record.
 * @probe_private: the struct kvm_trace_probe this handler was registered with
 * @call_data:     unused
 * @format:        unused
 * @args:          marker arguments, consumed in this order: event id (u32),
 *                 vcpu pointer, number of extra words (u32), then that many
 *                 u32 extra words
 *
 * Nothing is written unless the session is in the RUNNING state.  The
 * number of extra words is clamped to KVM_TRC_EXTRA_MAX (with a warning if
 * it was larger).  Only the populated leading part of the record is handed
 * to relay_write().
 */
static void kvm_add_trace(void *probe_private, void *call_data,
			  const char *format, va_list *args)
{
	struct kvm_trace_probe *probe = probe_private;
	struct kvm_trace *trace = kvm_trace;
	struct kvm_trace_rec rec;
	struct kvm_vcpu *vcpu;
	u32 event, nr_extra, idx;
	int len;

	if (unlikely(trace->trace_state != KVM_TRACE_STATE_RUNNING))
		return;

	/* The order of these reads must match the marker format. */
	event = va_arg(*args, u32);
	vcpu = va_arg(*args, struct kvm_vcpu *);
	nr_extra = va_arg(*args, u32);

	WARN_ON(nr_extra > KVM_TRC_EXTRA_MAX);
	if (nr_extra > KVM_TRC_EXTRA_MAX)
		nr_extra = KVM_TRC_EXTRA_MAX;

	rec.rec_val = TRACE_REC_EVENT_ID(event);
	rec.rec_val |= TRACE_REC_TCS(probe->timestamp_in)
			| TRACE_REC_NUM_DATA_ARGS(nr_extra);
	rec.pid = current->tgid;
	rec.vcpu_id = vcpu->vcpu_id;

	if (probe->timestamp_in)
		rec.u.timestamp.timestamp = ktime_to_ns(ktime_get());

	/* The extra words live in a different union member per layout. */
	for (idx = 0; idx < nr_extra; idx++) {
		u32 word = va_arg(*args, u32);

		if (probe->timestamp_in)
			rec.u.timestamp.extra_u32[idx] = word;
		else
			rec.u.notimestamp.extra_u32[idx] = word;
	}

	len = calc_rec_size(probe->timestamp_in, nr_extra * sizeof(u32));
	relay_write(trace->rchan, &rec, len);
}
/*
 * Probes registered by do_kvm_trace_enable() and unregistered by
 * kvm_trace_cleanup().  Both use kvm_add_trace(); only the first one
 * produces timestamped records.
 */
static struct kvm_trace_probe kvm_trace_probes[] = {
	{
		.name		= "kvm_trace_entryexit",
		.format		= "%u %p %u %u %u %u %u %u",
		.timestamp_in	= 1,
		.probe_func	= kvm_add_trace,
	},
	{
		.name		= "kvm_trace_handler",
		.format		= "%u %p %u %u %u %u %u %u",
		.timestamp_in	= 0,
		.probe_func	= kvm_add_trace,
	},
};
/*
 * lost_records_get - getter behind the "lost_records" attribute.
 * @data: the struct kvm_trace the file was created for
 * @val:  receives the current lost-record count
 *
 * Always returns 0.
 */
static int lost_records_get(void *data, u64 *val)
{
	struct kvm_trace *trace = data;
	u64 lost = atomic_read(&trace->lost_records);

	*val = lost;
	return 0;
}
/*
 * File operations for the "lost_records" file: value read through
 * lost_records_get(), no setter, formatted as an unsigned decimal line.
 */
DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");
/*
 * kvm_subbuf_start_callback - relay subbuf_start hook.
 *
 * The relay channel is used in "no-overwrite" mode.  Each time this hook
 * finds the buffer full it bumps the session's lost_records counter, so
 * that user space can learn how many times records were lost, and
 * returns 0.  Otherwise it returns 1.
 *
 * When there is no previous sub-buffer, a u32 magic value is reserved and
 * stored at the start of the sub-buffer.
 */
static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
				     void *prev_subbuf, size_t prev_padding)
{
	if (relay_buf_full(buf)) {
		struct kvm_trace *trace = buf->chan->private_data;

		atomic_inc(&trace->lost_records);
		return 0;
	}

	if (prev_subbuf == NULL) {
		/*
		 * executed only once when the channel is opened:
		 * save metadata as the first record
		 */
		subbuf_start_reserve(buf, sizeof(u32));
		*(u32 *)subbuf = 0x12345678;
	}

	return 1;
}
/*
 * kvm_create_buf_file_callack - relay create_buf_file hook.
 *
 * Creates a debugfs file named @filename under @parent with permissions
 * @mode, backed by relay_file_operations and carrying @buf as its private
 * data.  @is_global is not touched.  Returns whatever
 * debugfs_create_file() returns.
 */
static struct dentry *kvm_create_buf_file_callack(const char *filename,
						 struct dentry *parent,
						 int mode,
						 struct rchan_buf *buf,
						 int *is_global)
{
	struct dentry *buf_file;

	buf_file = debugfs_create_file(filename, mode, parent, buf,
				       &relay_file_operations);
	return buf_file;
}
/*
 * kvm_remove_buf_file_callback - relay remove_buf_file hook.
 *
 * Removes the debugfs entry @dentry.  Always returns 0.
 */
static int kvm_remove_buf_file_callback(struct dentry *dentry)
{
	debugfs_remove(dentry);

	return 0;
}
/* Callbacks handed to relay_open() for the "trace" channel. */
static struct rchan_callbacks kvm_relay_callbacks = {
	.subbuf_start 		= kvm_subbuf_start_callback,
	.create_buf_file 	= kvm_create_buf_file_callack,
	.remove_buf_file 	= kvm_remove_buf_file_callback,
};
static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
{
	struct kvm_trace *kt;
	int i, r = -ENOMEM;
	if (!kuts->buf_size || !kuts->buf_nr)
		return -EINVAL;
	kt = kzalloc(sizeof(*kt), GFP_KERNEL);
	if (!kt)
		goto err;
	r = -EIO;
	atomic_set(&kt->lost_records, 0);
	kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir,
					    kt, &kvm_trace_lost_ops);
	if (!kt->lost_file)
		goto err;
	kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size,
				kuts->buf_nr, &kvm_relay_callbacks, kt);
	if (!kt->rchan)
		goto err;
	kvm_trace = kt;
	for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
		struct kvm_trace_probe *p = &kvm_trace_probes[i];
		r = marker_probe_register(p->name, p->format, p->probe_func, p);
		if (r)
			printk(KERN_INFO "Unable to register probe %s\n",
			       p->name);
	}
	kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;
	return 0;
err:
	if (kt) {
		if (kt->lost_file)
			debugfs_remove(kt->lost_file);
		if (kt->rchan)
			relay_close(kt->rchan);
		kfree(kt);
	}
	return r;
}
/*
 * kvm_trace_enable - KVM_TRACE_ENABLE handler.
 * @arg: user pointer to a struct kvm_user_trace_setup
 *
 * Copies the setup from user space and starts tracing with it.  Returns
 * -EFAULT if the copy fails, otherwise the result of
 * do_kvm_trace_enable().
 */
static int kvm_trace_enable(char __user *arg)
{
	struct kvm_user_trace_setup setup;

	if (copy_from_user(&setup, arg, sizeof(setup)))
		return -EFAULT;

	return do_kvm_trace_enable(&setup);
}
/*
 * kvm_trace_pause - KVM_TRACE_PAUSE handler.
 *
 * Moves a RUNNING session to the PAUSE state and flushes its relay
 * channel.  Returns 0 on success, or -EINVAL when there is no session or
 * the session is not currently running.
 */
static int kvm_trace_pause(void)
{
	struct kvm_trace *trace = kvm_trace;

	if (!trace || trace->trace_state != KVM_TRACE_STATE_RUNNING)
		return -EINVAL;

	trace->trace_state = KVM_TRACE_STATE_PAUSE;
	relay_flush(trace->rchan);

	return 0;
}
/*
 * kvm_trace_cleanup - tear down the current tracing session, if any.
 *
 * Does nothing when there is no session, or when the session is neither
 * RUNNING nor PAUSE.  Otherwise the session is marked CLEARUP, every probe
 * in kvm_trace_probes[] is unregistered and
 * marker_synchronize_unregister() is called, and then the relay channel is
 * closed, the "lost_records" debugfs file is removed and the session is
 * freed.
 *
 * The global kvm_trace pointer is reset to NULL before the session memory
 * is released.  Without that, a later pause or cleanup request (for
 * example a second KVM_TRACE_DISABLE) would inspect freed memory and could
 * free it a second time.
 */
void kvm_trace_cleanup(void)
{
	struct kvm_trace *kt = kvm_trace;
	int i;

	if (kt == NULL)
		return;

	if (kt->trace_state != KVM_TRACE_STATE_RUNNING &&
	    kt->trace_state != KVM_TRACE_STATE_PAUSE)
		return;

	/* From here on kvm_add_trace() drops every event it is handed. */
	kt->trace_state = KVM_TRACE_STATE_CLEARUP;

	for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
		struct kvm_trace_probe *p = &kvm_trace_probes[i];

		marker_probe_unregister(p->name, p->probe_func, p);
	}
	marker_synchronize_unregister();

	/*
	 * kvm_add_trace() dereferences kvm_trace unconditionally, so the
	 * pointer is only cleared once the probes have been unregistered
	 * and synchronized.
	 */
	kvm_trace = NULL;

	relay_close(kt->rchan);
	debugfs_remove(kt->lost_file);
	kfree(kt);
}
int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	long r = -EINVAL;
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	switch (ioctl) {
	case KVM_TRACE_ENABLE:
		r = kvm_trace_enable(argp);
		break;
	case KVM_TRACE_PAUSE:
		r = kvm_trace_pause();
		break;
	case KVM_TRACE_DISABLE:
		r = 0;
		kvm_trace_cleanup();
		break;
	}
	return r;
}
 |