Merge lp:~semiosis/ubuntu/trusty/glusterfs/fix-for-1268064 into lp:ubuntu/trusty/glusterfs
- Trusty (14.04)
- fix-for-1268064
- Merge into trusty
Proposed by
Louis Zuckerman
Status: | Merged |
---|---|
Approved by: | James Page |
Approved revision: | 31 |
Merge reported by: | James Page |
Merged at revision: | not available |
Proposed branch: | lp:~semiosis/ubuntu/trusty/glusterfs/fix-for-1268064 |
Merge into: | lp:ubuntu/trusty/glusterfs |
Diff against target: |
17170 lines (+7414/-7666) 93 files modified
.pc/.quilt_patches (+0/-1) .pc/.quilt_series (+0/-1) .pc/.version (+0/-1) .pc/01-spelling-error.diff/rpc/rpc-transport/rdma/src/rdma.c (+0/-4578) .pc/01-spelling-error.diff/xlators/mgmt/glusterd/src/glusterd-store.c (+0/-2530) .pc/applied-patches (+0/-1) ChangeLog (+1978/-4) api/Makefile.am (+1/-1) api/Makefile.in (+1/-1) api/examples/Makefile.am (+6/-0) api/examples/Makefile.in (+555/-0) api/examples/README (+19/-0) api/examples/gfapi.py (+402/-0) api/examples/glfsxmp.c (+1601/-0) api/examples/setup.py.in (+29/-0) api/src/Makefile.am (+3/-2) api/src/Makefile.in (+15/-3) api/src/glfs-fops.c (+91/-32) api/src/glfs-handleops.c (+1278/-0) api/src/glfs-handles.h (+143/-0) api/src/glfs-internal.h (+59/-0) api/src/glfs-mem-types.h (+2/-1) api/src/glfs-resolve.c (+70/-3) api/src/glfs.c (+22/-4) api/src/glfs.h (+26/-0) cli/src/cli-cmd-parser.c (+36/-4) cli/src/cli-cmd-volume.c (+68/-3) cli/src/cli-rpc-ops.c (+22/-14) cli/src/cli-xml-output.c (+81/-4) configure (+13/-11) configure.ac (+3/-1) debian/changelog (+16/-0) debian/glusterfs-client.mounting-glusterfs.upstart (+9/-0) debian/glusterfs-server.mounting-glusterfs.upstart (+0/-7) debian/rules (+3/-2) debian/source.lintian-overrides (+1/-0) doc/glusterd.vol (+1/-0) glusterfs.spec (+41/-16) glusterfs.spec.in (+33/-8) glusterfsd/src/glusterfsd-mgmt.c (+5/-0) libglusterfs/src/globals.c (+56/-1) libglusterfs/src/globals.h (+4/-0) libglusterfs/src/mem-types.h (+2/-1) libglusterfs/src/syncop.c (+154/-0) libglusterfs/src/syncop.h (+63/-19) rpc/rpc-transport/rdma/src/rdma.c (+4/-4) xlators/cluster/afr/src/afr-inode-read.c (+1/-1) xlators/cluster/afr/src/afr.c (+10/-0) xlators/cluster/dht/src/dht-common.c (+16/-6) xlators/cluster/dht/src/dht-common.h (+1/-0) xlators/cluster/dht/src/dht-inode-read.c (+4/-0) xlators/cluster/dht/src/dht-inode-write.c (+2/-0) xlators/cluster/dht/src/dht-layout.c (+11/-3) xlators/cluster/dht/src/dht-rebalance.c (+29/-14) xlators/cluster/dht/src/dht-selfheal.c (+7/-0) 
xlators/mgmt/glusterd/src/glusterd-brick-ops.c (+9/-3) xlators/mgmt/glusterd/src/glusterd-op-sm.c (+71/-4) xlators/mgmt/glusterd/src/glusterd-pmap.c (+5/-5) xlators/mgmt/glusterd/src/glusterd-replace-brick.c (+8/-3) xlators/mgmt/glusterd/src/glusterd-store.c (+3/-4) xlators/mgmt/glusterd/src/glusterd-utils.c (+148/-14) xlators/mgmt/glusterd/src/glusterd-utils.h (+7/-2) xlators/mgmt/glusterd/src/glusterd-volume-ops.c (+12/-4) xlators/mgmt/glusterd/src/glusterd.c (+10/-2) xlators/mgmt/glusterd/src/glusterd.h (+1/-0) xlators/mount/fuse/src/fuse-bridge.h (+14/-7) xlators/nfs/server/src/mount3.c (+5/-14) xlators/nfs/server/src/mount3.h (+4/-13) xlators/nfs/server/src/mount3udp_svc.c (+4/-13) xlators/nfs/server/src/nfs-common.c (+4/-13) xlators/nfs/server/src/nfs-common.h (+4/-13) xlators/nfs/server/src/nfs-fops.c (+4/-13) xlators/nfs/server/src/nfs-fops.h (+4/-13) xlators/nfs/server/src/nfs-generics.c (+4/-13) xlators/nfs/server/src/nfs-generics.h (+4/-13) xlators/nfs/server/src/nfs-inodes.c (+4/-13) xlators/nfs/server/src/nfs-inodes.h (+4/-13) xlators/nfs/server/src/nfs-mem-types.h (+4/-13) xlators/nfs/server/src/nfs.c (+4/-13) xlators/nfs/server/src/nfs.h (+4/-13) xlators/nfs/server/src/nfs3-fh.c (+4/-13) xlators/nfs/server/src/nfs3-fh.h (+4/-13) xlators/nfs/server/src/nfs3-helpers.c (+4/-13) xlators/nfs/server/src/nfs3-helpers.h (+4/-13) xlators/nfs/server/src/nfs3.c (+4/-13) xlators/nfs/server/src/nfs3.h (+4/-13) xlators/nfs/server/src/nlm4.c (+4/-13) xlators/nfs/server/src/nlm4.h (+4/-13) xlators/nfs/server/src/nlmcbk_svc.c (+4/-13) xlators/protocol/auth/addr/src/addr.c (+4/-13) xlators/protocol/auth/login/src/login.c (+4/-13) xlators/protocol/client/src/client-handshake.c (+1/-0) xlators/storage/posix/src/posix.c (+27/-10) |
To merge this branch: | bzr merge lp:~semiosis/ubuntu/trusty/glusterfs/fix-for-1268064 |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
James Page | Pending | ||
Review via email: mp+201280@code.launchpad.net |
Commit message
Description of the change
Update glusterfs version & fix bug
To post a comment you must log in.
- 31. By Louis Zuckerman
-
adding debian/glusterfs-client.mounting-glusterfs.upstart which was missed in the last commit
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === removed directory '.pc' |
2 | === removed file '.pc/.quilt_patches' |
3 | --- .pc/.quilt_patches 2013-05-29 09:55:03 +0000 |
4 | +++ .pc/.quilt_patches 1970-01-01 00:00:00 +0000 |
5 | @@ -1,1 +0,0 @@ |
6 | -debian/patches |
7 | |
8 | === removed file '.pc/.quilt_series' |
9 | --- .pc/.quilt_series 2013-05-29 09:55:03 +0000 |
10 | +++ .pc/.quilt_series 1970-01-01 00:00:00 +0000 |
11 | @@ -1,1 +0,0 @@ |
12 | -series |
13 | |
14 | === removed file '.pc/.version' |
15 | --- .pc/.version 2013-05-29 09:55:03 +0000 |
16 | +++ .pc/.version 1970-01-01 00:00:00 +0000 |
17 | @@ -1,1 +0,0 @@ |
18 | -2 |
19 | |
20 | === removed directory '.pc/01-spelling-error.diff' |
21 | === removed directory '.pc/01-spelling-error.diff/rpc' |
22 | === removed directory '.pc/01-spelling-error.diff/rpc/rpc-transport' |
23 | === removed directory '.pc/01-spelling-error.diff/rpc/rpc-transport/rdma' |
24 | === removed directory '.pc/01-spelling-error.diff/rpc/rpc-transport/rdma/src' |
25 | === removed file '.pc/01-spelling-error.diff/rpc/rpc-transport/rdma/src/rdma.c' |
26 | --- .pc/01-spelling-error.diff/rpc/rpc-transport/rdma/src/rdma.c 2013-07-23 13:53:32 +0000 |
27 | +++ .pc/01-spelling-error.diff/rpc/rpc-transport/rdma/src/rdma.c 1970-01-01 00:00:00 +0000 |
28 | @@ -1,4578 +0,0 @@ |
29 | -/* |
30 | - Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> |
31 | - This file is part of GlusterFS. |
32 | - |
33 | - This file is licensed to you under your choice of the GNU Lesser |
34 | - General Public License, version 3 or any later version (LGPLv3 or |
35 | - later), or the GNU General Public License, version 2 (GPLv2), in all |
36 | - cases as published by the Free Software Foundation. |
37 | -*/ |
38 | - |
39 | -#ifndef _CONFIG_H |
40 | -#define _CONFIG_H |
41 | -#include "config.h" |
42 | -#endif |
43 | - |
44 | -#include "dict.h" |
45 | -#include "glusterfs.h" |
46 | -#include "logging.h" |
47 | -#include "rdma.h" |
48 | -#include "name.h" |
49 | -#include "byte-order.h" |
50 | -#include "xlator.h" |
51 | -#include "xdr-rpc.h" |
52 | -#include <signal.h> |
53 | - |
54 | -#define GF_RDMA_LOG_NAME "rpc-transport/rdma" |
55 | - |
56 | -static int32_t |
57 | -__gf_rdma_ioq_churn (gf_rdma_peer_t *peer); |
58 | - |
59 | -gf_rdma_post_t * |
60 | -gf_rdma_post_ref (gf_rdma_post_t *post); |
61 | - |
62 | -int |
63 | -gf_rdma_post_unref (gf_rdma_post_t *post); |
64 | - |
65 | -static void * |
66 | -gf_rdma_send_completion_proc (void *data); |
67 | - |
68 | -static void * |
69 | -gf_rdma_recv_completion_proc (void *data); |
70 | - |
71 | -void * |
72 | -gf_rdma_async_event_thread (void *context); |
73 | - |
74 | -static int32_t |
75 | -gf_rdma_create_qp (rpc_transport_t *this); |
76 | - |
77 | -static int32_t |
78 | -__gf_rdma_teardown (rpc_transport_t *this); |
79 | - |
80 | -static int32_t |
81 | -gf_rdma_teardown (rpc_transport_t *this); |
82 | - |
83 | -static int32_t |
84 | -gf_rdma_disconnect (rpc_transport_t *this); |
85 | - |
86 | -static void |
87 | -gf_rdma_cm_handle_disconnect (rpc_transport_t *this); |
88 | - |
89 | - |
90 | -static void |
91 | -gf_rdma_put_post (gf_rdma_queue_t *queue, gf_rdma_post_t *post) |
92 | -{ |
93 | - post->ctx.is_request = 0; |
94 | - |
95 | - pthread_mutex_lock (&queue->lock); |
96 | - { |
97 | - if (post->prev) { |
98 | - queue->active_count--; |
99 | - post->prev->next = post->next; |
100 | - } |
101 | - |
102 | - if (post->next) { |
103 | - post->next->prev = post->prev; |
104 | - } |
105 | - |
106 | - post->prev = &queue->passive_posts; |
107 | - post->next = post->prev->next; |
108 | - post->prev->next = post; |
109 | - post->next->prev = post; |
110 | - queue->passive_count++; |
111 | - } |
112 | - pthread_mutex_unlock (&queue->lock); |
113 | -} |
114 | - |
115 | - |
116 | -static gf_rdma_post_t * |
117 | -gf_rdma_new_post (rpc_transport_t *this, gf_rdma_device_t *device, int32_t len, |
118 | - gf_rdma_post_type_t type) |
119 | -{ |
120 | - gf_rdma_post_t *post = NULL; |
121 | - int ret = -1; |
122 | - |
123 | - post = (gf_rdma_post_t *) GF_CALLOC (1, sizeof (*post), |
124 | - gf_common_mt_rdma_post_t); |
125 | - if (post == NULL) { |
126 | - goto out; |
127 | - } |
128 | - |
129 | - pthread_mutex_init (&post->lock, NULL); |
130 | - |
131 | - post->buf_size = len; |
132 | - |
133 | - post->buf = valloc (len); |
134 | - if (!post->buf) { |
135 | - gf_log_nomem (GF_RDMA_LOG_NAME, GF_LOG_ERROR, len); |
136 | - goto out; |
137 | - } |
138 | - |
139 | - post->mr = ibv_reg_mr (device->pd, |
140 | - post->buf, |
141 | - post->buf_size, |
142 | - IBV_ACCESS_LOCAL_WRITE); |
143 | - if (!post->mr) { |
144 | - gf_log (this->name, GF_LOG_WARNING, |
145 | - "memory registration failed (%s)", |
146 | - strerror (errno)); |
147 | - goto out; |
148 | - } |
149 | - |
150 | - post->device = device; |
151 | - post->type = type; |
152 | - |
153 | - ret = 0; |
154 | -out: |
155 | - if (ret != 0) { |
156 | - free (post->buf); |
157 | - |
158 | - GF_FREE (post); |
159 | - post = NULL; |
160 | - } |
161 | - |
162 | - return post; |
163 | -} |
164 | - |
165 | - |
166 | -static gf_rdma_post_t * |
167 | -gf_rdma_get_post (gf_rdma_queue_t *queue) |
168 | -{ |
169 | - gf_rdma_post_t *post = NULL; |
170 | - |
171 | - pthread_mutex_lock (&queue->lock); |
172 | - { |
173 | - post = queue->passive_posts.next; |
174 | - if (post == &queue->passive_posts) |
175 | - post = NULL; |
176 | - |
177 | - if (post) { |
178 | - if (post->prev) |
179 | - post->prev->next = post->next; |
180 | - if (post->next) |
181 | - post->next->prev = post->prev; |
182 | - post->prev = &queue->active_posts; |
183 | - post->next = post->prev->next; |
184 | - post->prev->next = post; |
185 | - post->next->prev = post; |
186 | - post->reused++; |
187 | - queue->active_count++; |
188 | - } |
189 | - } |
190 | - pthread_mutex_unlock (&queue->lock); |
191 | - |
192 | - return post; |
193 | -} |
194 | - |
195 | -void |
196 | -gf_rdma_destroy_post (gf_rdma_post_t *post) |
197 | -{ |
198 | - ibv_dereg_mr (post->mr); |
199 | - free (post->buf); |
200 | - GF_FREE (post); |
201 | -} |
202 | - |
203 | - |
204 | -static int32_t |
205 | -__gf_rdma_quota_get (gf_rdma_peer_t *peer) |
206 | -{ |
207 | - int32_t ret = -1; |
208 | - gf_rdma_private_t *priv = NULL; |
209 | - |
210 | - priv = peer->trans->private; |
211 | - |
212 | - if (priv->connected && peer->quota > 0) { |
213 | - ret = peer->quota--; |
214 | - } |
215 | - |
216 | - return ret; |
217 | -} |
218 | - |
219 | - |
220 | -static void |
221 | -__gf_rdma_ioq_entry_free (gf_rdma_ioq_t *entry) |
222 | -{ |
223 | - list_del_init (&entry->list); |
224 | - |
225 | - if (entry->iobref) { |
226 | - iobref_unref (entry->iobref); |
227 | - entry->iobref = NULL; |
228 | - } |
229 | - |
230 | - if (entry->msg.request.rsp_iobref) { |
231 | - iobref_unref (entry->msg.request.rsp_iobref); |
232 | - entry->msg.request.rsp_iobref = NULL; |
233 | - } |
234 | - |
235 | - mem_put (entry); |
236 | -} |
237 | - |
238 | - |
239 | -static void |
240 | -__gf_rdma_ioq_flush (gf_rdma_peer_t *peer) |
241 | -{ |
242 | - gf_rdma_ioq_t *entry = NULL, *dummy = NULL; |
243 | - |
244 | - list_for_each_entry_safe (entry, dummy, &peer->ioq, list) { |
245 | - __gf_rdma_ioq_entry_free (entry); |
246 | - } |
247 | -} |
248 | - |
249 | - |
250 | -static int32_t |
251 | -__gf_rdma_disconnect (rpc_transport_t *this) |
252 | -{ |
253 | - gf_rdma_private_t *priv = NULL; |
254 | - |
255 | - priv = this->private; |
256 | - |
257 | - if (priv->connected) { |
258 | - rdma_disconnect (priv->peer.cm_id); |
259 | - } |
260 | - |
261 | - return 0; |
262 | -} |
263 | - |
264 | - |
265 | -static void |
266 | -gf_rdma_queue_init (gf_rdma_queue_t *queue) |
267 | -{ |
268 | - pthread_mutex_init (&queue->lock, NULL); |
269 | - |
270 | - queue->active_posts.next = &queue->active_posts; |
271 | - queue->active_posts.prev = &queue->active_posts; |
272 | - queue->passive_posts.next = &queue->passive_posts; |
273 | - queue->passive_posts.prev = &queue->passive_posts; |
274 | -} |
275 | - |
276 | - |
277 | -static void |
278 | -__gf_rdma_destroy_queue (gf_rdma_post_t *post) |
279 | -{ |
280 | - gf_rdma_post_t *tmp = NULL; |
281 | - |
282 | - while (post->next != post) { |
283 | - tmp = post->next; |
284 | - |
285 | - post->next = post->next->next; |
286 | - post->next->prev = post; |
287 | - |
288 | - gf_rdma_destroy_post (tmp); |
289 | - } |
290 | -} |
291 | - |
292 | - |
293 | -static void |
294 | -gf_rdma_destroy_queue (gf_rdma_queue_t *queue) |
295 | -{ |
296 | - if (queue == NULL) { |
297 | - goto out; |
298 | - } |
299 | - |
300 | - pthread_mutex_lock (&queue->lock); |
301 | - { |
302 | - if (queue->passive_count > 0) { |
303 | - __gf_rdma_destroy_queue (&queue->passive_posts); |
304 | - queue->passive_count = 0; |
305 | - } |
306 | - |
307 | - if (queue->active_count > 0) { |
308 | - __gf_rdma_destroy_queue (&queue->active_posts); |
309 | - queue->active_count = 0; |
310 | - } |
311 | - } |
312 | - pthread_mutex_unlock (&queue->lock); |
313 | - |
314 | -out: |
315 | - return; |
316 | -} |
317 | - |
318 | - |
319 | -static void |
320 | -gf_rdma_destroy_posts (rpc_transport_t *this) |
321 | -{ |
322 | - gf_rdma_device_t *device = NULL; |
323 | - gf_rdma_private_t *priv = NULL; |
324 | - |
325 | - if (this == NULL) { |
326 | - goto out; |
327 | - } |
328 | - |
329 | - priv = this->private; |
330 | - device = priv->device; |
331 | - |
332 | - gf_rdma_destroy_queue (&device->sendq); |
333 | - gf_rdma_destroy_queue (&device->recvq); |
334 | - |
335 | -out: |
336 | - return; |
337 | -} |
338 | - |
339 | - |
340 | -static int32_t |
341 | -__gf_rdma_create_posts (rpc_transport_t *this, int32_t count, int32_t size, |
342 | - gf_rdma_queue_t *q, gf_rdma_post_type_t type) |
343 | -{ |
344 | - int32_t i = 0; |
345 | - int32_t ret = 0; |
346 | - gf_rdma_private_t *priv = NULL; |
347 | - gf_rdma_device_t *device = NULL; |
348 | - |
349 | - priv = this->private; |
350 | - device = priv->device; |
351 | - |
352 | - for (i=0 ; i<count ; i++) { |
353 | - gf_rdma_post_t *post = NULL; |
354 | - |
355 | - post = gf_rdma_new_post (this, device, size + 2048, type); |
356 | - if (!post) { |
357 | - gf_log (this->name, GF_LOG_ERROR, |
358 | - "post creation failed"); |
359 | - ret = -1; |
360 | - break; |
361 | - } |
362 | - |
363 | - gf_rdma_put_post (q, post); |
364 | - } |
365 | - return ret; |
366 | -} |
367 | - |
368 | - |
369 | -static int32_t |
370 | -gf_rdma_post_recv (struct ibv_srq *srq, |
371 | - gf_rdma_post_t *post) |
372 | -{ |
373 | - struct ibv_sge list = { |
374 | - .addr = (unsigned long) post->buf, |
375 | - .length = post->buf_size, |
376 | - .lkey = post->mr->lkey |
377 | - }; |
378 | - |
379 | - struct ibv_recv_wr wr = { |
380 | - .wr_id = (unsigned long) post, |
381 | - .sg_list = &list, |
382 | - .num_sge = 1, |
383 | - }, *bad_wr; |
384 | - |
385 | - gf_rdma_post_ref (post); |
386 | - |
387 | - return ibv_post_srq_recv (srq, &wr, &bad_wr); |
388 | -} |
389 | - |
390 | - |
391 | -static int32_t |
392 | -gf_rdma_create_posts (rpc_transport_t *this) |
393 | -{ |
394 | - int32_t i = 0, ret = 0; |
395 | - gf_rdma_post_t *post = NULL; |
396 | - gf_rdma_private_t *priv = NULL; |
397 | - gf_rdma_options_t *options = NULL; |
398 | - gf_rdma_device_t *device = NULL; |
399 | - |
400 | - priv = this->private; |
401 | - options = &priv->options; |
402 | - device = priv->device; |
403 | - |
404 | - ret = __gf_rdma_create_posts (this, options->send_count, |
405 | - options->send_size, |
406 | - &device->sendq, GF_RDMA_SEND_POST); |
407 | - if (!ret) |
408 | - ret = __gf_rdma_create_posts (this, options->recv_count, |
409 | - options->recv_size, |
410 | - &device->recvq, |
411 | - GF_RDMA_RECV_POST); |
412 | - |
413 | - if (!ret) { |
414 | - for (i=0 ; i<options->recv_count ; i++) { |
415 | - post = gf_rdma_get_post (&device->recvq); |
416 | - if (gf_rdma_post_recv (device->srq, post) != 0) { |
417 | - ret = -1; |
418 | - break; |
419 | - } |
420 | - } |
421 | - } |
422 | - |
423 | - if (ret) |
424 | - gf_rdma_destroy_posts (this); |
425 | - |
426 | - return ret; |
427 | -} |
428 | - |
429 | - |
430 | -static void |
431 | -gf_rdma_destroy_cq (rpc_transport_t *this) |
432 | -{ |
433 | - gf_rdma_private_t *priv = NULL; |
434 | - gf_rdma_device_t *device = NULL; |
435 | - |
436 | - priv = this->private; |
437 | - device = priv->device; |
438 | - |
439 | - if (device->recv_cq) |
440 | - ibv_destroy_cq (device->recv_cq); |
441 | - device->recv_cq = NULL; |
442 | - |
443 | - if (device->send_cq) |
444 | - ibv_destroy_cq (device->send_cq); |
445 | - device->send_cq = NULL; |
446 | - |
447 | - return; |
448 | -} |
449 | - |
450 | - |
451 | -static int32_t |
452 | -gf_rdma_create_cq (rpc_transport_t *this) |
453 | -{ |
454 | - gf_rdma_private_t *priv = NULL; |
455 | - gf_rdma_options_t *options = NULL; |
456 | - gf_rdma_device_t *device = NULL; |
457 | - uint64_t send_cqe = 0; |
458 | - int32_t ret = 0; |
459 | - struct ibv_device_attr device_attr = {{0}, }; |
460 | - |
461 | - priv = this->private; |
462 | - options = &priv->options; |
463 | - device = priv->device; |
464 | - |
465 | - device->recv_cq = ibv_create_cq (priv->device->context, |
466 | - options->recv_count * 2, |
467 | - device, |
468 | - device->recv_chan, |
469 | - 0); |
470 | - if (!device->recv_cq) { |
471 | - gf_log (this->name, GF_LOG_ERROR, |
472 | - "creation of CQ for device %s failed", |
473 | - device->device_name); |
474 | - ret = -1; |
475 | - goto out; |
476 | - } else if (ibv_req_notify_cq (device->recv_cq, 0)) { |
477 | - gf_log (this->name, GF_LOG_ERROR, |
478 | - "ibv_req_notify_cq on recv CQ of device %s failed", |
479 | - device->device_name); |
480 | - ret = -1; |
481 | - goto out; |
482 | - } |
483 | - |
484 | - do { |
485 | - ret = ibv_query_device (priv->device->context, &device_attr); |
486 | - if (ret != 0) { |
487 | - gf_log (this->name, GF_LOG_ERROR, |
488 | - "ibv_query_device on %s returned %d (%s)", |
489 | - priv->device->device_name, ret, |
490 | - (ret > 0) ? strerror (ret) : ""); |
491 | - ret = -1; |
492 | - goto out; |
493 | - } |
494 | - |
495 | - send_cqe = options->send_count * 128; |
496 | - send_cqe = (send_cqe > device_attr.max_cqe) |
497 | - ? device_attr.max_cqe : send_cqe; |
498 | - |
499 | - /* TODO: make send_cq size dynamically adaptive */ |
500 | - device->send_cq = ibv_create_cq (priv->device->context, |
501 | - send_cqe, device, |
502 | - device->send_chan, 0); |
503 | - if (!device->send_cq) { |
504 | - gf_log (this->name, GF_LOG_ERROR, |
505 | - "creation of send_cq for device %s failed", |
506 | - device->device_name); |
507 | - ret = -1; |
508 | - goto out; |
509 | - } |
510 | - |
511 | - if (ibv_req_notify_cq (device->send_cq, 0)) { |
512 | - gf_log (this->name, GF_LOG_ERROR, |
513 | - "ibv_req_notify_cq on send_cq for device %s" |
514 | - " failed", device->device_name); |
515 | - ret = -1; |
516 | - goto out; |
517 | - } |
518 | - } while (0); |
519 | - |
520 | -out: |
521 | - if (ret != 0) |
522 | - gf_rdma_destroy_cq (this); |
523 | - |
524 | - return ret; |
525 | -} |
526 | - |
527 | - |
528 | -static gf_rdma_device_t * |
529 | -gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx, |
530 | - char *device_name) |
531 | -{ |
532 | - glusterfs_ctx_t *ctx = NULL; |
533 | - gf_rdma_private_t *priv = NULL; |
534 | - gf_rdma_options_t *options = NULL; |
535 | - int32_t ret = 0; |
536 | - int32_t i = 0; |
537 | - gf_rdma_device_t *trav = NULL, *device = NULL; |
538 | - gf_rdma_ctx_t *rdma_ctx = NULL; |
539 | - |
540 | - priv = this->private; |
541 | - options = &priv->options; |
542 | - ctx = this->ctx; |
543 | - rdma_ctx = ctx->ib; |
544 | - |
545 | - trav = rdma_ctx->device; |
546 | - |
547 | - while (trav) { |
548 | - if (!strcmp (trav->device_name, device_name)) |
549 | - break; |
550 | - trav = trav->next; |
551 | - } |
552 | - |
553 | - if (!trav) { |
554 | - trav = GF_CALLOC (1, sizeof (*trav), |
555 | - gf_common_mt_rdma_device_t); |
556 | - if (trav == NULL) { |
557 | - goto out; |
558 | - } |
559 | - |
560 | - priv->device = trav; |
561 | - trav->context = ibctx; |
562 | - |
563 | - trav->request_ctx_pool |
564 | - = mem_pool_new (gf_rdma_request_context_t, |
565 | - GF_RDMA_POOL_SIZE); |
566 | - if (trav->request_ctx_pool == NULL) { |
567 | - goto out; |
568 | - } |
569 | - |
570 | - trav->ioq_pool |
571 | - = mem_pool_new (gf_rdma_ioq_t, GF_RDMA_POOL_SIZE); |
572 | - if (trav->ioq_pool == NULL) { |
573 | - goto out; |
574 | - } |
575 | - |
576 | - trav->reply_info_pool = mem_pool_new (gf_rdma_reply_info_t, |
577 | - GF_RDMA_POOL_SIZE); |
578 | - if (trav->reply_info_pool == NULL) { |
579 | - goto out; |
580 | - } |
581 | - |
582 | - trav->device_name = gf_strdup (device_name); |
583 | - |
584 | - trav->next = rdma_ctx->device; |
585 | - rdma_ctx->device = trav; |
586 | - |
587 | - trav->send_chan = ibv_create_comp_channel (trav->context); |
588 | - if (!trav->send_chan) { |
589 | - gf_log (this->name, GF_LOG_ERROR, |
590 | - "could not create send completion channel for " |
591 | - "device (%s)", device_name); |
592 | - goto out; |
593 | - } |
594 | - |
595 | - trav->recv_chan = ibv_create_comp_channel (trav->context); |
596 | - if (!trav->recv_chan) { |
597 | - gf_log (this->name, GF_LOG_ERROR, |
598 | - "could not create recv completion channel for " |
599 | - "device (%s)", device_name); |
600 | - |
601 | - /* TODO: cleanup current mess */ |
602 | - goto out; |
603 | - } |
604 | - |
605 | - if (gf_rdma_create_cq (this) < 0) { |
606 | - gf_log (this->name, GF_LOG_ERROR, |
607 | - "could not create CQ for device (%s)", |
608 | - device_name); |
609 | - goto out; |
610 | - } |
611 | - |
612 | - /* protection domain */ |
613 | - trav->pd = ibv_alloc_pd (trav->context); |
614 | - |
615 | - if (!trav->pd) { |
616 | - gf_log (this->name, GF_LOG_ERROR, |
617 | - "could not allocate protection domain for " |
618 | - "device (%s)", device_name); |
619 | - goto out; |
620 | - } |
621 | - |
622 | - struct ibv_srq_init_attr attr = { |
623 | - .attr = { |
624 | - .max_wr = options->recv_count, |
625 | - .max_sge = 1, |
626 | - .srq_limit = 10 |
627 | - } |
628 | - }; |
629 | - trav->srq = ibv_create_srq (trav->pd, &attr); |
630 | - |
631 | - if (!trav->srq) { |
632 | - gf_log (this->name, GF_LOG_ERROR, |
633 | - "could not create SRQ for device (%s)", |
634 | - device_name); |
635 | - goto out; |
636 | - } |
637 | - |
638 | - /* queue init */ |
639 | - gf_rdma_queue_init (&trav->sendq); |
640 | - gf_rdma_queue_init (&trav->recvq); |
641 | - |
642 | - if (gf_rdma_create_posts (this) < 0) { |
643 | - gf_log (this->name, GF_LOG_ERROR, |
644 | - "could not allocate posts for device (%s)", |
645 | - device_name); |
646 | - goto out; |
647 | - } |
648 | - |
649 | - /* completion threads */ |
650 | - ret = pthread_create (&trav->send_thread, |
651 | - NULL, |
652 | - gf_rdma_send_completion_proc, |
653 | - trav->send_chan); |
654 | - if (ret) { |
655 | - gf_log (this->name, GF_LOG_ERROR, |
656 | - "could not create send completion thread for " |
657 | - "device (%s)", device_name); |
658 | - goto out; |
659 | - } |
660 | - |
661 | - ret = pthread_create (&trav->recv_thread, |
662 | - NULL, |
663 | - gf_rdma_recv_completion_proc, |
664 | - trav->recv_chan); |
665 | - if (ret) { |
666 | - gf_log (this->name, GF_LOG_ERROR, |
667 | - "could not create recv completion thread " |
668 | - "for device (%s)", device_name); |
669 | - return NULL; |
670 | - } |
671 | - |
672 | - ret = pthread_create (&trav->async_event_thread, |
673 | - NULL, |
674 | - gf_rdma_async_event_thread, |
675 | - ibctx); |
676 | - if (ret) { |
677 | - gf_log (this->name, GF_LOG_ERROR, |
678 | - "could not create async_event_thread"); |
679 | - return NULL; |
680 | - } |
681 | - |
682 | - /* qpreg */ |
683 | - pthread_mutex_init (&trav->qpreg.lock, NULL); |
684 | - for (i=0; i<42; i++) { |
685 | - trav->qpreg.ents[i].next = &trav->qpreg.ents[i]; |
686 | - trav->qpreg.ents[i].prev = &trav->qpreg.ents[i]; |
687 | - } |
688 | - } |
689 | - |
690 | - device = trav; |
691 | - trav = NULL; |
692 | -out: |
693 | - |
694 | - if (trav != NULL) { |
695 | - gf_rdma_destroy_posts (this); |
696 | - mem_pool_destroy (trav->ioq_pool); |
697 | - mem_pool_destroy (trav->request_ctx_pool); |
698 | - mem_pool_destroy (trav->reply_info_pool); |
699 | - ibv_dealloc_pd (trav->pd); |
700 | - gf_rdma_destroy_cq (this); |
701 | - ibv_destroy_comp_channel (trav->recv_chan); |
702 | - ibv_destroy_comp_channel (trav->send_chan); |
703 | - GF_FREE ((char *)trav->device_name); |
704 | - GF_FREE (trav); |
705 | - } |
706 | - |
707 | - return device; |
708 | -} |
709 | - |
710 | - |
711 | -static rpc_transport_t * |
712 | -gf_rdma_transport_new (rpc_transport_t *listener, struct rdma_cm_id *cm_id) |
713 | -{ |
714 | - gf_rdma_private_t *listener_priv = NULL, *priv = NULL; |
715 | - rpc_transport_t *this = NULL, *new = NULL; |
716 | - gf_rdma_options_t *options = NULL; |
717 | - char *device_name = NULL; |
718 | - |
719 | - listener_priv = listener->private; |
720 | - |
721 | - this = GF_CALLOC (1, sizeof (rpc_transport_t), |
722 | - gf_common_mt_rpc_transport_t); |
723 | - if (this == NULL) { |
724 | - goto out; |
725 | - } |
726 | - |
727 | - this->listener = listener; |
728 | - |
729 | - priv = GF_CALLOC (1, sizeof (gf_rdma_private_t), |
730 | - gf_common_mt_rdma_private_t); |
731 | - if (priv == NULL) { |
732 | - goto out; |
733 | - } |
734 | - |
735 | - this->private = priv; |
736 | - priv->options = listener_priv->options; |
737 | - |
738 | - priv->listener = listener; |
739 | - priv->entity = GF_RDMA_SERVER; |
740 | - |
741 | - options = &priv->options; |
742 | - |
743 | - this->ops = listener->ops; |
744 | - this->init = listener->init; |
745 | - this->fini = listener->fini; |
746 | - this->ctx = listener->ctx; |
747 | - this->name = gf_strdup (listener->name); |
748 | - this->notify = listener->notify; |
749 | - this->mydata = listener->mydata; |
750 | - |
751 | - this->myinfo.sockaddr_len = sizeof (cm_id->route.addr.src_addr); |
752 | - memcpy (&this->myinfo.sockaddr, &cm_id->route.addr.src_addr, |
753 | - this->myinfo.sockaddr_len); |
754 | - |
755 | - this->peerinfo.sockaddr_len = sizeof (cm_id->route.addr.dst_addr); |
756 | - memcpy (&this->peerinfo.sockaddr, &cm_id->route.addr.dst_addr, |
757 | - this->peerinfo.sockaddr_len); |
758 | - |
759 | - priv->peer.trans = this; |
760 | - gf_rdma_get_transport_identifiers (this); |
761 | - |
762 | - device_name = (char *)ibv_get_device_name (cm_id->verbs->device); |
763 | - if (device_name == NULL) { |
764 | - gf_log (listener->name, GF_LOG_WARNING, |
765 | - "cannot get device name (peer:%s me:%s)", |
766 | - this->peerinfo.identifier, this->myinfo.identifier); |
767 | - goto out; |
768 | - } |
769 | - |
770 | - priv->device = gf_rdma_get_device (this, cm_id->verbs, |
771 | - device_name); |
772 | - if (priv->device == NULL) { |
773 | - gf_log (listener->name, GF_LOG_WARNING, |
774 | - "cannot get infiniband device %s (peer:%s me:%s)", |
775 | - device_name, this->peerinfo.identifier, |
776 | - this->myinfo.identifier); |
777 | - goto out; |
778 | - } |
779 | - |
780 | - priv->peer.send_count = options->send_count; |
781 | - priv->peer.recv_count = options->recv_count; |
782 | - priv->peer.send_size = options->send_size; |
783 | - priv->peer.recv_size = options->recv_size; |
784 | - priv->peer.cm_id = cm_id; |
785 | - INIT_LIST_HEAD (&priv->peer.ioq); |
786 | - |
787 | - pthread_mutex_init (&priv->write_mutex, NULL); |
788 | - pthread_mutex_init (&priv->recv_mutex, NULL); |
789 | - |
790 | - cm_id->context = this; |
791 | - |
792 | - new = rpc_transport_ref (this); |
793 | - this = NULL; |
794 | -out: |
795 | - if (this != NULL) { |
796 | - if (this->private != NULL) { |
797 | - GF_FREE (this->private); |
798 | - } |
799 | - |
800 | - if (this->name != NULL) { |
801 | - GF_FREE (this->name); |
802 | - } |
803 | - |
804 | - GF_FREE (this); |
805 | - } |
806 | - |
807 | - return new; |
808 | -} |
809 | - |
810 | - |
811 | -static int |
812 | -gf_rdma_cm_handle_connect_request (struct rdma_cm_event *event) |
813 | -{ |
814 | - int ret = -1; |
815 | - rpc_transport_t *this = NULL, *listener = NULL; |
816 | - struct rdma_cm_id *child_cm_id = NULL, *listener_cm_id = NULL; |
817 | - struct rdma_conn_param conn_param = {0, }; |
818 | - gf_rdma_private_t *priv = NULL; |
819 | - gf_rdma_options_t *options = NULL; |
820 | - |
821 | - child_cm_id = event->id; |
822 | - listener_cm_id = event->listen_id; |
823 | - |
824 | - listener = listener_cm_id->context; |
825 | - priv = listener->private; |
826 | - options = &priv->options; |
827 | - |
828 | - this = gf_rdma_transport_new (listener, child_cm_id); |
829 | - if (this == NULL) { |
830 | - gf_log (listener->name, GF_LOG_WARNING, |
831 | - "could not create a transport for incoming connection" |
832 | - " (me.name:%s me.identifier:%s)", listener->name, |
833 | - listener->myinfo.identifier); |
834 | - rdma_destroy_id (child_cm_id); |
835 | - goto out; |
836 | - } |
837 | - |
838 | - gf_log (listener->name, GF_LOG_TRACE, |
839 | - "got a connect request (me:%s peer:%s)", |
840 | - listener->myinfo.identifier, this->peerinfo.identifier); |
841 | - |
842 | - ret = gf_rdma_create_qp (this); |
843 | - if (ret < 0) { |
844 | - gf_log (listener->name, GF_LOG_WARNING, |
845 | - "could not create QP (peer:%s me:%s)", |
846 | - this->peerinfo.identifier, this->myinfo.identifier); |
847 | - gf_rdma_cm_handle_disconnect (this); |
848 | - goto out; |
849 | - } |
850 | - |
851 | - conn_param.responder_resources = 1; |
852 | - conn_param.initiator_depth = 1; |
853 | - conn_param.retry_count = options->attr_retry_cnt; |
854 | - conn_param.rnr_retry_count = options->attr_rnr_retry; |
855 | - |
856 | - ret = rdma_accept(child_cm_id, &conn_param); |
857 | - if (ret < 0) { |
858 | - gf_log (listener->name, GF_LOG_WARNING, "rdma_accept failed " |
859 | - "peer:%s me:%s (%s)", this->peerinfo.identifier, |
860 | - this->myinfo.identifier, strerror (errno)); |
861 | - gf_rdma_cm_handle_disconnect (this); |
862 | - goto out; |
863 | - } |
864 | - |
865 | - ret = 0; |
866 | - |
867 | -out: |
868 | - return ret; |
869 | -} |
870 | - |
871 | - |
872 | -static int |
873 | -gf_rdma_cm_handle_route_resolved (struct rdma_cm_event *event) |
874 | -{ |
875 | - struct rdma_conn_param conn_param = {0, }; |
876 | - int ret = 0; |
877 | - rpc_transport_t *this = NULL; |
878 | - gf_rdma_private_t *priv = NULL; |
879 | - gf_rdma_peer_t *peer = NULL; |
880 | - gf_rdma_options_t *options = NULL; |
881 | - |
882 | - if (event == NULL) { |
883 | - goto out; |
884 | - } |
885 | - |
886 | - this = event->id->context; |
887 | - |
888 | - priv = this->private; |
889 | - peer = &priv->peer; |
890 | - options = &priv->options; |
891 | - |
892 | - ret = gf_rdma_create_qp (this); |
893 | - if (ret != 0) { |
894 | - gf_log (this->name, GF_LOG_WARNING, |
895 | - "could not create QP (peer:%s me:%s)", |
896 | - this->peerinfo.identifier, this->myinfo.identifier); |
897 | - gf_rdma_cm_handle_disconnect (this); |
898 | - goto out; |
899 | - } |
900 | - |
901 | - memset(&conn_param, 0, sizeof conn_param); |
902 | - conn_param.responder_resources = 1; |
903 | - conn_param.initiator_depth = 1; |
904 | - conn_param.retry_count = options->attr_retry_cnt; |
905 | - conn_param.rnr_retry_count = options->attr_rnr_retry; |
906 | - |
907 | - ret = rdma_connect(peer->cm_id, &conn_param); |
908 | - if (ret != 0) { |
909 | - gf_log (this->name, GF_LOG_WARNING, |
910 | - "rdma_connect failed (%s)", strerror (errno)); |
911 | - gf_rdma_cm_handle_disconnect (this); |
912 | - goto out; |
913 | - } |
914 | - |
915 | - gf_log (this->name, GF_LOG_TRACE, "route resolved (me:%s peer:%s)", |
916 | - this->myinfo.identifier, this->peerinfo.identifier); |
917 | - |
918 | - ret = 0; |
919 | -out: |
920 | - return ret; |
921 | -} |
922 | - |
923 | - |
924 | -static int |
925 | -gf_rdma_cm_handle_addr_resolved (struct rdma_cm_event *event) |
926 | -{ |
927 | - rpc_transport_t *this = NULL; |
928 | - gf_rdma_peer_t *peer = NULL; |
929 | - gf_rdma_private_t *priv = NULL; |
930 | - int ret = 0; |
931 | - |
932 | - this = event->id->context; |
933 | - |
934 | - priv = this->private; |
935 | - peer = &priv->peer; |
936 | - |
937 | - GF_ASSERT (peer->cm_id == event->id); |
938 | - |
939 | - this->myinfo.sockaddr_len = sizeof (peer->cm_id->route.addr.src_addr); |
940 | - memcpy (&this->myinfo.sockaddr, &peer->cm_id->route.addr.src_addr, |
941 | - this->myinfo.sockaddr_len); |
942 | - |
943 | - this->peerinfo.sockaddr_len = sizeof (peer->cm_id->route.addr.dst_addr); |
944 | - memcpy (&this->peerinfo.sockaddr, &peer->cm_id->route.addr.dst_addr, |
945 | - this->peerinfo.sockaddr_len); |
946 | - |
947 | - gf_rdma_get_transport_identifiers (this); |
948 | - |
949 | - ret = rdma_resolve_route(peer->cm_id, 2000); |
950 | - if (ret != 0) { |
951 | - gf_log (this->name, GF_LOG_WARNING, |
952 | - "rdma_resolve_route failed (me:%s peer:%s) (%s)", |
953 | - this->myinfo.identifier, this->peerinfo.identifier, |
954 | - strerror (errno)); |
955 | - gf_rdma_cm_handle_disconnect (this); |
956 | - } |
957 | - |
958 | - gf_log (this->name, GF_LOG_TRACE, "Address resolved (me:%s peer:%s)", |
959 | - this->myinfo.identifier, this->peerinfo.identifier); |
960 | - |
961 | - return ret; |
962 | -} |
963 | - |
964 | - |
965 | -static void |
966 | -gf_rdma_cm_handle_disconnect (rpc_transport_t *this) |
967 | -{ |
968 | - gf_rdma_private_t *priv = NULL; |
969 | - char need_unref = 0, connected = 0; |
970 | - |
971 | - priv = this->private; |
972 | - gf_log (this->name, GF_LOG_DEBUG, |
973 | - "peer disconnected, cleaning up"); |
974 | - |
975 | - pthread_mutex_lock (&priv->write_mutex); |
976 | - { |
977 | - if (priv->peer.cm_id != NULL) { |
978 | - need_unref = 1; |
979 | - connected = priv->connected; |
980 | - priv->connected = 0; |
981 | - } |
982 | - |
983 | - __gf_rdma_teardown (this); |
984 | - } |
985 | - pthread_mutex_unlock (&priv->write_mutex); |
986 | - |
987 | - if (connected) { |
988 | - rpc_transport_notify (this, RPC_TRANSPORT_DISCONNECT, this); |
989 | - } |
990 | - |
991 | - if (need_unref) |
992 | - rpc_transport_unref (this); |
993 | - |
994 | -} |
995 | - |
996 | - |
997 | -static int |
998 | -gf_rdma_cm_handle_event_established (struct rdma_cm_event *event) |
999 | -{ |
1000 | - rpc_transport_t *this = NULL; |
1001 | - gf_rdma_private_t *priv = NULL; |
1002 | - struct rdma_cm_id *cm_id = NULL; |
1003 | - int ret = 0; |
1004 | - |
1005 | - cm_id = event->id; |
1006 | - this = cm_id->context; |
1007 | - priv = this->private; |
1008 | - |
1009 | - priv->connected = 1; |
1010 | - |
1011 | - pthread_mutex_lock (&priv->write_mutex); |
1012 | - { |
1013 | - priv->peer.quota = 1; |
1014 | - priv->peer.quota_set = 0; |
1015 | - } |
1016 | - pthread_mutex_unlock (&priv->write_mutex); |
1017 | - |
1018 | - if (priv->entity == GF_RDMA_CLIENT) { |
1019 | - ret = rpc_transport_notify (this, RPC_TRANSPORT_CONNECT, this); |
1020 | - |
1021 | - } else if (priv->entity == GF_RDMA_SERVER) { |
1022 | - ret = rpc_transport_notify (priv->listener, |
1023 | - RPC_TRANSPORT_ACCEPT, this); |
1024 | - } |
1025 | - |
1026 | - if (ret < 0) { |
1027 | - gf_rdma_disconnect (this); |
1028 | - } |
1029 | - |
1030 | - gf_log (this->name, GF_LOG_TRACE, |
1031 | - "recieved event RDMA_CM_EVENT_ESTABLISHED (me:%s peer:%s)", |
1032 | - this->myinfo.identifier, this->peerinfo.identifier); |
1033 | - |
1034 | - return ret; |
1035 | -} |
1036 | - |
1037 | - |
1038 | -static int |
1039 | -gf_rdma_cm_handle_event_error (rpc_transport_t *this) |
1040 | -{ |
1041 | - gf_rdma_private_t *priv = NULL; |
1042 | - |
1043 | - priv = this->private; |
1044 | - |
1045 | - if (priv->entity != GF_RDMA_SERVER_LISTENER) { |
1046 | - gf_rdma_cm_handle_disconnect (this); |
1047 | - } |
1048 | - |
1049 | - return 0; |
1050 | -} |
1051 | - |
1052 | - |
1053 | -static int |
1054 | -gf_rdma_cm_handle_device_removal (struct rdma_cm_event *event) |
1055 | -{ |
1056 | - return 0; |
1057 | -} |
1058 | - |
1059 | - |
1060 | -static void * |
1061 | -gf_rdma_cm_event_handler (void *data) |
1062 | -{ |
1063 | - struct rdma_cm_event *event = NULL; |
1064 | - int ret = 0; |
1065 | - rpc_transport_t *this = NULL; |
1066 | - struct rdma_event_channel *event_channel = NULL; |
1067 | - |
1068 | - event_channel = data; |
1069 | - |
1070 | - while (1) { |
1071 | - ret = rdma_get_cm_event (event_channel, &event); |
1072 | - if (ret != 0) { |
1073 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1074 | - "rdma_cm_get_event failed (%s)", |
1075 | - strerror (errno)); |
1076 | - break; |
1077 | - } |
1078 | - |
1079 | - switch (event->event) { |
1080 | - case RDMA_CM_EVENT_ADDR_RESOLVED: |
1081 | - gf_rdma_cm_handle_addr_resolved (event); |
1082 | - break; |
1083 | - |
1084 | - case RDMA_CM_EVENT_ROUTE_RESOLVED: |
1085 | - gf_rdma_cm_handle_route_resolved (event); |
1086 | - break; |
1087 | - |
1088 | - case RDMA_CM_EVENT_CONNECT_REQUEST: |
1089 | - gf_rdma_cm_handle_connect_request (event); |
1090 | - break; |
1091 | - |
1092 | - case RDMA_CM_EVENT_ESTABLISHED: |
1093 | - gf_rdma_cm_handle_event_established (event); |
1094 | - break; |
1095 | - |
1096 | - case RDMA_CM_EVENT_ADDR_ERROR: |
1097 | - case RDMA_CM_EVENT_ROUTE_ERROR: |
1098 | - case RDMA_CM_EVENT_CONNECT_ERROR: |
1099 | - case RDMA_CM_EVENT_UNREACHABLE: |
1100 | - case RDMA_CM_EVENT_REJECTED: |
1101 | - this = event->id->context; |
1102 | - |
1103 | - gf_log (this->name, GF_LOG_WARNING, |
1104 | - "cma event %s, error %d (me:%s peer:%s)\n", |
1105 | - rdma_event_str(event->event), event->status, |
1106 | - this->myinfo.identifier, |
1107 | - this->peerinfo.identifier); |
1108 | - |
1109 | - rdma_ack_cm_event (event); |
1110 | - event = NULL; |
1111 | - |
1112 | - gf_rdma_cm_handle_event_error (this); |
1113 | - continue; |
1114 | - |
1115 | - case RDMA_CM_EVENT_DISCONNECTED: |
1116 | - this = event->id->context; |
1117 | - |
1118 | - gf_log (this->name, GF_LOG_DEBUG, |
1119 | - "recieved disconnect (me:%s peer:%s)\n", |
1120 | - this->myinfo.identifier, |
1121 | - this->peerinfo.identifier); |
1122 | - |
1123 | - rdma_ack_cm_event (event); |
1124 | - event = NULL; |
1125 | - |
1126 | - gf_rdma_cm_handle_disconnect (this); |
1127 | - continue; |
1128 | - |
1129 | - case RDMA_CM_EVENT_DEVICE_REMOVAL: |
1130 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1131 | - "device removed"); |
1132 | - gf_rdma_cm_handle_device_removal (event); |
1133 | - break; |
1134 | - |
1135 | - default: |
1136 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1137 | - "unhandled event: %s, ignoring", |
1138 | - rdma_event_str(event->event)); |
1139 | - break; |
1140 | - } |
1141 | - |
1142 | - rdma_ack_cm_event (event); |
1143 | - } |
1144 | - |
1145 | - return NULL; |
1146 | -} |
1147 | - |
1148 | - |
1149 | -static int32_t |
1150 | -gf_rdma_post_send (struct ibv_qp *qp, gf_rdma_post_t *post, int32_t len) |
1151 | -{ |
1152 | - struct ibv_sge list = { |
1153 | - .addr = (unsigned long) post->buf, |
1154 | - .length = len, |
1155 | - .lkey = post->mr->lkey |
1156 | - }; |
1157 | - |
1158 | - struct ibv_send_wr wr = { |
1159 | - .wr_id = (unsigned long) post, |
1160 | - .sg_list = &list, |
1161 | - .num_sge = 1, |
1162 | - .opcode = IBV_WR_SEND, |
1163 | - .send_flags = IBV_SEND_SIGNALED, |
1164 | - }, *bad_wr; |
1165 | - |
1166 | - if (!qp) |
1167 | - return EINVAL; |
1168 | - |
1169 | - return ibv_post_send (qp, &wr, &bad_wr); |
1170 | -} |
1171 | - |
1172 | -int |
1173 | -__gf_rdma_encode_error(gf_rdma_peer_t *peer, gf_rdma_reply_info_t *reply_info, |
1174 | - struct iovec *rpchdr, gf_rdma_header_t *hdr, |
1175 | - gf_rdma_errcode_t err) |
1176 | -{ |
1177 | - struct rpc_msg *rpc_msg = NULL; |
1178 | - |
1179 | - if (reply_info != NULL) { |
1180 | - hdr->rm_xid = hton32(reply_info->rm_xid); |
1181 | - } else { |
1182 | - rpc_msg = rpchdr[0].iov_base; /* assume rpchdr contains |
1183 | - * only one vector. |
1184 | - * (which is true) |
1185 | - */ |
1186 | - hdr->rm_xid = rpc_msg->rm_xid; |
1187 | - } |
1188 | - |
1189 | - hdr->rm_vers = hton32(GF_RDMA_VERSION); |
1190 | - hdr->rm_credit = hton32(peer->send_count); |
1191 | - hdr->rm_type = hton32(GF_RDMA_ERROR); |
1192 | - hdr->rm_body.rm_error.rm_type = hton32(err); |
1193 | - if (err == ERR_VERS) { |
1194 | - hdr->rm_body.rm_error.rm_version.gf_rdma_vers_low |
1195 | - = hton32(GF_RDMA_VERSION); |
1196 | - hdr->rm_body.rm_error.rm_version.gf_rdma_vers_high |
1197 | - = hton32(GF_RDMA_VERSION); |
1198 | - } |
1199 | - |
1200 | - return sizeof (*hdr); |
1201 | -} |
1202 | - |
1203 | - |
1204 | -int32_t |
1205 | -__gf_rdma_send_error (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry, |
1206 | - gf_rdma_post_t *post, gf_rdma_reply_info_t *reply_info, |
1207 | - gf_rdma_errcode_t err) |
1208 | -{ |
1209 | - int32_t ret = -1, len = 0; |
1210 | - |
1211 | - len = __gf_rdma_encode_error (peer, reply_info, entry->rpchdr, |
1212 | - (gf_rdma_header_t *)post->buf, err); |
1213 | - if (len == -1) { |
1214 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR, |
1215 | - "encode error returned -1"); |
1216 | - goto out; |
1217 | - } |
1218 | - |
1219 | - gf_rdma_post_ref (post); |
1220 | - |
1221 | - ret = gf_rdma_post_send (peer->qp, post, len); |
1222 | - if (!ret) { |
1223 | - ret = len; |
1224 | - } else { |
1225 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1226 | - "gf_rdma_post_send (to %s) failed with ret = %d (%s)", |
1227 | - peer->trans->peerinfo.identifier, ret, |
1228 | - (ret > 0) ? strerror (ret) : ""); |
1229 | - gf_rdma_post_unref (post); |
1230 | - __gf_rdma_disconnect (peer->trans); |
1231 | - ret = -1; |
1232 | - } |
1233 | - |
1234 | -out: |
1235 | - return ret; |
1236 | -} |
1237 | - |
1238 | - |
1239 | -int32_t |
1240 | -__gf_rdma_create_read_chunks_from_vector (gf_rdma_peer_t *peer, |
1241 | - gf_rdma_read_chunk_t **readch_ptr, |
1242 | - int32_t *pos, struct iovec *vector, |
1243 | - int count, |
1244 | - gf_rdma_request_context_t *request_ctx) |
1245 | -{ |
1246 | - int i = 0; |
1247 | - gf_rdma_private_t *priv = NULL; |
1248 | - gf_rdma_device_t *device = NULL; |
1249 | - struct ibv_mr *mr = NULL; |
1250 | - gf_rdma_read_chunk_t *readch = NULL; |
1251 | - int32_t ret = -1; |
1252 | - |
1253 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out); |
1254 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, readch_ptr, out); |
1255 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *readch_ptr, out); |
1256 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out); |
1257 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, vector, out); |
1258 | - |
1259 | - priv = peer->trans->private; |
1260 | - device = priv->device; |
1261 | - readch = *readch_ptr; |
1262 | - |
1263 | - for (i = 0; i < count; i++) { |
1264 | - readch->rc_discrim = hton32 (1); |
1265 | - readch->rc_position = hton32 (*pos); |
1266 | - |
1267 | - mr = ibv_reg_mr (device->pd, vector[i].iov_base, |
1268 | - vector[i].iov_len, |
1269 | - IBV_ACCESS_REMOTE_READ); |
1270 | - if (!mr) { |
1271 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1272 | - "memory registration failed (%s) (peer:%s)", |
1273 | - strerror (errno), |
1274 | - peer->trans->peerinfo.identifier); |
1275 | - goto out; |
1276 | - } |
1277 | - |
1278 | - request_ctx->mr[request_ctx->mr_count++] = mr; |
1279 | - |
1280 | - readch->rc_target.rs_handle = hton32 (mr->rkey); |
1281 | - readch->rc_target.rs_length |
1282 | - = hton32 (vector[i].iov_len); |
1283 | - readch->rc_target.rs_offset |
1284 | - = hton64 ((uint64_t)(unsigned long)vector[i].iov_base); |
1285 | - |
1286 | - *pos = *pos + vector[i].iov_len; |
1287 | - readch++; |
1288 | - } |
1289 | - |
1290 | - *readch_ptr = readch; |
1291 | - |
1292 | - ret = 0; |
1293 | -out: |
1294 | - return ret; |
1295 | -} |
1296 | - |
1297 | - |
1298 | -int32_t |
1299 | -__gf_rdma_create_read_chunks (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry, |
1300 | - gf_rdma_chunktype_t type, uint32_t **ptr, |
1301 | - gf_rdma_request_context_t *request_ctx) |
1302 | -{ |
1303 | - int32_t ret = -1; |
1304 | - int pos = 0; |
1305 | - |
1306 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out); |
1307 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, entry, out); |
1308 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, ptr, out); |
1309 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *ptr, out); |
1310 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out); |
1311 | - |
1312 | - request_ctx->iobref = iobref_ref (entry->iobref); |
1313 | - |
1314 | - if (type == gf_rdma_areadch) { |
1315 | - pos = 0; |
1316 | - ret = __gf_rdma_create_read_chunks_from_vector (peer, |
1317 | - (gf_rdma_read_chunk_t **)ptr, |
1318 | - &pos, |
1319 | - entry->rpchdr, |
1320 | - entry->rpchdr_count, |
1321 | - request_ctx); |
1322 | - if (ret == -1) { |
1323 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1324 | - "cannot create read chunks from vector " |
1325 | - "entry->rpchdr"); |
1326 | - goto out; |
1327 | - } |
1328 | - |
1329 | - ret = __gf_rdma_create_read_chunks_from_vector (peer, |
1330 | - (gf_rdma_read_chunk_t **)ptr, |
1331 | - &pos, |
1332 | - entry->proghdr, |
1333 | - entry->proghdr_count, |
1334 | - request_ctx); |
1335 | - if (ret == -1) { |
1336 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1337 | - "cannot create read chunks from vector " |
1338 | - "entry->proghdr"); |
1339 | - } |
1340 | - |
1341 | - if (entry->prog_payload_count != 0) { |
1342 | - ret = __gf_rdma_create_read_chunks_from_vector (peer, |
1343 | - (gf_rdma_read_chunk_t **)ptr, |
1344 | - &pos, |
1345 | - entry->prog_payload, |
1346 | - entry->prog_payload_count, |
1347 | - request_ctx); |
1348 | - if (ret == -1) { |
1349 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1350 | - "cannot create read chunks from vector" |
1351 | - " entry->prog_payload"); |
1352 | - } |
1353 | - } |
1354 | - } else { |
1355 | - pos = iov_length (entry->rpchdr, entry->rpchdr_count); |
1356 | - ret = __gf_rdma_create_read_chunks_from_vector (peer, |
1357 | - (gf_rdma_read_chunk_t **)ptr, |
1358 | - &pos, |
1359 | - entry->prog_payload, |
1360 | - entry->prog_payload_count, |
1361 | - request_ctx); |
1362 | - if (ret == -1) { |
1363 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1364 | - "cannot create read chunks from vector " |
1365 | - "entry->prog_payload"); |
1366 | - } |
1367 | - } |
1368 | - |
1369 | - /* terminate read-chunk list*/ |
1370 | - **ptr = 0; |
1371 | - *ptr = *ptr + 1; |
1372 | -out: |
1373 | - return ret; |
1374 | -} |
1375 | - |
1376 | - |
1377 | -int32_t |
1378 | -__gf_rdma_create_write_chunks_from_vector (gf_rdma_peer_t *peer, |
1379 | - gf_rdma_write_chunk_t **writech_ptr, |
1380 | - struct iovec *vector, int count, |
1381 | - gf_rdma_request_context_t *request_ctx) |
1382 | -{ |
1383 | - int i = 0; |
1384 | - gf_rdma_private_t *priv = NULL; |
1385 | - gf_rdma_device_t *device = NULL; |
1386 | - struct ibv_mr *mr = NULL; |
1387 | - gf_rdma_write_chunk_t *writech = NULL; |
1388 | - int32_t ret = -1; |
1389 | - |
1390 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out); |
1391 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, writech_ptr, out); |
1392 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *writech_ptr, out); |
1393 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out); |
1394 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, vector, out); |
1395 | - |
1396 | - writech = *writech_ptr; |
1397 | - |
1398 | - priv = peer->trans->private; |
1399 | - device = priv->device; |
1400 | - |
1401 | - for (i = 0; i < count; i++) { |
1402 | - mr = ibv_reg_mr (device->pd, vector[i].iov_base, |
1403 | - vector[i].iov_len, |
1404 | - IBV_ACCESS_REMOTE_WRITE |
1405 | - | IBV_ACCESS_LOCAL_WRITE); |
1406 | - if (!mr) { |
1407 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1408 | - "memory registration failed (%s) (peer:%s)", |
1409 | - strerror (errno), |
1410 | - peer->trans->peerinfo.identifier); |
1411 | - goto out; |
1412 | - } |
1413 | - |
1414 | - request_ctx->mr[request_ctx->mr_count++] = mr; |
1415 | - |
1416 | - writech->wc_target.rs_handle = hton32 (mr->rkey); |
1417 | - writech->wc_target.rs_length = hton32 (vector[i].iov_len); |
1418 | - writech->wc_target.rs_offset |
1419 | - = hton64 (((uint64_t)(unsigned long)vector[i].iov_base)); |
1420 | - |
1421 | - writech++; |
1422 | - } |
1423 | - |
1424 | - *writech_ptr = writech; |
1425 | - |
1426 | - ret = 0; |
1427 | -out: |
1428 | - return ret; |
1429 | -} |
1430 | - |
1431 | - |
1432 | -int32_t |
1433 | -__gf_rdma_create_write_chunks (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry, |
1434 | - gf_rdma_chunktype_t chunk_type, uint32_t **ptr, |
1435 | - gf_rdma_request_context_t *request_ctx) |
1436 | -{ |
1437 | - int32_t ret = -1; |
1438 | - gf_rdma_write_array_t *warray = NULL; |
1439 | - |
1440 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out); |
1441 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, ptr, out); |
1442 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *ptr, out); |
1443 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out); |
1444 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, entry, out); |
1445 | - |
1446 | - if ((chunk_type == gf_rdma_replych) |
1447 | - && ((entry->msg.request.rsphdr_count != 1) || |
1448 | - (entry->msg.request.rsphdr_vec[0].iov_base == NULL))) { |
1449 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1450 | - (entry->msg.request.rsphdr_count == 1) |
1451 | - ? "chunktype specified as reply chunk but the vector " |
1452 | - "specifying the buffer to be used for holding reply" |
1453 | - " header is not correct" : |
1454 | - "chunktype specified as reply chunk, but more than one " |
1455 | - "buffer provided for holding reply"); |
1456 | - goto out; |
1457 | - } |
1458 | - |
1459 | -/* |
1460 | - if ((chunk_type == gf_rdma_writech) |
1461 | - && ((entry->msg.request.rsphdr_count == 0) |
1462 | - || (entry->msg.request.rsphdr_vec[0].iov_base == NULL))) { |
1463 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG, |
1464 | - "vector specifying buffer to hold the program's reply " |
1465 | - "header should also be provided when buffers are " |
1466 | - "provided for holding the program's payload in reply"); |
1467 | - goto out; |
1468 | - } |
1469 | -*/ |
1470 | - |
1471 | - if (chunk_type == gf_rdma_writech) { |
1472 | - warray = (gf_rdma_write_array_t *)*ptr; |
1473 | - warray->wc_discrim = hton32 (1); |
1474 | - warray->wc_nchunks |
1475 | - = hton32 (entry->msg.request.rsp_payload_count); |
1476 | - |
1477 | - *ptr = (uint32_t *)&warray->wc_array[0]; |
1478 | - |
1479 | - ret = __gf_rdma_create_write_chunks_from_vector (peer, |
1480 | - (gf_rdma_write_chunk_t **)ptr, |
1481 | - entry->msg.request.rsp_payload, |
1482 | - entry->msg.request.rsp_payload_count, |
1483 | - request_ctx); |
1484 | - if (ret == -1) { |
1485 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1486 | - "cannot create write chunks from vector " |
1487 | - "entry->rpc_payload"); |
1488 | - goto out; |
1489 | - } |
1490 | - |
1491 | - /* terminate write chunklist */ |
1492 | - **ptr = 0; |
1493 | - *ptr = *ptr + 1; |
1494 | - |
1495 | - /* no reply chunklist */ |
1496 | - **ptr = 0; |
1497 | - *ptr = *ptr + 1; |
1498 | - } else { |
1499 | - /* no write chunklist */ |
1500 | - **ptr = 0; |
1501 | - *ptr = *ptr + 1; |
1502 | - |
1503 | - warray = (gf_rdma_write_array_t *)*ptr; |
1504 | - warray->wc_discrim = hton32 (1); |
1505 | - warray->wc_nchunks = hton32 (entry->msg.request.rsphdr_count); |
1506 | - |
1507 | - *ptr = (uint32_t *)&warray->wc_array[0]; |
1508 | - |
1509 | - ret = __gf_rdma_create_write_chunks_from_vector (peer, |
1510 | - (gf_rdma_write_chunk_t **)ptr, |
1511 | - entry->msg.request.rsphdr_vec, |
1512 | - entry->msg.request.rsphdr_count, |
1513 | - request_ctx); |
1514 | - if (ret == -1) { |
1515 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1516 | - "cannot create write chunks from vector " |
1517 | - "entry->rpchdr"); |
1518 | - goto out; |
1519 | - } |
1520 | - |
1521 | - /* terminate reply chunklist */ |
1522 | - **ptr = 0; |
1523 | - *ptr = *ptr + 1; |
1524 | - } |
1525 | - |
1526 | -out: |
1527 | - return ret; |
1528 | -} |
1529 | - |
1530 | - |
1531 | -static inline void |
1532 | -__gf_rdma_deregister_mr (struct ibv_mr **mr, int count) |
1533 | -{ |
1534 | - int i = 0; |
1535 | - |
1536 | - if (mr == NULL) { |
1537 | - goto out; |
1538 | - } |
1539 | - |
1540 | - for (i = 0; i < count; i++) { |
1541 | - ibv_dereg_mr (mr[i]); |
1542 | - } |
1543 | - |
1544 | -out: |
1545 | - return; |
1546 | -} |
1547 | - |
1548 | - |
1549 | -static int32_t |
1550 | -__gf_rdma_quota_put (gf_rdma_peer_t *peer) |
1551 | -{ |
1552 | - int32_t ret = 0; |
1553 | - |
1554 | - peer->quota++; |
1555 | - ret = peer->quota; |
1556 | - |
1557 | - if (!list_empty (&peer->ioq)) { |
1558 | - ret = __gf_rdma_ioq_churn (peer); |
1559 | - } |
1560 | - |
1561 | - return ret; |
1562 | -} |
1563 | - |
1564 | - |
1565 | -static int32_t |
1566 | -gf_rdma_quota_put (gf_rdma_peer_t *peer) |
1567 | -{ |
1568 | - int32_t ret = 0; |
1569 | - gf_rdma_private_t *priv = NULL; |
1570 | - |
1571 | - priv = peer->trans->private; |
1572 | - pthread_mutex_lock (&priv->write_mutex); |
1573 | - { |
1574 | - ret = __gf_rdma_quota_put (peer); |
1575 | - } |
1576 | - pthread_mutex_unlock (&priv->write_mutex); |
1577 | - |
1578 | - return ret; |
1579 | -} |
1580 | - |
1581 | - |
1582 | -/* to be called with priv->mutex held */ |
1583 | -void |
1584 | -__gf_rdma_request_context_destroy (gf_rdma_request_context_t *context) |
1585 | -{ |
1586 | - gf_rdma_peer_t *peer = NULL; |
1587 | - gf_rdma_private_t *priv = NULL; |
1588 | - int32_t ret = 0; |
1589 | - |
1590 | - if (context == NULL) { |
1591 | - goto out; |
1592 | - } |
1593 | - |
1594 | - peer = context->peer; |
1595 | - |
1596 | - __gf_rdma_deregister_mr (context->mr, context->mr_count); |
1597 | - |
1598 | - priv = peer->trans->private; |
1599 | - |
1600 | - if (priv->connected) { |
1601 | - ret = __gf_rdma_quota_put (peer); |
1602 | - if (ret < 0) { |
1603 | - gf_log ("rdma", GF_LOG_DEBUG, |
1604 | - "failed to send " |
1605 | - "message"); |
1606 | - mem_put (context); |
1607 | - __gf_rdma_disconnect (peer->trans); |
1608 | - goto out; |
1609 | - } |
1610 | - } |
1611 | - |
1612 | - if (context->iobref != NULL) { |
1613 | - iobref_unref (context->iobref); |
1614 | - context->iobref = NULL; |
1615 | - } |
1616 | - |
1617 | - if (context->rsp_iobref != NULL) { |
1618 | - iobref_unref (context->rsp_iobref); |
1619 | - context->rsp_iobref = NULL; |
1620 | - } |
1621 | - |
1622 | - mem_put (context); |
1623 | - |
1624 | -out: |
1625 | - return; |
1626 | -} |
1627 | - |
1628 | - |
1629 | -void |
1630 | -gf_rdma_post_context_destroy (gf_rdma_post_context_t *ctx) |
1631 | -{ |
1632 | - if (ctx == NULL) { |
1633 | - goto out; |
1634 | - } |
1635 | - |
1636 | - __gf_rdma_deregister_mr (ctx->mr, ctx->mr_count); |
1637 | - |
1638 | - if (ctx->iobref != NULL) { |
1639 | - iobref_unref (ctx->iobref); |
1640 | - } |
1641 | - |
1642 | - if (ctx->hdr_iobuf != NULL) { |
1643 | - iobuf_unref (ctx->hdr_iobuf); |
1644 | - } |
1645 | - |
1646 | - memset (ctx, 0, sizeof (*ctx)); |
1647 | -out: |
1648 | - return; |
1649 | -} |
1650 | - |
1651 | - |
1652 | -int |
1653 | -gf_rdma_post_unref (gf_rdma_post_t *post) |
1654 | -{ |
1655 | - int refcount = -1; |
1656 | - |
1657 | - if (post == NULL) { |
1658 | - goto out; |
1659 | - } |
1660 | - |
1661 | - pthread_mutex_lock (&post->lock); |
1662 | - { |
1663 | - refcount = --post->refcount; |
1664 | - } |
1665 | - pthread_mutex_unlock (&post->lock); |
1666 | - |
1667 | - if (refcount == 0) { |
1668 | - gf_rdma_post_context_destroy (&post->ctx); |
1669 | - if (post->type == GF_RDMA_SEND_POST) { |
1670 | - gf_rdma_put_post (&post->device->sendq, post); |
1671 | - } else { |
1672 | - gf_rdma_post_recv (post->device->srq, post); |
1673 | - } |
1674 | - } |
1675 | -out: |
1676 | - return refcount; |
1677 | -} |
1678 | - |
1679 | - |
1680 | -int |
1681 | -gf_rdma_post_get_refcount (gf_rdma_post_t *post) |
1682 | -{ |
1683 | - int refcount = -1; |
1684 | - |
1685 | - if (post == NULL) { |
1686 | - goto out; |
1687 | - } |
1688 | - |
1689 | - pthread_mutex_lock (&post->lock); |
1690 | - { |
1691 | - refcount = post->refcount; |
1692 | - } |
1693 | - pthread_mutex_unlock (&post->lock); |
1694 | - |
1695 | -out: |
1696 | - return refcount; |
1697 | -} |
1698 | - |
1699 | -gf_rdma_post_t * |
1700 | -gf_rdma_post_ref (gf_rdma_post_t *post) |
1701 | -{ |
1702 | - if (post == NULL) { |
1703 | - goto out; |
1704 | - } |
1705 | - |
1706 | - pthread_mutex_lock (&post->lock); |
1707 | - { |
1708 | - post->refcount++; |
1709 | - } |
1710 | - pthread_mutex_unlock (&post->lock); |
1711 | - |
1712 | -out: |
1713 | - return post; |
1714 | -} |
1715 | - |
1716 | - |
1717 | -int32_t |
1718 | -__gf_rdma_ioq_churn_request (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry, |
1719 | - gf_rdma_post_t *post) |
1720 | -{ |
1721 | - gf_rdma_chunktype_t rtype = gf_rdma_noch; |
1722 | - gf_rdma_chunktype_t wtype = gf_rdma_noch; |
1723 | - uint64_t send_size = 0; |
1724 | - gf_rdma_header_t *hdr = NULL; |
1725 | - struct rpc_msg *rpc_msg = NULL; |
1726 | - uint32_t *chunkptr = NULL; |
1727 | - char *buf = NULL; |
1728 | - int32_t ret = 0; |
1729 | - gf_rdma_private_t *priv = NULL; |
1730 | - gf_rdma_device_t *device = NULL; |
1731 | - int chunk_count = 0; |
1732 | - gf_rdma_request_context_t *request_ctx = NULL; |
1733 | - uint32_t prog_payload_length = 0, len = 0; |
1734 | - struct rpc_req *rpc_req = NULL; |
1735 | - |
1736 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out); |
1737 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, entry, out); |
1738 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, post, out); |
1739 | - |
1740 | - if ((entry->msg.request.rsphdr_count != 0) |
1741 | - && (entry->msg.request.rsp_payload_count != 0)) { |
1742 | - ret = -1; |
1743 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1744 | - "both write-chunklist and reply-chunk cannot be " |
1745 | - "present"); |
1746 | - goto out; |
1747 | - } |
1748 | - |
1749 | - post->ctx.is_request = 1; |
1750 | - priv = peer->trans->private; |
1751 | - device = priv->device; |
1752 | - |
1753 | - hdr = (gf_rdma_header_t *)post->buf; |
1754 | - |
1755 | - send_size = iov_length (entry->rpchdr, entry->rpchdr_count) |
1756 | - + iov_length (entry->proghdr, entry->proghdr_count) |
1757 | - + GLUSTERFS_RDMA_MAX_HEADER_SIZE; |
1758 | - |
1759 | - if (entry->prog_payload_count != 0) { |
1760 | - prog_payload_length |
1761 | - = iov_length (entry->prog_payload, |
1762 | - entry->prog_payload_count); |
1763 | - } |
1764 | - |
1765 | - if (send_size > GLUSTERFS_RDMA_INLINE_THRESHOLD) { |
1766 | - rtype = gf_rdma_areadch; |
1767 | - } else if ((send_size + prog_payload_length) |
1768 | - < GLUSTERFS_RDMA_INLINE_THRESHOLD) { |
1769 | - rtype = gf_rdma_noch; |
1770 | - } else if (entry->prog_payload_count != 0) { |
1771 | - rtype = gf_rdma_readch; |
1772 | - } |
1773 | - |
1774 | - if (entry->msg.request.rsphdr_count != 0) { |
1775 | - wtype = gf_rdma_replych; |
1776 | - } else if (entry->msg.request.rsp_payload_count != 0) { |
1777 | - wtype = gf_rdma_writech; |
1778 | - } |
1779 | - |
1780 | - if (rtype == gf_rdma_readch) { |
1781 | - chunk_count += entry->prog_payload_count; |
1782 | - } else if (rtype == gf_rdma_areadch) { |
1783 | - chunk_count += entry->rpchdr_count; |
1784 | - chunk_count += entry->proghdr_count; |
1785 | - } |
1786 | - |
1787 | - if (wtype == gf_rdma_writech) { |
1788 | - chunk_count += entry->msg.request.rsp_payload_count; |
1789 | - } else if (wtype == gf_rdma_replych) { |
1790 | - chunk_count += entry->msg.request.rsphdr_count; |
1791 | - } |
1792 | - |
1793 | - if (chunk_count > GF_RDMA_MAX_SEGMENTS) { |
1794 | - ret = -1; |
1795 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1796 | - "chunk count(%d) exceeding maximum allowed RDMA " |
1797 | - "segment count(%d)", chunk_count, GF_RDMA_MAX_SEGMENTS); |
1798 | - goto out; |
1799 | - } |
1800 | - |
1801 | - if ((wtype != gf_rdma_noch) || (rtype != gf_rdma_noch)) { |
1802 | - request_ctx = mem_get (device->request_ctx_pool); |
1803 | - if (request_ctx == NULL) { |
1804 | - ret = -1; |
1805 | - goto out; |
1806 | - } |
1807 | - |
1808 | - memset (request_ctx, 0, sizeof (*request_ctx)); |
1809 | - |
1810 | - request_ctx->pool = device->request_ctx_pool; |
1811 | - request_ctx->peer = peer; |
1812 | - |
1813 | - entry->msg.request.rpc_req->conn_private = request_ctx; |
1814 | - |
1815 | - if (entry->msg.request.rsp_iobref != NULL) { |
1816 | - request_ctx->rsp_iobref |
1817 | - = iobref_ref (entry->msg.request.rsp_iobref); |
1818 | - } |
1819 | - } |
1820 | - |
1821 | - rpc_msg = (struct rpc_msg *) entry->rpchdr[0].iov_base; |
1822 | - |
1823 | - hdr->rm_xid = rpc_msg->rm_xid; /* no need of hton32(rpc_msg->rm_xid), |
1824 | - * since rpc_msg->rm_xid is already |
1825 | - * hton32ed value of actual xid |
1826 | - */ |
1827 | - hdr->rm_vers = hton32 (GF_RDMA_VERSION); |
1828 | - hdr->rm_credit = hton32 (peer->send_count); |
1829 | - if (rtype == gf_rdma_areadch) { |
1830 | - hdr->rm_type = hton32 (GF_RDMA_NOMSG); |
1831 | - } else { |
1832 | - hdr->rm_type = hton32 (GF_RDMA_MSG); |
1833 | - } |
1834 | - |
1835 | - chunkptr = &hdr->rm_body.rm_chunks[0]; |
1836 | - if (rtype != gf_rdma_noch) { |
1837 | - ret = __gf_rdma_create_read_chunks (peer, entry, rtype, |
1838 | - &chunkptr, |
1839 | - request_ctx); |
1840 | - if (ret != 0) { |
1841 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1842 | - "creation of read chunks failed"); |
1843 | - goto out; |
1844 | - } |
1845 | - } else { |
1846 | - *chunkptr++ = 0; /* no read chunks */ |
1847 | - } |
1848 | - |
1849 | - if (wtype != gf_rdma_noch) { |
1850 | - ret = __gf_rdma_create_write_chunks (peer, entry, wtype, |
1851 | - &chunkptr, |
1852 | - request_ctx); |
1853 | - if (ret != 0) { |
1854 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1855 | - "creation of write/reply chunk failed"); |
1856 | - goto out; |
1857 | - } |
1858 | - } else { |
1859 | - *chunkptr++ = 0; /* no write chunks */ |
1860 | - *chunkptr++ = 0; /* no reply chunk */ |
1861 | - } |
1862 | - |
1863 | - buf = (char *)chunkptr; |
1864 | - |
1865 | - if (rtype != gf_rdma_areadch) { |
1866 | - iov_unload (buf, entry->rpchdr, entry->rpchdr_count); |
1867 | - buf += iov_length (entry->rpchdr, entry->rpchdr_count); |
1868 | - |
1869 | - iov_unload (buf, entry->proghdr, entry->proghdr_count); |
1870 | - buf += iov_length (entry->proghdr, entry->proghdr_count); |
1871 | - |
1872 | - if (rtype != gf_rdma_readch) { |
1873 | - iov_unload (buf, entry->prog_payload, |
1874 | - entry->prog_payload_count); |
1875 | - buf += iov_length (entry->prog_payload, |
1876 | - entry->prog_payload_count); |
1877 | - } |
1878 | - } |
1879 | - |
1880 | - len = buf - post->buf; |
1881 | - |
1882 | - gf_rdma_post_ref (post); |
1883 | - |
1884 | - ret = gf_rdma_post_send (peer->qp, post, len); |
1885 | - if (!ret) { |
1886 | - ret = len; |
1887 | - } else { |
1888 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1889 | - "gf_rdma_post_send (to %s) failed with ret = %d (%s)", |
1890 | - peer->trans->peerinfo.identifier, ret, |
1891 | - (ret > 0) ? strerror (ret) : ""); |
1892 | - gf_rdma_post_unref (post); |
1893 | - __gf_rdma_disconnect (peer->trans); |
1894 | - ret = -1; |
1895 | - } |
1896 | - |
1897 | -out: |
1898 | - if (ret == -1) { |
1899 | - rpc_req = entry->msg.request.rpc_req; |
1900 | - |
1901 | - if (request_ctx != NULL) { |
1902 | - __gf_rdma_request_context_destroy (rpc_req->conn_private); |
1903 | - } |
1904 | - |
1905 | - rpc_req->conn_private = NULL; |
1906 | - } |
1907 | - |
1908 | - return ret; |
1909 | -} |
1910 | - |
1911 | - |
1912 | -static inline void |
1913 | -__gf_rdma_fill_reply_header (gf_rdma_header_t *header, struct iovec *rpchdr, |
1914 | - gf_rdma_reply_info_t *reply_info, int credits) |
1915 | -{ |
1916 | - struct rpc_msg *rpc_msg = NULL; |
1917 | - |
1918 | - if (reply_info != NULL) { |
1919 | - header->rm_xid = hton32 (reply_info->rm_xid); |
1920 | - } else { |
1921 | - rpc_msg = rpchdr[0].iov_base; /* assume rpchdr contains |
1922 | - * only one vector. |
1923 | - * (which is true) |
1924 | - */ |
1925 | - header->rm_xid = rpc_msg->rm_xid; |
1926 | - } |
1927 | - |
1928 | - header->rm_type = hton32 (GF_RDMA_MSG); |
1929 | - header->rm_vers = hton32 (GF_RDMA_VERSION); |
1930 | - header->rm_credit = hton32 (credits); |
1931 | - |
1932 | - header->rm_body.rm_chunks[0] = 0; /* no read chunks */ |
1933 | - header->rm_body.rm_chunks[1] = 0; /* no write chunks */ |
1934 | - header->rm_body.rm_chunks[2] = 0; /* no reply chunks */ |
1935 | - |
1936 | - return; |
1937 | -} |
1938 | - |
1939 | - |
1940 | -int32_t |
1941 | -__gf_rdma_send_reply_inline (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry, |
1942 | - gf_rdma_post_t *post, |
1943 | - gf_rdma_reply_info_t *reply_info) |
1944 | -{ |
1945 | - gf_rdma_header_t *header = NULL; |
1946 | - int32_t send_size = 0, ret = 0; |
1947 | - char *buf = NULL; |
1948 | - |
1949 | - send_size = iov_length (entry->rpchdr, entry->rpchdr_count) |
1950 | - + iov_length (entry->proghdr, entry->proghdr_count) |
1951 | - + iov_length (entry->prog_payload, entry->prog_payload_count) |
1952 | - + sizeof (gf_rdma_header_t); /* |
1953 | - * remember, no chunklists in the |
1954 | - * reply |
1955 | - */ |
1956 | - |
1957 | - if (send_size > GLUSTERFS_RDMA_INLINE_THRESHOLD) { |
1958 | - ret = __gf_rdma_send_error (peer, entry, post, reply_info, |
1959 | - ERR_CHUNK); |
1960 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1961 | - "msg size (%d) is greater than maximum size " |
1962 | - "of msg that can be sent inlined (%d)", |
1963 | - send_size, GLUSTERFS_RDMA_INLINE_THRESHOLD); |
1964 | - goto out; |
1965 | - } |
1966 | - |
1967 | - header = (gf_rdma_header_t *)post->buf; |
1968 | - |
1969 | - __gf_rdma_fill_reply_header (header, entry->rpchdr, reply_info, |
1970 | - peer->send_count); |
1971 | - |
1972 | - buf = (char *)&header->rm_body.rm_chunks[3]; |
1973 | - |
1974 | - if (entry->rpchdr_count != 0) { |
1975 | - iov_unload (buf, entry->rpchdr, entry->rpchdr_count); |
1976 | - buf += iov_length (entry->rpchdr, entry->rpchdr_count); |
1977 | - } |
1978 | - |
1979 | - if (entry->proghdr_count != 0) { |
1980 | - iov_unload (buf, entry->proghdr, entry->proghdr_count); |
1981 | - buf += iov_length (entry->proghdr, entry->proghdr_count); |
1982 | - } |
1983 | - |
1984 | - if (entry->prog_payload_count != 0) { |
1985 | - iov_unload (buf, entry->prog_payload, |
1986 | - entry->prog_payload_count); |
1987 | - buf += iov_length (entry->prog_payload, |
1988 | - entry->prog_payload_count); |
1989 | - } |
1990 | - |
1991 | - gf_rdma_post_ref (post); |
1992 | - |
1993 | - ret = gf_rdma_post_send (peer->qp, post, (buf - post->buf)); |
1994 | - if (!ret) { |
1995 | - ret = send_size; |
1996 | - } else { |
1997 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
1998 | - "posting send (to %s) failed with ret = %d (%s)", |
1999 | - peer->trans->peerinfo.identifier, ret, |
2000 | - (ret > 0) ? strerror (ret) : ""); |
2001 | - gf_rdma_post_unref (post); |
2002 | - __gf_rdma_disconnect (peer->trans); |
2003 | - ret = -1; |
2004 | - } |
2005 | - |
2006 | -out: |
2007 | - return ret; |
2008 | -} |
2009 | - |
2010 | - |
2011 | -int32_t |
2012 | -__gf_rdma_reply_encode_write_chunks (gf_rdma_peer_t *peer, |
2013 | - uint32_t payload_size, |
2014 | - gf_rdma_post_t *post, |
2015 | - gf_rdma_reply_info_t *reply_info, |
2016 | - uint32_t **ptr) |
2017 | -{ |
2018 | - uint32_t chunk_size = 0; |
2019 | - int32_t ret = -1; |
2020 | - gf_rdma_write_array_t *target_array = NULL; |
2021 | - int i = 0; |
2022 | - |
2023 | - target_array = (gf_rdma_write_array_t *)*ptr; |
2024 | - |
2025 | - for (i = 0; i < reply_info->wc_array->wc_nchunks; i++) { |
2026 | - chunk_size += |
2027 | - reply_info->wc_array->wc_array[i].wc_target.rs_length; |
2028 | - } |
2029 | - |
2030 | - if (chunk_size < payload_size) { |
2031 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG, |
2032 | - "length of payload (%d) is exceeding the total " |
2033 | - "write chunk length (%d)", payload_size, chunk_size); |
2034 | - goto out; |
2035 | - } |
2036 | - |
2037 | - target_array->wc_discrim = hton32 (1); |
2038 | - for (i = 0; (i < reply_info->wc_array->wc_nchunks) |
2039 | - && (payload_size != 0); |
2040 | - i++) { |
2041 | - target_array->wc_array[i].wc_target.rs_offset |
2042 | - = hton64 (reply_info->wc_array->wc_array[i].wc_target.rs_offset); |
2043 | - |
2044 | - target_array->wc_array[i].wc_target.rs_length |
2045 | - = hton32 (min (payload_size, |
2046 | - reply_info->wc_array->wc_array[i].wc_target.rs_length)); |
2047 | - } |
2048 | - |
2049 | - target_array->wc_nchunks = hton32 (i); |
2050 | - target_array->wc_array[i].wc_target.rs_handle = 0; /* terminate |
2051 | - chunklist */ |
2052 | - |
2053 | - ret = 0; |
2054 | - |
2055 | - *ptr = &target_array->wc_array[i].wc_target.rs_length; |
2056 | -out: |
2057 | - return ret; |
2058 | -} |
2059 | - |
2060 | - |
2061 | -inline int32_t |
2062 | -__gf_rdma_register_local_mr_for_rdma (gf_rdma_peer_t *peer, |
2063 | - struct iovec *vector, int count, |
2064 | - gf_rdma_post_context_t *ctx) |
2065 | -{ |
2066 | - int i = 0; |
2067 | - int32_t ret = -1; |
2068 | - gf_rdma_private_t *priv = NULL; |
2069 | - gf_rdma_device_t *device = NULL; |
2070 | - |
2071 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, ctx, out); |
2072 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, vector, out); |
2073 | - |
2074 | - priv = peer->trans->private; |
2075 | - device = priv->device; |
2076 | - |
2077 | - for (i = 0; i < count; i++) { |
2078 | - /* what if the memory is registered more than once? |
2079 | - * Assume that a single write buffer is passed to afr, which |
2080 | - * then passes it to its children. If more than one children |
2081 | - * happen to use rdma, then the buffer is registered more than |
2082 | - * once. |
2083 | - * Ib-verbs specification says that multiple registrations of |
2084 | - * same memory location is allowed. Refer to 10.6.3.8 of |
2085 | - * Infiniband Architecture Specification Volume 1 |
2086 | - * (Release 1.2.1) |
2087 | - */ |
2088 | - ctx->mr[ctx->mr_count] = ibv_reg_mr (device->pd, |
2089 | - vector[i].iov_base, |
2090 | - vector[i].iov_len, |
2091 | - IBV_ACCESS_LOCAL_WRITE); |
2092 | - if (ctx->mr[ctx->mr_count] == NULL) { |
2093 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2094 | - "registering memory for IBV_ACCESS_LOCAL_WRITE " |
2095 | - "failed (%s)", strerror (errno)); |
2096 | - goto out; |
2097 | - } |
2098 | - |
2099 | - ctx->mr_count++; |
2100 | - } |
2101 | - |
2102 | - ret = 0; |
2103 | -out: |
2104 | - return ret; |
2105 | -} |
2106 | - |
2107 | -/* 1. assumes xfer_len of data is pointed by vector(s) starting from vec[*idx] |
2108 | - * 2. modifies vec |
2109 | - */ |
2110 | -int32_t |
2111 | -__gf_rdma_write (gf_rdma_peer_t *peer, gf_rdma_post_t *post, struct iovec *vec, |
2112 | - uint32_t xfer_len, int *idx, gf_rdma_write_chunk_t *writech) |
2113 | -{ |
2114 | - int size = 0, num_sge = 0, i = 0; |
2115 | - int32_t ret = -1; |
2116 | - struct ibv_sge *sg_list = NULL; |
2117 | - struct ibv_send_wr wr = { |
2118 | - .opcode = IBV_WR_RDMA_WRITE, |
2119 | - .send_flags = IBV_SEND_SIGNALED, |
2120 | - }, *bad_wr; |
2121 | - |
2122 | - if ((peer == NULL) || (writech == NULL) || (idx == NULL) |
2123 | - || (post == NULL) || (vec == NULL) || (xfer_len == 0)) { |
2124 | - goto out; |
2125 | - } |
2126 | - |
2127 | - for (i = *idx; size < xfer_len; i++) { |
2128 | - size += vec[i].iov_len; |
2129 | - } |
2130 | - |
2131 | - num_sge = i - *idx; |
2132 | - |
2133 | - sg_list = GF_CALLOC (num_sge, sizeof (struct ibv_sge), |
2134 | - gf_common_mt_sge); |
2135 | - if (sg_list == NULL) { |
2136 | - ret = -1; |
2137 | - goto out; |
2138 | - } |
2139 | - |
2140 | - for ((i = *idx), (num_sge = 0); (xfer_len != 0); i++, num_sge++) { |
2141 | - size = min (xfer_len, vec[i].iov_len); |
2142 | - |
2143 | - sg_list [num_sge].addr = (unsigned long)vec[i].iov_base; |
2144 | - sg_list [num_sge].length = size; |
2145 | - sg_list [num_sge].lkey = post->ctx.mr[i]->lkey; |
2146 | - |
2147 | - xfer_len -= size; |
2148 | - } |
2149 | - |
2150 | - *idx = i; |
2151 | - |
2152 | - if (size < vec[i - 1].iov_len) { |
2153 | - vec[i - 1].iov_base += size; |
2154 | - vec[i - 1].iov_len -= size; |
2155 | - *idx = i - 1; |
2156 | - } |
2157 | - |
2158 | - wr.sg_list = sg_list; |
2159 | - wr.num_sge = num_sge; |
2160 | - wr.wr_id = (unsigned long) gf_rdma_post_ref (post); |
2161 | - wr.wr.rdma.rkey = writech->wc_target.rs_handle; |
2162 | - wr.wr.rdma.remote_addr = writech->wc_target.rs_offset; |
2163 | - |
2164 | - ret = ibv_post_send(peer->qp, &wr, &bad_wr); |
2165 | - if (ret) { |
2166 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2167 | - "rdma write to " |
2168 | - "client (%s) failed with ret = %d (%s)", |
2169 | - peer->trans->peerinfo.identifier, ret, |
2170 | - (ret > 0) ? strerror (ret) : ""); |
2171 | - ret = -1; |
2172 | - } |
2173 | - |
2174 | - GF_FREE (sg_list); |
2175 | -out: |
2176 | - return ret; |
2177 | -} |
2178 | - |
2179 | - |
2180 | -int32_t |
2181 | -__gf_rdma_do_gf_rdma_write (gf_rdma_peer_t *peer, gf_rdma_post_t *post, |
2182 | - struct iovec *vector, int count, |
2183 | - struct iobref *iobref, |
2184 | - gf_rdma_reply_info_t *reply_info) |
2185 | -{ |
2186 | - int i = 0, payload_idx = 0; |
2187 | - uint32_t payload_size = 0, xfer_len = 0; |
2188 | - int32_t ret = -1; |
2189 | - |
2190 | - if (count != 0) { |
2191 | - payload_size = iov_length (vector, count); |
2192 | - } |
2193 | - |
2194 | - if (payload_size == 0) { |
2195 | - ret = 0; |
2196 | - goto out; |
2197 | - } |
2198 | - |
2199 | - ret = __gf_rdma_register_local_mr_for_rdma (peer, vector, count, |
2200 | - &post->ctx); |
2201 | - if (ret == -1) { |
2202 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2203 | - "registering memory region for rdma failed"); |
2204 | - goto out; |
2205 | - } |
2206 | - |
2207 | - post->ctx.iobref = iobref_ref (iobref); |
2208 | - |
2209 | - for (i = 0; (i < reply_info->wc_array->wc_nchunks) |
2210 | - && (payload_size != 0); |
2211 | - i++) { |
2212 | - xfer_len = min (payload_size, |
2213 | - reply_info->wc_array->wc_array[i].wc_target.rs_length); |
2214 | - |
2215 | - ret = __gf_rdma_write (peer, post, vector, xfer_len, |
2216 | - &payload_idx, |
2217 | - &reply_info->wc_array->wc_array[i]); |
2218 | - if (ret == -1) { |
2219 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2220 | - "rdma write to client (%s) failed", |
2221 | - peer->trans->peerinfo.identifier); |
2222 | - goto out; |
2223 | - } |
2224 | - |
2225 | - payload_size -= xfer_len; |
2226 | - } |
2227 | - |
2228 | - ret = 0; |
2229 | -out: |
2230 | - |
2231 | - return ret; |
2232 | -} |
2233 | - |
2234 | - |
2235 | -int32_t |
2236 | -__gf_rdma_send_reply_type_nomsg (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry, |
2237 | - gf_rdma_post_t *post, |
2238 | - gf_rdma_reply_info_t *reply_info) |
2239 | -{ |
2240 | - gf_rdma_header_t *header = NULL; |
2241 | - char *buf = NULL; |
2242 | - uint32_t payload_size = 0; |
2243 | - int count = 0, i = 0; |
2244 | - int32_t ret = 0; |
2245 | - struct iovec vector[MAX_IOVEC]; |
2246 | - |
2247 | - header = (gf_rdma_header_t *)post->buf; |
2248 | - |
2249 | - __gf_rdma_fill_reply_header (header, entry->rpchdr, reply_info, |
2250 | - peer->send_count); |
2251 | - |
2252 | - header->rm_type = hton32 (GF_RDMA_NOMSG); |
2253 | - |
2254 | - payload_size = iov_length (entry->rpchdr, entry->rpchdr_count) + |
2255 | - iov_length (entry->proghdr, entry->proghdr_count); |
2256 | - |
2257 | - /* encode reply chunklist */ |
2258 | - buf = (char *)&header->rm_body.rm_chunks[2]; |
2259 | - ret = __gf_rdma_reply_encode_write_chunks (peer, payload_size, post, |
2260 | - reply_info, |
2261 | - (uint32_t **)&buf); |
2262 | - if (ret == -1) { |
2263 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2264 | - "encoding write chunks failed"); |
2265 | - ret = __gf_rdma_send_error (peer, entry, post, reply_info, |
2266 | - ERR_CHUNK); |
2267 | - goto out; |
2268 | - } |
2269 | - |
2270 | - gf_rdma_post_ref (post); |
2271 | - |
2272 | - for (i = 0; i < entry->rpchdr_count; i++) { |
2273 | - vector[count++] = entry->rpchdr[i]; |
2274 | - } |
2275 | - |
2276 | - for (i = 0; i < entry->proghdr_count; i++) { |
2277 | - vector[count++] = entry->proghdr[i]; |
2278 | - } |
2279 | - |
2280 | - ret = __gf_rdma_do_gf_rdma_write (peer, post, vector, count, |
2281 | - entry->iobref, reply_info); |
2282 | - if (ret == -1) { |
2283 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2284 | - "rdma write to peer (%s) failed", |
2285 | - peer->trans->peerinfo.identifier); |
2286 | - gf_rdma_post_unref (post); |
2287 | - goto out; |
2288 | - } |
2289 | - |
2290 | - ret = gf_rdma_post_send (peer->qp, post, (buf - post->buf)); |
2291 | - if (ret) { |
2292 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2293 | - "posting a send request to client (%s) failed with " |
2294 | - "ret = %d (%s)", peer->trans->peerinfo.identifier, ret, |
2295 | - (ret > 0) ? strerror (ret) : ""); |
2296 | - ret = -1; |
2297 | - gf_rdma_post_unref (post); |
2298 | - } else { |
2299 | - ret = payload_size; |
2300 | - } |
2301 | - |
2302 | -out: |
2303 | - return ret; |
2304 | -} |
2305 | - |
2306 | - |
2307 | -int32_t |
2308 | -__gf_rdma_send_reply_type_msg (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry, |
2309 | - gf_rdma_post_t *post, |
2310 | - gf_rdma_reply_info_t *reply_info) |
2311 | -{ |
2312 | - gf_rdma_header_t *header = NULL; |
2313 | - int32_t send_size = 0, ret = 0; |
2314 | - char *ptr = NULL; |
2315 | - uint32_t payload_size = 0; |
2316 | - |
2317 | - send_size = iov_length (entry->rpchdr, entry->rpchdr_count) |
2318 | - + iov_length (entry->proghdr, entry->proghdr_count) |
2319 | - + GLUSTERFS_RDMA_MAX_HEADER_SIZE; |
2320 | - |
2321 | - if (send_size > GLUSTERFS_RDMA_INLINE_THRESHOLD) { |
2322 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2323 | - "client has provided only write chunks, but the " |
2324 | - "combined size of rpc and program header (%d) is " |
2325 | - "exceeding the size of msg that can be sent using " |
2326 | - "RDMA send (%d)", send_size, |
2327 | - GLUSTERFS_RDMA_INLINE_THRESHOLD); |
2328 | - |
2329 | - ret = __gf_rdma_send_error (peer, entry, post, reply_info, |
2330 | - ERR_CHUNK); |
2331 | - goto out; |
2332 | - } |
2333 | - |
2334 | - header = (gf_rdma_header_t *)post->buf; |
2335 | - |
2336 | - __gf_rdma_fill_reply_header (header, entry->rpchdr, reply_info, |
2337 | - peer->send_count); |
2338 | - |
2339 | - payload_size = iov_length (entry->prog_payload, |
2340 | - entry->prog_payload_count); |
2341 | - ptr = (char *)&header->rm_body.rm_chunks[1]; |
2342 | - |
2343 | - ret = __gf_rdma_reply_encode_write_chunks (peer, payload_size, post, |
2344 | - reply_info, |
2345 | - (uint32_t **)&ptr); |
2346 | - if (ret == -1) { |
2347 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2348 | - "encoding write chunks failed"); |
2349 | - ret = __gf_rdma_send_error (peer, entry, post, reply_info, |
2350 | - ERR_CHUNK); |
2351 | - goto out; |
2352 | - } |
2353 | - |
2354 | - *(uint32_t *)ptr = 0; /* terminate reply chunklist */ |
2355 | - ptr += sizeof (uint32_t); |
2356 | - |
2357 | - gf_rdma_post_ref (post); |
2358 | - |
2359 | - ret = __gf_rdma_do_gf_rdma_write (peer, post, entry->prog_payload, |
2360 | - entry->prog_payload_count, |
2361 | - entry->iobref, reply_info); |
2362 | - if (ret == -1) { |
2363 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, "rdma write to peer " |
2364 | - "(%s) failed", peer->trans->peerinfo.identifier); |
2365 | - gf_rdma_post_unref (post); |
2366 | - goto out; |
2367 | - } |
2368 | - |
2369 | - iov_unload (ptr, entry->rpchdr, entry->rpchdr_count); |
2370 | - ptr += iov_length (entry->rpchdr, entry->rpchdr_count); |
2371 | - |
2372 | - iov_unload (ptr, entry->proghdr, entry->proghdr_count); |
2373 | - ptr += iov_length (entry->proghdr, entry->proghdr_count); |
2374 | - |
2375 | - ret = gf_rdma_post_send (peer->qp, post, (ptr - post->buf)); |
2376 | - if (ret) { |
2377 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2378 | - "rdma send to client (%s) failed with ret = %d (%s)", |
2379 | - peer->trans->peerinfo.identifier, ret, |
2380 | - (ret > 0) ? strerror (ret) : ""); |
2381 | - gf_rdma_post_unref (post); |
2382 | - ret = -1; |
2383 | - } else { |
2384 | - ret = send_size + payload_size; |
2385 | - } |
2386 | - |
2387 | -out: |
2388 | - return ret; |
2389 | -} |
2390 | - |
2391 | - |
2392 | -void |
2393 | -gf_rdma_reply_info_destroy (gf_rdma_reply_info_t *reply_info) |
2394 | -{ |
2395 | - if (reply_info == NULL) { |
2396 | - goto out; |
2397 | - } |
2398 | - |
2399 | - if (reply_info->wc_array != NULL) { |
2400 | - GF_FREE (reply_info->wc_array); |
2401 | - reply_info->wc_array = NULL; |
2402 | - } |
2403 | - |
2404 | - mem_put (reply_info); |
2405 | -out: |
2406 | - return; |
2407 | -} |
2408 | - |
2409 | - |
2410 | -gf_rdma_reply_info_t * |
2411 | -gf_rdma_reply_info_alloc (gf_rdma_peer_t *peer) |
2412 | -{ |
2413 | - gf_rdma_reply_info_t *reply_info = NULL; |
2414 | - gf_rdma_private_t *priv = NULL; |
2415 | - |
2416 | - priv = peer->trans->private; |
2417 | - |
2418 | - reply_info = mem_get (priv->device->reply_info_pool); |
2419 | - if (reply_info == NULL) { |
2420 | - goto out; |
2421 | - } |
2422 | - |
2423 | - memset (reply_info, 0, sizeof (*reply_info)); |
2424 | - reply_info->pool = priv->device->reply_info_pool; |
2425 | - |
2426 | -out: |
2427 | - return reply_info; |
2428 | -} |
2429 | - |
2430 | - |
2431 | -int32_t |
2432 | -__gf_rdma_ioq_churn_reply (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry, |
2433 | - gf_rdma_post_t *post) |
2434 | -{ |
2435 | - gf_rdma_reply_info_t *reply_info = NULL; |
2436 | - int32_t ret = -1; |
2437 | - gf_rdma_chunktype_t type = gf_rdma_noch; |
2438 | - |
2439 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out); |
2440 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, entry, out); |
2441 | - GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, post, out); |
2442 | - |
2443 | - reply_info = entry->msg.reply_info; |
2444 | - if (reply_info != NULL) { |
2445 | - type = reply_info->type; |
2446 | - } |
2447 | - |
2448 | - switch (type) { |
2449 | - case gf_rdma_noch: |
2450 | - ret = __gf_rdma_send_reply_inline (peer, entry, post, |
2451 | - reply_info); |
2452 | - if (ret < 0) { |
2453 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2454 | - "failed to send reply to peer (%s) as an " |
2455 | - "inlined rdma msg", |
2456 | - peer->trans->peerinfo.identifier); |
2457 | - } |
2458 | - break; |
2459 | - |
2460 | - case gf_rdma_replych: |
2461 | - ret = __gf_rdma_send_reply_type_nomsg (peer, entry, post, |
2462 | - reply_info); |
2463 | - if (ret < 0) { |
2464 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2465 | - "failed to send reply to peer (%s) as " |
2466 | - "RDMA_NOMSG", peer->trans->peerinfo.identifier); |
2467 | - } |
2468 | - break; |
2469 | - |
2470 | - case gf_rdma_writech: |
2471 | - ret = __gf_rdma_send_reply_type_msg (peer, entry, post, |
2472 | - reply_info); |
2473 | - if (ret < 0) { |
2474 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2475 | - "failed to send reply with write chunks " |
2476 | - "to peer (%s)", |
2477 | - peer->trans->peerinfo.identifier); |
2478 | - } |
2479 | - break; |
2480 | - |
2481 | - default: |
2482 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2483 | - "invalid chunktype (%d) specified for sending reply " |
2484 | - " (peer:%s)", type, peer->trans->peerinfo.identifier); |
2485 | - break; |
2486 | - } |
2487 | - |
2488 | - if (reply_info != NULL) { |
2489 | - gf_rdma_reply_info_destroy (reply_info); |
2490 | - } |
2491 | -out: |
2492 | - return ret; |
2493 | -} |
2494 | - |
2495 | - |
2496 | -int32_t |
2497 | -__gf_rdma_ioq_churn_entry (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry) |
2498 | -{ |
2499 | - int32_t ret = 0, quota = 0; |
2500 | - gf_rdma_private_t *priv = NULL; |
2501 | - gf_rdma_device_t *device = NULL; |
2502 | - gf_rdma_options_t *options = NULL; |
2503 | - gf_rdma_post_t *post = NULL; |
2504 | - |
2505 | - priv = peer->trans->private; |
2506 | - options = &priv->options; |
2507 | - device = priv->device; |
2508 | - |
2509 | - quota = __gf_rdma_quota_get (peer); |
2510 | - if (quota > 0) { |
2511 | - post = gf_rdma_get_post (&device->sendq); |
2512 | - if (post == NULL) { |
2513 | - post = gf_rdma_new_post (peer->trans, device, |
2514 | - (options->send_size + 2048), |
2515 | - GF_RDMA_SEND_POST); |
2516 | - } |
2517 | - |
2518 | - if (post == NULL) { |
2519 | - ret = -1; |
2520 | - gf_log_callingfn (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2521 | - "not able to get a post to send msg"); |
2522 | - goto out; |
2523 | - } |
2524 | - |
2525 | - if (entry->is_request) { |
2526 | - ret = __gf_rdma_ioq_churn_request (peer, entry, post); |
2527 | - if (ret < 0) { |
2528 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2529 | - "failed to process request ioq entry " |
2530 | - "to peer(%s)", |
2531 | - peer->trans->peerinfo.identifier); |
2532 | - } |
2533 | - } else { |
2534 | - ret = __gf_rdma_ioq_churn_reply (peer, entry, post); |
2535 | - if (ret < 0) { |
2536 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2537 | - "failed to process reply ioq entry " |
2538 | - "to peer (%s)", |
2539 | - peer->trans->peerinfo.identifier); |
2540 | - } |
2541 | - } |
2542 | - |
2543 | - if (ret != 0) { |
2544 | - __gf_rdma_ioq_entry_free (entry); |
2545 | - } |
2546 | - } else { |
2547 | - ret = 0; |
2548 | - } |
2549 | - |
2550 | -out: |
2551 | - return ret; |
2552 | -} |
2553 | - |
2554 | - |
2555 | -static int32_t |
2556 | -__gf_rdma_ioq_churn (gf_rdma_peer_t *peer) |
2557 | -{ |
2558 | - gf_rdma_ioq_t *entry = NULL; |
2559 | - int32_t ret = 0; |
2560 | - |
2561 | - while (!list_empty (&peer->ioq)) |
2562 | - { |
2563 | - /* pick next entry */ |
2564 | - entry = peer->ioq_next; |
2565 | - |
2566 | - ret = __gf_rdma_ioq_churn_entry (peer, entry); |
2567 | - |
2568 | - if (ret <= 0) |
2569 | - break; |
2570 | - } |
2571 | - |
2572 | - /* |
2573 | - list_for_each_entry_safe (entry, dummy, &peer->ioq, list) { |
2574 | - ret = __gf_rdma_ioq_churn_entry (peer, entry); |
2575 | - if (ret <= 0) { |
2576 | - break; |
2577 | - } |
2578 | - } |
2579 | - */ |
2580 | - |
2581 | - return ret; |
2582 | -} |
2583 | - |
2584 | - |
2585 | -static int32_t |
2586 | -gf_rdma_writev (rpc_transport_t *this, gf_rdma_ioq_t *entry) |
2587 | -{ |
2588 | - int32_t ret = 0, need_append = 1; |
2589 | - gf_rdma_private_t *priv = NULL; |
2590 | - gf_rdma_peer_t *peer = NULL; |
2591 | - |
2592 | - priv = this->private; |
2593 | - pthread_mutex_lock (&priv->write_mutex); |
2594 | - { |
2595 | - if (!priv->connected) { |
2596 | - gf_log (this->name, GF_LOG_WARNING, |
2597 | - "rdma is not connected to peer (%s)", |
2598 | - this->peerinfo.identifier); |
2599 | - ret = -1; |
2600 | - goto unlock; |
2601 | - } |
2602 | - |
2603 | - peer = &priv->peer; |
2604 | - if (list_empty (&peer->ioq)) { |
2605 | - ret = __gf_rdma_ioq_churn_entry (peer, entry); |
2606 | - if (ret != 0) { |
2607 | - need_append = 0; |
2608 | - |
2609 | - if (ret < 0) { |
2610 | - gf_log (this->name, GF_LOG_WARNING, |
2611 | - "processing ioq entry destined " |
2612 | - "to (%s) failed", |
2613 | - this->peerinfo.identifier); |
2614 | - } |
2615 | - } |
2616 | - } |
2617 | - |
2618 | - if (need_append) { |
2619 | - list_add_tail (&entry->list, &peer->ioq); |
2620 | - } |
2621 | - } |
2622 | -unlock: |
2623 | - pthread_mutex_unlock (&priv->write_mutex); |
2624 | - return ret; |
2625 | -} |
2626 | - |
2627 | - |
2628 | -gf_rdma_ioq_t * |
2629 | -gf_rdma_ioq_new (rpc_transport_t *this, rpc_transport_data_t *data) |
2630 | -{ |
2631 | - gf_rdma_ioq_t *entry = NULL; |
2632 | - int count = 0, i = 0; |
2633 | - rpc_transport_msg_t *msg = NULL; |
2634 | - gf_rdma_private_t *priv = NULL; |
2635 | - |
2636 | - if ((data == NULL) || (this == NULL)) { |
2637 | - goto out; |
2638 | - } |
2639 | - |
2640 | - priv = this->private; |
2641 | - |
2642 | - entry = mem_get (priv->device->ioq_pool); |
2643 | - if (entry == NULL) { |
2644 | - goto out; |
2645 | - } |
2646 | - memset (entry, 0, sizeof (*entry)); |
2647 | - entry->pool = priv->device->ioq_pool; |
2648 | - |
2649 | - if (data->is_request) { |
2650 | - msg = &data->data.req.msg; |
2651 | - if (data->data.req.rsp.rsphdr_count != 0) { |
2652 | - for (i = 0; i < data->data.req.rsp.rsphdr_count; i++) { |
2653 | - entry->msg.request.rsphdr_vec[i] |
2654 | - = data->data.req.rsp.rsphdr[i]; |
2655 | - } |
2656 | - |
2657 | - entry->msg.request.rsphdr_count = |
2658 | - data->data.req.rsp.rsphdr_count; |
2659 | - } |
2660 | - |
2661 | - if (data->data.req.rsp.rsp_payload_count != 0) { |
2662 | - for (i = 0; i < data->data.req.rsp.rsp_payload_count; |
2663 | - i++) { |
2664 | - entry->msg.request.rsp_payload[i] |
2665 | - = data->data.req.rsp.rsp_payload[i]; |
2666 | - } |
2667 | - |
2668 | - entry->msg.request.rsp_payload_count = |
2669 | - data->data.req.rsp.rsp_payload_count; |
2670 | - } |
2671 | - |
2672 | - entry->msg.request.rpc_req = data->data.req.rpc_req; |
2673 | - |
2674 | - if (data->data.req.rsp.rsp_iobref != NULL) { |
2675 | - entry->msg.request.rsp_iobref |
2676 | - = iobref_ref (data->data.req.rsp.rsp_iobref); |
2677 | - } |
2678 | - } else { |
2679 | - msg = &data->data.reply.msg; |
2680 | - entry->msg.reply_info = data->data.reply.private; |
2681 | - } |
2682 | - |
2683 | - entry->is_request = data->is_request; |
2684 | - |
2685 | - count = msg->rpchdrcount + msg->proghdrcount + msg->progpayloadcount; |
2686 | - |
2687 | - GF_ASSERT (count <= MAX_IOVEC); |
2688 | - |
2689 | - if (msg->rpchdr != NULL) { |
2690 | - memcpy (&entry->rpchdr[0], msg->rpchdr, |
2691 | - sizeof (struct iovec) * msg->rpchdrcount); |
2692 | - entry->rpchdr_count = msg->rpchdrcount; |
2693 | - } |
2694 | - |
2695 | - if (msg->proghdr != NULL) { |
2696 | - memcpy (&entry->proghdr[0], msg->proghdr, |
2697 | - sizeof (struct iovec) * msg->proghdrcount); |
2698 | - entry->proghdr_count = msg->proghdrcount; |
2699 | - } |
2700 | - |
2701 | - if (msg->progpayload != NULL) { |
2702 | - memcpy (&entry->prog_payload[0], msg->progpayload, |
2703 | - sizeof (struct iovec) * msg->progpayloadcount); |
2704 | - entry->prog_payload_count = msg->progpayloadcount; |
2705 | - } |
2706 | - |
2707 | - if (msg->iobref != NULL) { |
2708 | - entry->iobref = iobref_ref (msg->iobref); |
2709 | - } |
2710 | - |
2711 | - INIT_LIST_HEAD (&entry->list); |
2712 | - |
2713 | -out: |
2714 | - return entry; |
2715 | -} |
2716 | - |
2717 | - |
2718 | -int32_t |
2719 | -gf_rdma_submit_request (rpc_transport_t *this, rpc_transport_req_t *req) |
2720 | -{ |
2721 | - int32_t ret = 0; |
2722 | - gf_rdma_ioq_t *entry = NULL; |
2723 | - rpc_transport_data_t data = {0, }; |
2724 | - |
2725 | - if (req == NULL) { |
2726 | - goto out; |
2727 | - } |
2728 | - |
2729 | - data.is_request = 1; |
2730 | - data.data.req = *req; |
2731 | - |
2732 | - entry = gf_rdma_ioq_new (this, &data); |
2733 | - if (entry == NULL) { |
2734 | - gf_log (this->name, GF_LOG_WARNING, |
2735 | - "getting a new ioq entry failed (peer:%s)", |
2736 | - this->peerinfo.identifier); |
2737 | - goto out; |
2738 | - } |
2739 | - |
2740 | - ret = gf_rdma_writev (this, entry); |
2741 | - |
2742 | - if (ret > 0) { |
2743 | - ret = 0; |
2744 | - } else if (ret < 0) { |
2745 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2746 | - "sending request to peer (%s) failed", |
2747 | - this->peerinfo.identifier); |
2748 | - rpc_transport_disconnect (this); |
2749 | - } |
2750 | - |
2751 | -out: |
2752 | - return ret; |
2753 | -} |
2754 | - |
2755 | -int32_t |
2756 | -gf_rdma_submit_reply (rpc_transport_t *this, rpc_transport_reply_t *reply) |
2757 | -{ |
2758 | - int32_t ret = 0; |
2759 | - gf_rdma_ioq_t *entry = NULL; |
2760 | - rpc_transport_data_t data = {0, }; |
2761 | - |
2762 | - if (reply == NULL) { |
2763 | - goto out; |
2764 | - } |
2765 | - |
2766 | - data.data.reply = *reply; |
2767 | - |
2768 | - entry = gf_rdma_ioq_new (this, &data); |
2769 | - if (entry == NULL) { |
2770 | - gf_log (this->name, GF_LOG_WARNING, |
2771 | - "getting a new ioq entry failed (peer:%s)", |
2772 | - this->peerinfo.identifier); |
2773 | - goto out; |
2774 | - } |
2775 | - |
2776 | - ret = gf_rdma_writev (this, entry); |
2777 | - if (ret > 0) { |
2778 | - ret = 0; |
2779 | - } else if (ret < 0) { |
2780 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
2781 | - "sending request to peer (%s) failed", |
2782 | - this->peerinfo.identifier); |
2783 | - rpc_transport_disconnect (this); |
2784 | - } |
2785 | - |
2786 | -out: |
2787 | - return ret; |
2788 | -} |
2789 | - |
2790 | - |
2791 | -static int |
2792 | -gf_rdma_register_peer (gf_rdma_device_t *device, int32_t qp_num, |
2793 | - gf_rdma_peer_t *peer) |
2794 | -{ |
2795 | - struct _qpent *ent = NULL; |
2796 | - gf_rdma_qpreg_t *qpreg = NULL; |
2797 | - int32_t hash = 0; |
2798 | - int ret = -1; |
2799 | - |
2800 | - qpreg = &device->qpreg; |
2801 | - hash = qp_num % 42; |
2802 | - |
2803 | - pthread_mutex_lock (&qpreg->lock); |
2804 | - { |
2805 | - ent = qpreg->ents[hash].next; |
2806 | - while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num)) { |
2807 | - ent = ent->next; |
2808 | - } |
2809 | - |
2810 | - if (ent->qp_num == qp_num) { |
2811 | - ret = 0; |
2812 | - goto unlock; |
2813 | - } |
2814 | - |
2815 | - ent = (struct _qpent *) GF_CALLOC (1, sizeof (*ent), |
2816 | - gf_common_mt_qpent); |
2817 | - if (ent == NULL) { |
2818 | - goto unlock; |
2819 | - } |
2820 | - |
2821 | - /* TODO: ref reg->peer */ |
2822 | - ent->peer = peer; |
2823 | - ent->next = &qpreg->ents[hash]; |
2824 | - ent->prev = ent->next->prev; |
2825 | - ent->next->prev = ent; |
2826 | - ent->prev->next = ent; |
2827 | - ent->qp_num = qp_num; |
2828 | - qpreg->count++; |
2829 | - ret = 0; |
2830 | - } |
2831 | -unlock: |
2832 | - pthread_mutex_unlock (&qpreg->lock); |
2833 | - |
2834 | - return ret; |
2835 | -} |
2836 | - |
2837 | - |
2838 | -static void |
2839 | -gf_rdma_unregister_peer (gf_rdma_device_t *device, int32_t qp_num) |
2840 | -{ |
2841 | - struct _qpent *ent = NULL; |
2842 | - gf_rdma_qpreg_t *qpreg = NULL; |
2843 | - int32_t hash = 0; |
2844 | - |
2845 | - qpreg = &device->qpreg; |
2846 | - hash = qp_num % 42; |
2847 | - |
2848 | - pthread_mutex_lock (&qpreg->lock); |
2849 | - { |
2850 | - ent = qpreg->ents[hash].next; |
2851 | - while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num)) |
2852 | - ent = ent->next; |
2853 | - if (ent->qp_num != qp_num) { |
2854 | - pthread_mutex_unlock (&qpreg->lock); |
2855 | - return; |
2856 | - } |
2857 | - ent->prev->next = ent->next; |
2858 | - ent->next->prev = ent->prev; |
2859 | - /* TODO: unref reg->peer */ |
2860 | - GF_FREE (ent); |
2861 | - qpreg->count--; |
2862 | - } |
2863 | - pthread_mutex_unlock (&qpreg->lock); |
2864 | -} |
2865 | - |
2866 | - |
2867 | -static gf_rdma_peer_t * |
2868 | -__gf_rdma_lookup_peer (gf_rdma_device_t *device, int32_t qp_num) |
2869 | -{ |
2870 | - struct _qpent *ent = NULL; |
2871 | - gf_rdma_peer_t *peer = NULL; |
2872 | - gf_rdma_qpreg_t *qpreg = NULL; |
2873 | - int32_t hash = 0; |
2874 | - |
2875 | - qpreg = &device->qpreg; |
2876 | - hash = qp_num % 42; |
2877 | - ent = qpreg->ents[hash].next; |
2878 | - while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num)) |
2879 | - ent = ent->next; |
2880 | - |
2881 | - if (ent != &qpreg->ents[hash]) { |
2882 | - peer = ent->peer; |
2883 | - } |
2884 | - |
2885 | - return peer; |
2886 | -} |
2887 | - |
2888 | - |
2889 | -static void |
2890 | -__gf_rdma_destroy_qp (rpc_transport_t *this) |
2891 | -{ |
2892 | - gf_rdma_private_t *priv = NULL; |
2893 | - |
2894 | - priv = this->private; |
2895 | - if (priv->peer.qp) { |
2896 | - gf_rdma_unregister_peer (priv->device, priv->peer.qp->qp_num); |
2897 | - rdma_destroy_qp (priv->peer.cm_id); |
2898 | - } |
2899 | - priv->peer.qp = NULL; |
2900 | - |
2901 | - return; |
2902 | -} |
2903 | - |
2904 | - |
2905 | -static int32_t |
2906 | -gf_rdma_create_qp (rpc_transport_t *this) |
2907 | -{ |
2908 | - gf_rdma_private_t *priv = NULL; |
2909 | - gf_rdma_device_t *device = NULL; |
2910 | - int32_t ret = 0; |
2911 | - gf_rdma_peer_t *peer = NULL; |
2912 | - char *device_name = NULL; |
2913 | - |
2914 | - priv = this->private; |
2915 | - |
2916 | - peer = &priv->peer; |
2917 | - |
2918 | - device_name = (char *)ibv_get_device_name (peer->cm_id->verbs->device); |
2919 | - if (device_name == NULL) { |
2920 | - ret = -1; |
2921 | - gf_log (this->name, GF_LOG_WARNING, "cannot get device_name"); |
2922 | - goto out; |
2923 | - } |
2924 | - |
2925 | - device = gf_rdma_get_device (this, peer->cm_id->verbs, |
2926 | - device_name); |
2927 | - if (device == NULL) { |
2928 | - ret = -1; |
2929 | - gf_log (this->name, GF_LOG_WARNING, "cannot get device for " |
2930 | - "device %s", device_name); |
2931 | - goto out; |
2932 | - } |
2933 | - |
2934 | - if (priv->device == NULL) { |
2935 | - priv->device = device; |
2936 | - } |
2937 | - |
2938 | - struct ibv_qp_init_attr init_attr = { |
2939 | - .send_cq = device->send_cq, |
2940 | - .recv_cq = device->recv_cq, |
2941 | - .srq = device->srq, |
2942 | - .cap = { |
2943 | - .max_send_wr = peer->send_count, |
2944 | - .max_recv_wr = peer->recv_count, |
2945 | - .max_send_sge = 2, |
2946 | - .max_recv_sge = 1 |
2947 | - }, |
2948 | - .qp_type = IBV_QPT_RC |
2949 | - }; |
2950 | - |
2951 | - ret = rdma_create_qp(peer->cm_id, device->pd, &init_attr); |
2952 | - if (ret != 0) { |
2953 | - gf_log (peer->trans->name, GF_LOG_CRITICAL, |
2954 | - "%s: could not create QP (%s)", this->name, |
2955 | - strerror (errno)); |
2956 | - ret = -1; |
2957 | - goto out; |
2958 | - } |
2959 | - |
2960 | - peer->qp = peer->cm_id->qp; |
2961 | - |
2962 | - ret = gf_rdma_register_peer (device, peer->qp->qp_num, peer); |
2963 | - |
2964 | -out: |
2965 | - if (ret == -1) |
2966 | - __gf_rdma_destroy_qp (this); |
2967 | - |
2968 | - return ret; |
2969 | -} |
2970 | - |
2971 | - |
2972 | -static int32_t |
2973 | -__gf_rdma_teardown (rpc_transport_t *this) |
2974 | -{ |
2975 | - gf_rdma_private_t *priv = NULL; |
2976 | - gf_rdma_peer_t *peer = NULL; |
2977 | - |
2978 | - priv = this->private; |
2979 | - peer = &priv->peer; |
2980 | - |
2981 | - if (peer->cm_id->qp != NULL) { |
2982 | - __gf_rdma_destroy_qp (this); |
2983 | - } |
2984 | - |
2985 | - if (!list_empty (&priv->peer.ioq)) { |
2986 | - __gf_rdma_ioq_flush (peer); |
2987 | - } |
2988 | - |
2989 | - if (peer->cm_id != NULL) { |
2990 | - rdma_destroy_id (peer->cm_id); |
2991 | - peer->cm_id = NULL; |
2992 | - } |
2993 | - |
2994 | - /* TODO: decrement cq size */ |
2995 | - return 0; |
2996 | -} |
2997 | - |
2998 | - |
2999 | -static int32_t |
3000 | -gf_rdma_teardown (rpc_transport_t *this) |
3001 | -{ |
3002 | - int32_t ret = 0; |
3003 | - gf_rdma_private_t *priv = NULL; |
3004 | - |
3005 | - if (this == NULL) { |
3006 | - goto out; |
3007 | - } |
3008 | - |
3009 | - priv = this->private; |
3010 | - |
3011 | - pthread_mutex_lock (&priv->write_mutex); |
3012 | - { |
3013 | - ret = __gf_rdma_teardown (this); |
3014 | - } |
3015 | - pthread_mutex_unlock (&priv->write_mutex); |
3016 | - |
3017 | -out: |
3018 | - return ret; |
3019 | -} |
3020 | - |
3021 | - |
3022 | -/* |
3023 | - * allocates new memory to hold write-chunklist. New memory is needed since |
3024 | - * write-chunklist will be used while sending reply and the post holding initial |
3025 | - * write-chunklist sent from client will be put back to srq before a pollin |
3026 | - * event is sent to upper layers. |
3027 | - */ |
3028 | -int32_t |
3029 | -gf_rdma_get_write_chunklist (char **ptr, gf_rdma_write_array_t **write_ary) |
3030 | -{ |
3031 | - gf_rdma_write_array_t *from = NULL, *to = NULL; |
3032 | - int32_t ret = -1, size = 0, i = 0; |
3033 | - |
3034 | - from = (gf_rdma_write_array_t *) *ptr; |
3035 | - if (from->wc_discrim == 0) { |
3036 | - ret = 0; |
3037 | - goto out; |
3038 | - } |
3039 | - |
3040 | - from->wc_nchunks = ntoh32 (from->wc_nchunks); |
3041 | - |
3042 | - size = sizeof (*from) |
3043 | - + (sizeof (gf_rdma_write_chunk_t) * from->wc_nchunks); |
3044 | - |
3045 | - to = GF_CALLOC (1, size, gf_common_mt_char); |
3046 | - if (to == NULL) { |
3047 | - ret = -1; |
3048 | - goto out; |
3049 | - } |
3050 | - |
3051 | - to->wc_discrim = ntoh32 (from->wc_discrim); |
3052 | - to->wc_nchunks = from->wc_nchunks; |
3053 | - |
3054 | - for (i = 0; i < to->wc_nchunks; i++) { |
3055 | - to->wc_array[i].wc_target.rs_handle |
3056 | - = ntoh32 (from->wc_array[i].wc_target.rs_handle); |
3057 | - to->wc_array[i].wc_target.rs_length |
3058 | - = ntoh32 (from->wc_array[i].wc_target.rs_length); |
3059 | - to->wc_array[i].wc_target.rs_offset |
3060 | - = ntoh64 (from->wc_array[i].wc_target.rs_offset); |
3061 | - } |
3062 | - |
3063 | - *write_ary = to; |
3064 | - ret = 0; |
3065 | - *ptr = (char *)&from->wc_array[i].wc_target.rs_handle; |
3066 | -out: |
3067 | - return ret; |
3068 | -} |
3069 | - |
3070 | - |
3071 | -/* |
3072 | - * does not allocate new memory to hold read-chunklist. New memory is not |
3073 | - * needed, since post is not put back to srq till we've completed all the |
3074 | - * rdma-reads and hence readchunk-list can point to memory held by post. |
3075 | - */ |
3076 | -int32_t |
3077 | -gf_rdma_get_read_chunklist (char **ptr, gf_rdma_read_chunk_t **readch) |
3078 | -{ |
3079 | - int32_t ret = -1; |
3080 | - gf_rdma_read_chunk_t *chunk = NULL; |
3081 | - int i = 0; |
3082 | - |
3083 | - chunk = (gf_rdma_read_chunk_t *)*ptr; |
3084 | - if (chunk[0].rc_discrim == 0) { |
3085 | - ret = 0; |
3086 | - goto out; |
3087 | - } |
3088 | - |
3089 | - for (i = 0; chunk[i].rc_discrim != 0; i++) { |
3090 | - chunk[i].rc_discrim = ntoh32 (chunk[i].rc_discrim); |
3091 | - chunk[i].rc_position = ntoh32 (chunk[i].rc_position); |
3092 | - chunk[i].rc_target.rs_handle |
3093 | - = ntoh32 (chunk[i].rc_target.rs_handle); |
3094 | - chunk[i].rc_target.rs_length |
3095 | - = ntoh32 (chunk[i].rc_target.rs_length); |
3096 | - chunk[i].rc_target.rs_offset |
3097 | - = ntoh64 (chunk[i].rc_target.rs_offset); |
3098 | - } |
3099 | - |
3100 | - *readch = &chunk[0]; |
3101 | - ret = 0; |
3102 | - *ptr = (char *)&chunk[i].rc_discrim; |
3103 | -out: |
3104 | - return ret; |
3105 | -} |
3106 | - |
3107 | - |
3108 | -inline int32_t |
3109 | -gf_rdma_decode_error_msg (gf_rdma_peer_t *peer, gf_rdma_post_t *post, |
3110 | - size_t bytes_in_post) |
3111 | -{ |
3112 | - gf_rdma_header_t *header = NULL; |
3113 | - struct iobuf *iobuf = NULL; |
3114 | - struct iobref *iobref = NULL; |
3115 | - int32_t ret = -1; |
3116 | - struct rpc_msg rpc_msg = {0, }; |
3117 | - |
3118 | - header = (gf_rdma_header_t *)post->buf; |
3119 | - header->rm_body.rm_error.rm_type |
3120 | - = ntoh32 (header->rm_body.rm_error.rm_type); |
3121 | - if (header->rm_body.rm_error.rm_type == ERR_VERS) { |
3122 | - header->rm_body.rm_error.rm_version.gf_rdma_vers_low = |
3123 | - ntoh32 (header->rm_body.rm_error.rm_version.gf_rdma_vers_low); |
3124 | - header->rm_body.rm_error.rm_version.gf_rdma_vers_high = |
3125 | - ntoh32 (header->rm_body.rm_error.rm_version.gf_rdma_vers_high); |
3126 | - } |
3127 | - |
3128 | - rpc_msg.rm_xid = header->rm_xid; |
3129 | - rpc_msg.rm_direction = REPLY; |
3130 | - rpc_msg.rm_reply.rp_stat = MSG_DENIED; |
3131 | - |
3132 | - iobuf = iobuf_get2 (peer->trans->ctx->iobuf_pool, bytes_in_post); |
3133 | - if (iobuf == NULL) { |
3134 | - ret = -1; |
3135 | - goto out; |
3136 | - } |
3137 | - |
3138 | - post->ctx.iobref = iobref = iobref_new (); |
3139 | - if (iobref == NULL) { |
3140 | - ret = -1; |
3141 | - goto out; |
3142 | - } |
3143 | - |
3144 | - iobref_add (iobref, iobuf); |
3145 | - iobuf_unref (iobuf); |
3146 | - |
3147 | - ret = rpc_reply_to_xdr (&rpc_msg, iobuf_ptr (iobuf), |
3148 | - iobuf_pagesize (iobuf), &post->ctx.vector[0]); |
3149 | - if (ret == -1) { |
3150 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3151 | - "Failed to create RPC reply"); |
3152 | - goto out; |
3153 | - } |
3154 | - |
3155 | - post->ctx.count = 1; |
3156 | - |
3157 | - iobuf = NULL; |
3158 | - iobref = NULL; |
3159 | - |
3160 | -out: |
3161 | - if (ret == -1) { |
3162 | - if (iobuf != NULL) { |
3163 | - iobuf_unref (iobuf); |
3164 | - } |
3165 | - |
3166 | - if (iobref != NULL) { |
3167 | - iobref_unref (iobref); |
3168 | - } |
3169 | - } |
3170 | - |
3171 | - return 0; |
3172 | -} |
3173 | - |
3174 | - |
3175 | -int32_t |
3176 | -gf_rdma_decode_msg (gf_rdma_peer_t *peer, gf_rdma_post_t *post, |
3177 | - gf_rdma_read_chunk_t **readch, size_t bytes_in_post) |
3178 | -{ |
3179 | - int32_t ret = -1; |
3180 | - gf_rdma_header_t *header = NULL; |
3181 | - gf_rdma_reply_info_t *reply_info = NULL; |
3182 | - char *ptr = NULL; |
3183 | - gf_rdma_write_array_t *write_ary = NULL; |
3184 | - size_t header_len = 0; |
3185 | - |
3186 | - header = (gf_rdma_header_t *)post->buf; |
3187 | - |
3188 | - ptr = (char *)&header->rm_body.rm_chunks[0]; |
3189 | - |
3190 | - ret = gf_rdma_get_read_chunklist (&ptr, readch); |
3191 | - if (ret == -1) { |
3192 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3193 | - "cannot get read chunklist from msg"); |
3194 | - goto out; |
3195 | - } |
3196 | - |
3197 | - /* skip terminator of read-chunklist */ |
3198 | - ptr = ptr + sizeof (uint32_t); |
3199 | - |
3200 | - ret = gf_rdma_get_write_chunklist (&ptr, &write_ary); |
3201 | - if (ret == -1) { |
3202 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3203 | - "cannot get write chunklist from msg"); |
3204 | - goto out; |
3205 | - } |
3206 | - |
3207 | - /* skip terminator of write-chunklist */ |
3208 | - ptr = ptr + sizeof (uint32_t); |
3209 | - |
3210 | - if (write_ary != NULL) { |
3211 | - reply_info = gf_rdma_reply_info_alloc (peer); |
3212 | - if (reply_info == NULL) { |
3213 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3214 | - "reply_info_alloc failed"); |
3215 | - ret = -1; |
3216 | - goto out; |
3217 | - } |
3218 | - |
3219 | - reply_info->type = gf_rdma_writech; |
3220 | - reply_info->wc_array = write_ary; |
3221 | - reply_info->rm_xid = header->rm_xid; |
3222 | - } else { |
3223 | - ret = gf_rdma_get_write_chunklist (&ptr, &write_ary); |
3224 | - if (ret == -1) { |
3225 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3226 | - "cannot get reply chunklist from msg"); |
3227 | - goto out; |
3228 | - } |
3229 | - |
3230 | - if (write_ary != NULL) { |
3231 | - reply_info = gf_rdma_reply_info_alloc (peer); |
3232 | - if (reply_info == NULL) { |
3233 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3234 | - "reply_info_alloc_failed"); |
3235 | - ret = -1; |
3236 | - goto out; |
3237 | - } |
3238 | - |
3239 | - reply_info->type = gf_rdma_replych; |
3240 | - reply_info->wc_array = write_ary; |
3241 | - reply_info->rm_xid = header->rm_xid; |
3242 | - } |
3243 | - } |
3244 | - |
3245 | - /* skip terminator of reply chunk */ |
3246 | - ptr = ptr + sizeof (uint32_t); |
3247 | - if (header->rm_type != GF_RDMA_NOMSG) { |
3248 | - header_len = (long)ptr - (long)post->buf; |
3249 | - post->ctx.vector[0].iov_len = (bytes_in_post - header_len); |
3250 | - |
3251 | - post->ctx.hdr_iobuf = iobuf_get2 (peer->trans->ctx->iobuf_pool, |
3252 | - (bytes_in_post - header_len)); |
3253 | - if (post->ctx.hdr_iobuf == NULL) { |
3254 | - ret = -1; |
3255 | - goto out; |
3256 | - } |
3257 | - |
3258 | - post->ctx.vector[0].iov_base = iobuf_ptr (post->ctx.hdr_iobuf); |
3259 | - memcpy (post->ctx.vector[0].iov_base, ptr, |
3260 | - post->ctx.vector[0].iov_len); |
3261 | - post->ctx.count = 1; |
3262 | - } |
3263 | - |
3264 | - post->ctx.reply_info = reply_info; |
3265 | -out: |
3266 | - if (ret == -1) { |
3267 | - if (*readch != NULL) { |
3268 | - GF_FREE (*readch); |
3269 | - *readch = NULL; |
3270 | - } |
3271 | - |
3272 | - GF_FREE (write_ary); |
3273 | - } |
3274 | - |
3275 | - return ret; |
3276 | -} |
3277 | - |
3278 | - |
3279 | -/* Assumes only one of either write-chunklist or a reply chunk is present */ |
3280 | -int32_t |
3281 | -gf_rdma_decode_header (gf_rdma_peer_t *peer, gf_rdma_post_t *post, |
3282 | - gf_rdma_read_chunk_t **readch, size_t bytes_in_post) |
3283 | -{ |
3284 | - int32_t ret = -1; |
3285 | - gf_rdma_header_t *header = NULL; |
3286 | - |
3287 | - header = (gf_rdma_header_t *)post->buf; |
3288 | - |
3289 | - header->rm_xid = ntoh32 (header->rm_xid); |
3290 | - header->rm_vers = ntoh32 (header->rm_vers); |
3291 | - header->rm_credit = ntoh32 (header->rm_credit); |
3292 | - header->rm_type = ntoh32 (header->rm_type); |
3293 | - |
3294 | - switch (header->rm_type) { |
3295 | - case GF_RDMA_MSG: |
3296 | - case GF_RDMA_NOMSG: |
3297 | - ret = gf_rdma_decode_msg (peer, post, readch, bytes_in_post); |
3298 | - if (ret < 0) { |
3299 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3300 | - "cannot decode msg of type (%d)", |
3301 | - header->rm_type); |
3302 | - } |
3303 | - |
3304 | - break; |
3305 | - |
3306 | - case GF_RDMA_MSGP: |
3307 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3308 | - "rdma msg of msg-type GF_RDMA_MSGP should not have " |
3309 | - "been received"); |
3310 | - ret = -1; |
3311 | - break; |
3312 | - |
3313 | - case GF_RDMA_DONE: |
3314 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3315 | - "rdma msg of msg-type GF_RDMA_DONE should not have " |
3316 | - "been received"); |
3317 | - ret = -1; |
3318 | - break; |
3319 | - |
3320 | - case GF_RDMA_ERROR: |
3321 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3322 | - "received a msg of type RDMA_ERROR"); |
3323 | - ret = gf_rdma_decode_error_msg (peer, post, bytes_in_post); |
3324 | - break; |
3325 | - |
3326 | - default: |
3327 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3328 | - "unknown rdma msg-type (%d)", header->rm_type); |
3329 | - } |
3330 | - |
3331 | - return ret; |
3332 | -} |
3333 | - |
3334 | - |
3335 | -int32_t |
3336 | -__gf_rdma_read (gf_rdma_peer_t *peer, gf_rdma_post_t *post, struct iovec *to, |
3337 | - gf_rdma_read_chunk_t *readch) |
3338 | -{ |
3339 | - int32_t ret = -1; |
3340 | - struct ibv_sge list = {0, }; |
3341 | - struct ibv_send_wr wr = {0, }, *bad_wr = NULL; |
3342 | - |
3343 | - ret = __gf_rdma_register_local_mr_for_rdma (peer, to, 1, &post->ctx); |
3344 | - if (ret == -1) { |
3345 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3346 | - "registering local memory for rdma read failed"); |
3347 | - goto out; |
3348 | - } |
3349 | - |
3350 | - list.addr = (unsigned long) to->iov_base; |
3351 | - list.length = to->iov_len; |
3352 | - list.lkey = post->ctx.mr[post->ctx.mr_count - 1]->lkey; |
3353 | - |
3354 | - wr.wr_id = (unsigned long) gf_rdma_post_ref (post); |
3355 | - wr.sg_list = &list; |
3356 | - wr.num_sge = 1; |
3357 | - wr.opcode = IBV_WR_RDMA_READ; |
3358 | - wr.send_flags = IBV_SEND_SIGNALED; |
3359 | - wr.wr.rdma.remote_addr = readch->rc_target.rs_offset; |
3360 | - wr.wr.rdma.rkey = readch->rc_target.rs_handle; |
3361 | - |
3362 | - ret = ibv_post_send (peer->qp, &wr, &bad_wr); |
3363 | - if (ret) { |
3364 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3365 | - "rdma read from client " |
3366 | - "(%s) failed with ret = %d (%s)", |
3367 | - peer->trans->peerinfo.identifier, |
3368 | - ret, (ret > 0) ? strerror (ret) : ""); |
3369 | - ret = -1; |
3370 | - gf_rdma_post_unref (post); |
3371 | - } |
3372 | -out: |
3373 | - return ret; |
3374 | -} |
3375 | - |
3376 | - |
3377 | -int32_t |
3378 | -gf_rdma_do_reads (gf_rdma_peer_t *peer, gf_rdma_post_t *post, |
3379 | - gf_rdma_read_chunk_t *readch) |
3380 | -{ |
3381 | - int32_t ret = -1, i = 0, count = 0; |
3382 | - size_t size = 0; |
3383 | - char *ptr = NULL; |
3384 | - struct iobuf *iobuf = NULL; |
3385 | - gf_rdma_private_t *priv = NULL; |
3386 | - |
3387 | - priv = peer->trans->private; |
3388 | - |
3389 | - for (i = 0; readch[i].rc_discrim != 0; i++) { |
3390 | - size += readch[i].rc_target.rs_length; |
3391 | - } |
3392 | - |
3393 | - if (i == 0) { |
3394 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3395 | - "message type specified as rdma-read but there are no " |
3396 | - "rdma read-chunks present"); |
3397 | - goto out; |
3398 | - } |
3399 | - |
3400 | - post->ctx.gf_rdma_reads = i; |
3401 | - |
3402 | - iobuf = iobuf_get2 (peer->trans->ctx->iobuf_pool, size); |
3403 | - if (iobuf == NULL) { |
3404 | - goto out; |
3405 | - } |
3406 | - |
3407 | - if (post->ctx.iobref == NULL) { |
3408 | - post->ctx.iobref = iobref_new (); |
3409 | - if (post->ctx.iobref == NULL) { |
3410 | - iobuf_unref (iobuf); |
3411 | - goto out; |
3412 | - } |
3413 | - } |
3414 | - |
3415 | - iobref_add (post->ctx.iobref, iobuf); |
3416 | - iobuf_unref (iobuf); |
3417 | - |
3418 | - ptr = iobuf_ptr (iobuf); |
3419 | - iobuf = NULL; |
3420 | - |
3421 | - pthread_mutex_lock (&priv->write_mutex); |
3422 | - { |
3423 | - if (!priv->connected) { |
3424 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3425 | - "transport not connected to peer (%s), " |
3426 | - "not doing rdma reads", |
3427 | - peer->trans->peerinfo.identifier); |
3428 | - goto unlock; |
3429 | - } |
3430 | - |
3431 | - for (i = 0; readch[i].rc_discrim != 0; i++) { |
3432 | - count = post->ctx.count++; |
3433 | - post->ctx.vector[count].iov_base = ptr; |
3434 | - post->ctx.vector[count].iov_len |
3435 | - = readch[i].rc_target.rs_length; |
3436 | - |
3437 | - ret = __gf_rdma_read (peer, post, |
3438 | - &post->ctx.vector[count], |
3439 | - &readch[i]); |
3440 | - if (ret == -1) { |
3441 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3442 | - "rdma read from peer (%s) failed", |
3443 | - peer->trans->peerinfo.identifier); |
3444 | - goto unlock; |
3445 | - } |
3446 | - |
3447 | - ptr += readch[i].rc_target.rs_length; |
3448 | - } |
3449 | - |
3450 | - ret = 0; |
3451 | - } |
3452 | -unlock: |
3453 | - pthread_mutex_unlock (&priv->write_mutex); |
3454 | -out: |
3455 | - |
3456 | - if (ret == -1) { |
3457 | - if (iobuf != NULL) { |
3458 | - iobuf_unref (iobuf); |
3459 | - } |
3460 | - } |
3461 | - |
3462 | - return ret; |
3463 | -} |
3464 | - |
3465 | - |
3466 | -int32_t |
3467 | -gf_rdma_pollin_notify (gf_rdma_peer_t *peer, gf_rdma_post_t *post) |
3468 | -{ |
3469 | - int32_t ret = -1; |
3470 | - enum msg_type msg_type = 0; |
3471 | - struct rpc_req *rpc_req = NULL; |
3472 | - gf_rdma_request_context_t *request_context = NULL; |
3473 | - rpc_request_info_t request_info = {0, }; |
3474 | - gf_rdma_private_t *priv = NULL; |
3475 | - uint32_t *ptr = NULL; |
3476 | - rpc_transport_pollin_t *pollin = NULL; |
3477 | - |
3478 | - if ((peer == NULL) || (post == NULL)) { |
3479 | - goto out; |
3480 | - } |
3481 | - |
3482 | - if (post->ctx.iobref == NULL) { |
3483 | - post->ctx.iobref = iobref_new (); |
3484 | - if (post->ctx.iobref == NULL) { |
3485 | - goto out; |
3486 | - } |
3487 | - |
3488 | - /* handling the case where both hdr and payload of |
3489 | - * GF_FOP_READ_CBK were received in a single iobuf |
3490 | - * because of server sending entire msg as inline without |
3491 | - * doing rdma writes. |
3492 | - */ |
3493 | - if (post->ctx.hdr_iobuf) |
3494 | - iobref_add (post->ctx.iobref, post->ctx.hdr_iobuf); |
3495 | - } |
3496 | - |
3497 | - pollin = rpc_transport_pollin_alloc (peer->trans, |
3498 | - post->ctx.vector, |
3499 | - post->ctx.count, |
3500 | - post->ctx.hdr_iobuf, |
3501 | - post->ctx.iobref, |
3502 | - post->ctx.reply_info); |
3503 | - if (pollin == NULL) { |
3504 | - goto out; |
3505 | - } |
3506 | - |
3507 | - ptr = (uint32_t *)pollin->vector[0].iov_base; |
3508 | - |
3509 | - request_info.xid = ntoh32 (*ptr); |
3510 | - msg_type = ntoh32 (*(ptr + 1)); |
3511 | - |
3512 | - if (msg_type == REPLY) { |
3513 | - ret = rpc_transport_notify (peer->trans, |
3514 | - RPC_TRANSPORT_MAP_XID_REQUEST, |
3515 | - &request_info); |
3516 | - if (ret == -1) { |
3517 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG, |
3518 | - "cannot get request information from rpc " |
3519 | - "layer"); |
3520 | - goto out; |
3521 | - } |
3522 | - |
3523 | - rpc_req = request_info.rpc_req; |
3524 | - if (rpc_req == NULL) { |
3525 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG, |
3526 | - "rpc request structure not found"); |
3527 | - ret = -1; |
3528 | - goto out; |
3529 | - } |
3530 | - |
3531 | - request_context = rpc_req->conn_private; |
3532 | - rpc_req->conn_private = NULL; |
3533 | - |
3534 | - priv = peer->trans->private; |
3535 | - if (request_context != NULL) { |
3536 | - pthread_mutex_lock (&priv->write_mutex); |
3537 | - { |
3538 | - __gf_rdma_request_context_destroy (request_context); |
3539 | - } |
3540 | - pthread_mutex_unlock (&priv->write_mutex); |
3541 | - } else { |
3542 | - gf_rdma_quota_put (peer); |
3543 | - } |
3544 | - |
3545 | - pollin->is_reply = 1; |
3546 | - } |
3547 | - |
3548 | - ret = rpc_transport_notify (peer->trans, RPC_TRANSPORT_MSG_RECEIVED, |
3549 | - pollin); |
3550 | - if (ret < 0) { |
3551 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3552 | - "transport_notify failed"); |
3553 | - } |
3554 | - |
3555 | -out: |
3556 | - if (pollin != NULL) { |
3557 | - pollin->private = NULL; |
3558 | - rpc_transport_pollin_destroy (pollin); |
3559 | - } |
3560 | - |
3561 | - return ret; |
3562 | -} |
3563 | - |
3564 | - |
3565 | -int32_t |
3566 | -gf_rdma_recv_reply (gf_rdma_peer_t *peer, gf_rdma_post_t *post) |
3567 | -{ |
3568 | - int32_t ret = -1; |
3569 | - gf_rdma_header_t *header = NULL; |
3570 | - gf_rdma_reply_info_t *reply_info = NULL; |
3571 | - gf_rdma_write_array_t *wc_array = NULL; |
3572 | - int i = 0; |
3573 | - uint32_t *ptr = NULL; |
3574 | - gf_rdma_request_context_t *ctx = NULL; |
3575 | - rpc_request_info_t request_info = {0, }; |
3576 | - struct rpc_req *rpc_req = NULL; |
3577 | - |
3578 | - header = (gf_rdma_header_t *)post->buf; |
3579 | - reply_info = post->ctx.reply_info; |
3580 | - |
3581 | - /* no write chunklist, just notify upper layers */ |
3582 | - if (reply_info == NULL) { |
3583 | - ret = 0; |
3584 | - goto out; |
3585 | - } |
3586 | - |
3587 | - wc_array = reply_info->wc_array; |
3588 | - |
3589 | - if (header->rm_type == GF_RDMA_NOMSG) { |
3590 | - post->ctx.vector[0].iov_base |
3591 | - = (void *)(long)wc_array->wc_array[0].wc_target.rs_offset; |
3592 | - post->ctx.vector[0].iov_len |
3593 | - = wc_array->wc_array[0].wc_target.rs_length; |
3594 | - |
3595 | - post->ctx.count = 1; |
3596 | - } else { |
3597 | - for (i = 0; i < wc_array->wc_nchunks; i++) { |
3598 | - post->ctx.vector[i + 1].iov_base |
3599 | - = (void *)(long)wc_array->wc_array[i].wc_target.rs_offset; |
3600 | - post->ctx.vector[i + 1].iov_len |
3601 | - = wc_array->wc_array[i].wc_target.rs_length; |
3602 | - } |
3603 | - |
3604 | - post->ctx.count += wc_array->wc_nchunks; |
3605 | - } |
3606 | - |
3607 | - ptr = (uint32_t *)post->ctx.vector[0].iov_base; |
3608 | - request_info.xid = ntoh32 (*ptr); |
3609 | - |
3610 | - ret = rpc_transport_notify (peer->trans, |
3611 | - RPC_TRANSPORT_MAP_XID_REQUEST, |
3612 | - &request_info); |
3613 | - if (ret == -1) { |
3614 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3615 | - "cannot get request information (peer:%s) from rpc " |
3616 | - "layer", peer->trans->peerinfo.identifier); |
3617 | - goto out; |
3618 | - } |
3619 | - |
3620 | - rpc_req = request_info.rpc_req; |
3621 | - if (rpc_req == NULL) { |
3622 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3623 | - "rpc request structure not found"); |
3624 | - ret = -1; |
3625 | - goto out; |
3626 | - } |
3627 | - |
3628 | - ctx = rpc_req->conn_private; |
3629 | - if ((post->ctx.iobref == NULL) && ctx->rsp_iobref) { |
3630 | - post->ctx.iobref = iobref_ref (ctx->rsp_iobref); |
3631 | - } |
3632 | - |
3633 | - ret = 0; |
3634 | - |
3635 | - gf_rdma_reply_info_destroy (reply_info); |
3636 | - |
3637 | -out: |
3638 | - if (ret == 0) { |
3639 | - ret = gf_rdma_pollin_notify (peer, post); |
3640 | - if (ret < 0) { |
3641 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3642 | - "pollin notify failed"); |
3643 | - } |
3644 | - } |
3645 | - |
3646 | - return ret; |
3647 | -} |
3648 | - |
3649 | - |
3650 | -inline int32_t |
3651 | -gf_rdma_recv_request (gf_rdma_peer_t *peer, gf_rdma_post_t *post, |
3652 | - gf_rdma_read_chunk_t *readch) |
3653 | -{ |
3654 | - int32_t ret = -1; |
3655 | - |
3656 | - if (readch != NULL) { |
3657 | - ret = gf_rdma_do_reads (peer, post, readch); |
3658 | - if (ret < 0) { |
3659 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3660 | - "rdma read from peer (%s) failed", |
3661 | - peer->trans->peerinfo.identifier); |
3662 | - } |
3663 | - } else { |
3664 | - ret = gf_rdma_pollin_notify (peer, post); |
3665 | - if (ret == -1) { |
3666 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3667 | - "pollin notification failed"); |
3668 | - } |
3669 | - } |
3670 | - |
3671 | - return ret; |
3672 | -} |
3673 | - |
3674 | -void |
3675 | -gf_rdma_process_recv (gf_rdma_peer_t *peer, struct ibv_wc *wc) |
3676 | -{ |
3677 | - gf_rdma_post_t *post = NULL; |
3678 | - gf_rdma_read_chunk_t *readch = NULL; |
3679 | - int ret = -1; |
3680 | - uint32_t *ptr = NULL; |
3681 | - enum msg_type msg_type = 0; |
3682 | - gf_rdma_header_t *header = NULL; |
3683 | - gf_rdma_private_t *priv = NULL; |
3684 | - |
3685 | - post = (gf_rdma_post_t *) (long) wc->wr_id; |
3686 | - if (post == NULL) { |
3687 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3688 | - "no post found in successful work completion element"); |
3689 | - goto out; |
3690 | - } |
3691 | - |
3692 | - ret = gf_rdma_decode_header (peer, post, &readch, wc->byte_len); |
3693 | - if (ret == -1) { |
3694 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3695 | - "decoding of header failed"); |
3696 | - goto out; |
3697 | - } |
3698 | - |
3699 | - header = (gf_rdma_header_t *)post->buf; |
3700 | - |
3701 | - priv = peer->trans->private; |
3702 | - |
3703 | - pthread_mutex_lock (&priv->write_mutex); |
3704 | - { |
3705 | - if (!priv->peer.quota_set) { |
3706 | - priv->peer.quota_set = 1; |
3707 | - |
3708 | - /* Initially peer.quota is set to 1 as per RFC 5666. We |
3709 | - * have to account for the quota used while sending |
3710 | - * first msg (which may or may not be returned to pool |
3711 | - * at this point) while deriving peer.quota from |
3712 | - * header->rm_credit. Hence the arithmatic below, |
3713 | - * instead of directly setting it to header->rm_credit. |
3714 | - */ |
3715 | - priv->peer.quota = header->rm_credit |
3716 | - - ( 1 - priv->peer.quota); |
3717 | - } |
3718 | - } |
3719 | - pthread_mutex_unlock (&priv->write_mutex); |
3720 | - |
3721 | - switch (header->rm_type) { |
3722 | - case GF_RDMA_MSG: |
3723 | - ptr = (uint32_t *)post->ctx.vector[0].iov_base; |
3724 | - msg_type = ntoh32 (*(ptr + 1)); |
3725 | - break; |
3726 | - |
3727 | - case GF_RDMA_NOMSG: |
3728 | - if (readch != NULL) { |
3729 | - msg_type = CALL; |
3730 | - } else { |
3731 | - msg_type = REPLY; |
3732 | - } |
3733 | - break; |
3734 | - |
3735 | - case GF_RDMA_ERROR: |
3736 | - if (header->rm_body.rm_error.rm_type == ERR_CHUNK) { |
3737 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3738 | - "peer (%s), couldn't encode or decode the msg " |
3739 | - "properly or write chunks were not provided " |
3740 | - "for replies that were bigger than " |
3741 | - "RDMA_INLINE_THRESHOLD (%d)", |
3742 | - peer->trans->peerinfo.identifier, |
3743 | - GLUSTERFS_RDMA_INLINE_THRESHOLD); |
3744 | - ret = gf_rdma_pollin_notify (peer, post); |
3745 | - if (ret == -1) { |
3746 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG, |
3747 | - "pollin notification failed"); |
3748 | - } |
3749 | - goto out; |
3750 | - } else { |
3751 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR, |
3752 | - "an error has happened while transmission of " |
3753 | - "msg, disconnecting the transport"); |
3754 | - ret = -1; |
3755 | - goto out; |
3756 | - } |
3757 | - |
3758 | - default: |
3759 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3760 | - "invalid rdma msg-type (%d)", header->rm_type); |
3761 | - goto out; |
3762 | - } |
3763 | - |
3764 | - if (msg_type == CALL) { |
3765 | - ret = gf_rdma_recv_request (peer, post, readch); |
3766 | - if (ret < 0) { |
3767 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3768 | - "receiving a request from peer (%s) failed", |
3769 | - peer->trans->peerinfo.identifier); |
3770 | - } |
3771 | - } else { |
3772 | - ret = gf_rdma_recv_reply (peer, post); |
3773 | - if (ret < 0) { |
3774 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3775 | - "receiving a reply from peer (%s) failed", |
3776 | - peer->trans->peerinfo.identifier); |
3777 | - } |
3778 | - } |
3779 | - |
3780 | -out: |
3781 | - if (ret == -1) { |
3782 | - rpc_transport_disconnect (peer->trans); |
3783 | - } |
3784 | - |
3785 | - return; |
3786 | -} |
3787 | - |
3788 | -void * |
3789 | -gf_rdma_async_event_thread (void *context) |
3790 | -{ |
3791 | - struct ibv_async_event event; |
3792 | - int ret; |
3793 | - |
3794 | - while (1) { |
3795 | - do { |
3796 | - ret = ibv_get_async_event((struct ibv_context *)context, |
3797 | - &event); |
3798 | - |
3799 | - if (ret && errno != EINTR) { |
3800 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3801 | - "Error getting event (%s)", |
3802 | - strerror (errno)); |
3803 | - } |
3804 | - } while(ret && errno == EINTR); |
3805 | - |
3806 | - switch (event.event_type) { |
3807 | - case IBV_EVENT_SRQ_LIMIT_REACHED: |
3808 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3809 | - "recieved srq_limit reached"); |
3810 | - break; |
3811 | - |
3812 | - default: |
3813 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG, |
3814 | - "event (%d) recieved", event.event_type); |
3815 | - break; |
3816 | - } |
3817 | - |
3818 | - ibv_ack_async_event(&event); |
3819 | - } |
3820 | - |
3821 | - return 0; |
3822 | -} |
3823 | - |
3824 | - |
3825 | -static void * |
3826 | -gf_rdma_recv_completion_proc (void *data) |
3827 | -{ |
3828 | - struct ibv_comp_channel *chan = NULL; |
3829 | - gf_rdma_device_t *device = NULL;; |
3830 | - gf_rdma_post_t *post = NULL; |
3831 | - gf_rdma_peer_t *peer = NULL; |
3832 | - struct ibv_cq *event_cq = NULL; |
3833 | - struct ibv_wc wc = {0, }; |
3834 | - void *event_ctx = NULL; |
3835 | - int32_t ret = 0; |
3836 | - |
3837 | - chan = data; |
3838 | - |
3839 | - while (1) { |
3840 | - ret = ibv_get_cq_event (chan, &event_cq, &event_ctx); |
3841 | - if (ret) { |
3842 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR, |
3843 | - "ibv_get_cq_event failed, terminating recv " |
3844 | - "thread %d (%d)", ret, errno); |
3845 | - continue; |
3846 | - } |
3847 | - |
3848 | - device = event_ctx; |
3849 | - |
3850 | - ret = ibv_req_notify_cq (event_cq, 0); |
3851 | - if (ret) { |
3852 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR, |
3853 | - "ibv_req_notify_cq on %s failed, terminating " |
3854 | - "recv thread: %d (%d)", |
3855 | - device->device_name, ret, errno); |
3856 | - continue; |
3857 | - } |
3858 | - |
3859 | - device = (gf_rdma_device_t *) event_ctx; |
3860 | - |
3861 | - while ((ret = ibv_poll_cq (event_cq, 1, &wc)) > 0) { |
3862 | - post = (gf_rdma_post_t *) (long) wc.wr_id; |
3863 | - |
3864 | - pthread_mutex_lock (&device->qpreg.lock); |
3865 | - { |
3866 | - peer = __gf_rdma_lookup_peer (device, |
3867 | - wc.qp_num); |
3868 | - |
3869 | - /* |
3870 | - * keep a refcount on transport so that it |
3871 | - * does not get freed because of some error |
3872 | - * indicated by wc.status till we are done |
3873 | - * with usage of peer and thereby that of trans. |
3874 | - */ |
3875 | - if (peer != NULL) { |
3876 | - rpc_transport_ref (peer->trans); |
3877 | - } |
3878 | - } |
3879 | - pthread_mutex_unlock (&device->qpreg.lock); |
3880 | - |
3881 | - if (wc.status != IBV_WC_SUCCESS) { |
3882 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR, |
3883 | - "recv work request on `%s' returned " |
3884 | - "error (%d)", device->device_name, |
3885 | - wc.status); |
3886 | - if (peer) { |
3887 | - rpc_transport_unref (peer->trans); |
3888 | - rpc_transport_disconnect (peer->trans); |
3889 | - } |
3890 | - |
3891 | - if (post) { |
3892 | - gf_rdma_post_unref (post); |
3893 | - } |
3894 | - continue; |
3895 | - } |
3896 | - |
3897 | - if (peer) { |
3898 | - gf_rdma_process_recv (peer, &wc); |
3899 | - rpc_transport_unref (peer->trans); |
3900 | - } else { |
3901 | - gf_log (GF_RDMA_LOG_NAME, |
3902 | - GF_LOG_DEBUG, |
3903 | - "could not lookup peer for qp_num: %d", |
3904 | - wc.qp_num); |
3905 | - } |
3906 | - |
3907 | - gf_rdma_post_unref (post); |
3908 | - } |
3909 | - |
3910 | - if (ret < 0) { |
3911 | - gf_log (GF_RDMA_LOG_NAME, |
3912 | - GF_LOG_ERROR, |
3913 | - "ibv_poll_cq on `%s' returned error " |
3914 | - "(ret = %d, errno = %d)", |
3915 | - device->device_name, ret, errno); |
3916 | - continue; |
3917 | - } |
3918 | - ibv_ack_cq_events (event_cq, 1); |
3919 | - } |
3920 | - |
3921 | - return NULL; |
3922 | -} |
3923 | - |
3924 | - |
3925 | -void |
3926 | -gf_rdma_handle_failed_send_completion (gf_rdma_peer_t *peer, struct ibv_wc *wc) |
3927 | -{ |
3928 | - gf_rdma_post_t *post = NULL; |
3929 | - gf_rdma_device_t *device = NULL; |
3930 | - gf_rdma_private_t *priv = NULL; |
3931 | - |
3932 | - if (peer != NULL) { |
3933 | - priv = peer->trans->private; |
3934 | - if (priv != NULL) { |
3935 | - device = priv->device; |
3936 | - } |
3937 | - } |
3938 | - |
3939 | - |
3940 | - post = (gf_rdma_post_t *) (long) wc->wr_id; |
3941 | - |
3942 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
3943 | - "send work request on `%s' returned error " |
3944 | - "wc.status = %d, wc.vendor_err = %d, post->buf = %p, " |
3945 | - "wc.byte_len = %d, post->reused = %d", |
3946 | - (device != NULL) ? device->device_name : NULL, wc->status, |
3947 | - wc->vendor_err, post->buf, wc->byte_len, post->reused); |
3948 | - |
3949 | - if (wc->status == IBV_WC_RETRY_EXC_ERR) { |
3950 | - gf_log ("rdma", GF_LOG_ERROR, "connection between client and" |
3951 | - " server not working. check by running " |
3952 | - "'ibv_srq_pingpong'. also make sure subnet manager" |
3953 | - " is running (eg: 'opensm'), or check if rdma port is " |
3954 | - "valid (or active) by running 'ibv_devinfo'. contact " |
3955 | - "Gluster Support Team if the problem persists."); |
3956 | - } |
3957 | - |
3958 | - if (peer) { |
3959 | - rpc_transport_disconnect (peer->trans); |
3960 | - } |
3961 | - |
3962 | - return; |
3963 | -} |
3964 | - |
3965 | - |
3966 | -void |
3967 | -gf_rdma_handle_successful_send_completion (gf_rdma_peer_t *peer, |
3968 | - struct ibv_wc *wc) |
3969 | -{ |
3970 | - gf_rdma_post_t *post = NULL; |
3971 | - int reads = 0, ret = 0; |
3972 | - gf_rdma_header_t *header = NULL; |
3973 | - |
3974 | - if (wc->opcode != IBV_WC_RDMA_READ) { |
3975 | - goto out; |
3976 | - } |
3977 | - |
3978 | - post = (gf_rdma_post_t *)(long) wc->wr_id; |
3979 | - |
3980 | - pthread_mutex_lock (&post->lock); |
3981 | - { |
3982 | - reads = --post->ctx.gf_rdma_reads; |
3983 | - } |
3984 | - pthread_mutex_unlock (&post->lock); |
3985 | - |
3986 | - if (reads != 0) { |
3987 | - /* if it is not the last rdma read, we've got nothing to do */ |
3988 | - goto out; |
3989 | - } |
3990 | - |
3991 | - header = (gf_rdma_header_t *)post->buf; |
3992 | - |
3993 | - if (header->rm_type == GF_RDMA_NOMSG) { |
3994 | - post->ctx.count = 1; |
3995 | - post->ctx.vector[0].iov_len += post->ctx.vector[1].iov_len; |
3996 | - } |
3997 | - |
3998 | - ret = gf_rdma_pollin_notify (peer, post); |
3999 | - if ((ret == -1) && (peer != NULL)) { |
4000 | - rpc_transport_disconnect (peer->trans); |
4001 | - } |
4002 | - |
4003 | -out: |
4004 | - return; |
4005 | -} |
4006 | - |
4007 | - |
4008 | -static void * |
4009 | -gf_rdma_send_completion_proc (void *data) |
4010 | -{ |
4011 | - struct ibv_comp_channel *chan = NULL; |
4012 | - gf_rdma_post_t *post = NULL; |
4013 | - gf_rdma_peer_t *peer = NULL; |
4014 | - struct ibv_cq *event_cq = NULL; |
4015 | - void *event_ctx = NULL; |
4016 | - gf_rdma_device_t *device = NULL; |
4017 | - struct ibv_wc wc = {0, }; |
4018 | - char is_request = 0; |
4019 | - int32_t ret = 0, quota_ret = 0; |
4020 | - |
4021 | - chan = data; |
4022 | - while (1) { |
4023 | - ret = ibv_get_cq_event (chan, &event_cq, &event_ctx); |
4024 | - if (ret) { |
4025 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR, |
4026 | - "ibv_get_cq_event on failed, terminating " |
4027 | - "send thread: %d (%d)", ret, errno); |
4028 | - continue; |
4029 | - } |
4030 | - |
4031 | - device = event_ctx; |
4032 | - |
4033 | - ret = ibv_req_notify_cq (event_cq, 0); |
4034 | - if (ret) { |
4035 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR, |
4036 | - "ibv_req_notify_cq on %s failed, terminating " |
4037 | - "send thread: %d (%d)", |
4038 | - device->device_name, ret, errno); |
4039 | - continue; |
4040 | - } |
4041 | - |
4042 | - while ((ret = ibv_poll_cq (event_cq, 1, &wc)) > 0) { |
4043 | - post = (gf_rdma_post_t *) (long) wc.wr_id; |
4044 | - |
4045 | - pthread_mutex_lock (&device->qpreg.lock); |
4046 | - { |
4047 | - peer = __gf_rdma_lookup_peer (device, wc.qp_num); |
4048 | - |
4049 | - /* |
4050 | - * keep a refcount on transport so that it |
4051 | - * does not get freed because of some error |
4052 | - * indicated by wc.status, till we are done |
4053 | - * with usage of peer and thereby that of trans. |
4054 | - */ |
4055 | - if (peer != NULL) { |
4056 | - rpc_transport_ref (peer->trans); |
4057 | - } |
4058 | - } |
4059 | - pthread_mutex_unlock (&device->qpreg.lock); |
4060 | - |
4061 | - if (wc.status != IBV_WC_SUCCESS) { |
4062 | - gf_rdma_handle_failed_send_completion (peer, &wc); |
4063 | - } else { |
4064 | - gf_rdma_handle_successful_send_completion (peer, |
4065 | - &wc); |
4066 | - } |
4067 | - |
4068 | - if (post) { |
4069 | - is_request = post->ctx.is_request; |
4070 | - |
4071 | - ret = gf_rdma_post_unref (post); |
4072 | - if ((ret == 0) |
4073 | - && (wc.status == IBV_WC_SUCCESS) |
4074 | - && !is_request |
4075 | - && (post->type == GF_RDMA_SEND_POST) |
4076 | - && (peer != NULL)) { |
4077 | - /* An GF_RDMA_RECV_POST can end up in |
4078 | - * gf_rdma_send_completion_proc for |
4079 | - * rdma-reads, and we do not take |
4080 | - * quota for getting an GF_RDMA_RECV_POST. |
4081 | - */ |
4082 | - |
4083 | - /* |
4084 | - * if it is request, quota is returned |
4085 | - * after reply has come. |
4086 | - */ |
4087 | - quota_ret = gf_rdma_quota_put (peer); |
4088 | - if (quota_ret < 0) { |
4089 | - gf_log ("rdma", GF_LOG_DEBUG, |
4090 | - "failed to send " |
4091 | - "message"); |
4092 | - } |
4093 | - } |
4094 | - } |
4095 | - |
4096 | - if (peer) { |
4097 | - rpc_transport_unref (peer->trans); |
4098 | - } else { |
4099 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG, |
4100 | - "could not lookup peer for qp_num: %d", |
4101 | - wc.qp_num); |
4102 | - } |
4103 | - } |
4104 | - |
4105 | - if (ret < 0) { |
4106 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR, |
4107 | - "ibv_poll_cq on `%s' returned error (ret = %d," |
4108 | - " errno = %d)", |
4109 | - device->device_name, ret, errno); |
4110 | - continue; |
4111 | - } |
4112 | - |
4113 | - ibv_ack_cq_events (event_cq, 1); |
4114 | - } |
4115 | - |
4116 | - return NULL; |
4117 | -} |
4118 | - |
4119 | - |
4120 | -static void |
4121 | -gf_rdma_options_init (rpc_transport_t *this) |
4122 | -{ |
4123 | - gf_rdma_private_t *priv = NULL; |
4124 | - gf_rdma_options_t *options = NULL; |
4125 | - int32_t mtu = 0; |
4126 | - data_t *temp = NULL; |
4127 | - |
4128 | - /* TODO: validate arguments from options below */ |
4129 | - |
4130 | - priv = this->private; |
4131 | - options = &priv->options; |
4132 | - options->send_size = GLUSTERFS_RDMA_INLINE_THRESHOLD;/*this->ctx->page_size * 4; 512 KB*/ |
4133 | - options->recv_size = GLUSTERFS_RDMA_INLINE_THRESHOLD;/*this->ctx->page_size * 4; 512 KB*/ |
4134 | - options->send_count = 4096; |
4135 | - options->recv_count = 4096; |
4136 | - options->attr_timeout = GF_RDMA_TIMEOUT; |
4137 | - options->attr_retry_cnt = GF_RDMA_RETRY_CNT; |
4138 | - options->attr_rnr_retry = GF_RDMA_RNR_RETRY; |
4139 | - |
4140 | - temp = dict_get (this->options, |
4141 | - "transport.rdma.work-request-send-count"); |
4142 | - if (temp) |
4143 | - options->send_count = data_to_int32 (temp); |
4144 | - |
4145 | - temp = dict_get (this->options, |
4146 | - "transport.rdma.work-request-recv-count"); |
4147 | - if (temp) |
4148 | - options->recv_count = data_to_int32 (temp); |
4149 | - |
4150 | - temp = dict_get (this->options, "transport.rdma.attr-timeout"); |
4151 | - |
4152 | - if (temp) |
4153 | - options->attr_timeout = data_to_uint8 (temp); |
4154 | - |
4155 | - temp = dict_get (this->options, "transport.rdma.attr-retry-cnt"); |
4156 | - |
4157 | - if (temp) |
4158 | - options->attr_retry_cnt = data_to_uint8 (temp); |
4159 | - |
4160 | - temp = dict_get (this->options, "transport.rdma.attr-rnr-retry"); |
4161 | - |
4162 | - if (temp) |
4163 | - options->attr_rnr_retry = data_to_uint8 (temp); |
4164 | - |
4165 | - options->port = 1; |
4166 | - temp = dict_get (this->options, |
4167 | - "transport.rdma.port"); |
4168 | - if (temp) |
4169 | - options->port = data_to_uint64 (temp); |
4170 | - |
4171 | - options->mtu = mtu = IBV_MTU_2048; |
4172 | - temp = dict_get (this->options, |
4173 | - "transport.rdma.mtu"); |
4174 | - if (temp) |
4175 | - mtu = data_to_int32 (temp); |
4176 | - switch (mtu) { |
4177 | - case 256: options->mtu = IBV_MTU_256; |
4178 | - break; |
4179 | - case 512: options->mtu = IBV_MTU_512; |
4180 | - break; |
4181 | - case 1024: options->mtu = IBV_MTU_1024; |
4182 | - break; |
4183 | - case 2048: options->mtu = IBV_MTU_2048; |
4184 | - break; |
4185 | - case 4096: options->mtu = IBV_MTU_4096; |
4186 | - break; |
4187 | - default: |
4188 | - if (temp) |
4189 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
4190 | - "%s: unrecognized MTU value '%s', defaulting " |
4191 | - "to '2048'", this->name, |
4192 | - data_to_str (temp)); |
4193 | - else |
4194 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_TRACE, |
4195 | - "%s: defaulting MTU to '2048'", |
4196 | - this->name); |
4197 | - options->mtu = IBV_MTU_2048; |
4198 | - break; |
4199 | - } |
4200 | - |
4201 | - temp = dict_get (this->options, |
4202 | - "transport.rdma.device-name"); |
4203 | - if (temp) |
4204 | - options->device_name = gf_strdup (temp->data); |
4205 | - |
4206 | - return; |
4207 | -} |
4208 | - |
4209 | - |
4210 | -gf_rdma_ctx_t * |
4211 | -__gf_rdma_ctx_create (void) |
4212 | -{ |
4213 | - gf_rdma_ctx_t *rdma_ctx = NULL; |
4214 | - int ret = -1; |
4215 | - |
4216 | - rdma_ctx = GF_CALLOC (1, sizeof (*rdma_ctx), gf_common_mt_char); |
4217 | - if (rdma_ctx == NULL) { |
4218 | - goto out; |
4219 | - } |
4220 | - |
4221 | - rdma_ctx->rdma_cm_event_channel = rdma_create_event_channel (); |
4222 | - if (rdma_ctx->rdma_cm_event_channel == NULL) { |
4223 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
4224 | - "rdma_cm event channel creation failed (%s)", |
4225 | - strerror (errno)); |
4226 | - goto out; |
4227 | - } |
4228 | - |
4229 | - ret = pthread_create (&rdma_ctx->rdma_cm_thread, NULL, |
4230 | - gf_rdma_cm_event_handler, |
4231 | - rdma_ctx->rdma_cm_event_channel); |
4232 | - if (ret != 0) { |
4233 | - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, |
4234 | - "creation of thread to handle rdma-cm events " |
4235 | - "failed (%s)", strerror (ret)); |
4236 | - goto out; |
4237 | - } |
4238 | - |
4239 | -out: |
4240 | - if (ret < 0) { |
4241 | - if (rdma_ctx->rdma_cm_event_channel != NULL) { |
4242 | - rdma_destroy_event_channel (rdma_ctx->rdma_cm_event_channel); |
4243 | - } |
4244 | - |
4245 | - GF_FREE (rdma_ctx); |
4246 | - rdma_ctx = NULL; |
4247 | - } |
4248 | - |
4249 | - return rdma_ctx; |
4250 | -} |
4251 | - |
4252 | -static int32_t |
4253 | -gf_rdma_init (rpc_transport_t *this) |
4254 | -{ |
4255 | - gf_rdma_private_t *priv = NULL; |
4256 | - int32_t ret = 0; |
4257 | - glusterfs_ctx_t *ctx = NULL; |
4258 | - gf_rdma_options_t *options = NULL; |
4259 | - |
4260 | - ctx= this->ctx; |
4261 | - |
4262 | - priv = this->private; |
4263 | - |
4264 | - ibv_fork_init (); |
4265 | - gf_rdma_options_init (this); |
4266 | - |
4267 | - options = &priv->options; |
4268 | - priv->peer.send_count = options->send_count; |
4269 | - priv->peer.recv_count = options->recv_count; |
4270 | - priv->peer.send_size = options->send_size; |
4271 | - priv->peer.recv_size = options->recv_size; |
4272 | - |
4273 | - priv->peer.trans = this; |
4274 | - INIT_LIST_HEAD (&priv->peer.ioq); |
4275 | - |
4276 | - pthread_mutex_init (&priv->write_mutex, NULL); |
4277 | - pthread_mutex_init (&priv->recv_mutex, NULL); |
4278 | - pthread_cond_init (&priv->recv_cond, NULL); |
4279 | - |
4280 | - pthread_mutex_lock (&ctx->lock); |
4281 | - { |
4282 | - if (ctx->ib == NULL) { |
4283 | - ctx->ib = __gf_rdma_ctx_create (); |
4284 | - if (ctx->ib == NULL) { |
4285 | - ret = -1; |
4286 | - } |
4287 | - } |
4288 | - } |
4289 | - pthread_mutex_unlock (&ctx->lock); |
4290 | - |
4291 | - return ret; |
4292 | -} |
4293 | - |
4294 | - |
4295 | -static int32_t |
4296 | -gf_rdma_disconnect (rpc_transport_t *this) |
4297 | -{ |
4298 | - gf_rdma_private_t *priv = NULL; |
4299 | - int32_t ret = 0; |
4300 | - |
4301 | - priv = this->private; |
4302 | - gf_log_callingfn (this->name, GF_LOG_WARNING, |
4303 | - "disconnect called (peer:%s)", |
4304 | - this->peerinfo.identifier); |
4305 | - |
4306 | - pthread_mutex_lock (&priv->write_mutex); |
4307 | - { |
4308 | - ret = __gf_rdma_disconnect (this); |
4309 | - } |
4310 | - pthread_mutex_unlock (&priv->write_mutex); |
4311 | - |
4312 | - return ret; |
4313 | -} |
4314 | - |
4315 | - |
4316 | -static int32_t |
4317 | -gf_rdma_connect (struct rpc_transport *this, int port) |
4318 | -{ |
4319 | - gf_rdma_private_t *priv = NULL; |
4320 | - int32_t ret = 0; |
4321 | - union gf_sock_union sock_union = {{0, }, }; |
4322 | - socklen_t sockaddr_len = 0; |
4323 | - gf_rdma_peer_t *peer = NULL; |
4324 | - gf_rdma_ctx_t *rdma_ctx = NULL; |
4325 | - gf_boolean_t connected = _gf_false; |
4326 | - |
4327 | - priv = this->private; |
4328 | - |
4329 | - peer = &priv->peer; |
4330 | - |
4331 | - rpc_transport_ref (this); |
4332 | - |
4333 | - ret = gf_rdma_client_get_remote_sockaddr (this, |
4334 | - &sock_union.sa, |
4335 | - &sockaddr_len, port); |
4336 | - if (ret != 0) { |
4337 | - gf_log (this->name, GF_LOG_DEBUG, |
4338 | - "cannot get remote address to connect"); |
4339 | - goto out; |
4340 | - } |
4341 | - |
4342 | - rdma_ctx = this->ctx->ib; |
4343 | - |
4344 | - pthread_mutex_lock (&priv->write_mutex); |
4345 | - { |
4346 | - if (peer->cm_id != NULL) { |
4347 | - ret = -1; |
4348 | - errno = EINPROGRESS; |
4349 | - connected = _gf_true; |
4350 | - goto unlock; |
4351 | - } |
4352 | - |
4353 | - priv->entity = GF_RDMA_CLIENT; |
4354 | - |
4355 | - ret = rdma_create_id (rdma_ctx->rdma_cm_event_channel, |
4356 | - &peer->cm_id, this, RDMA_PS_TCP); |
4357 | - if (ret != 0) { |
4358 | - gf_log (this->name, GF_LOG_ERROR, |
4359 | - "creation of rdma_cm_id failed (%s)", |
4360 | - strerror (errno)); |
4361 | - ret = -errno; |
4362 | - goto unlock; |
4363 | - } |
4364 | - |
4365 | - memcpy (&this->peerinfo.sockaddr, &sock_union.storage, |
4366 | - sockaddr_len); |
4367 | - this->peerinfo.sockaddr_len = sockaddr_len; |
4368 | - |
4369 | - if (port > 0) |
4370 | - sock_union.sin.sin_port = htons (port); |
4371 | - |
4372 | - ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = |
4373 | - ((struct sockaddr *)&this->peerinfo.sockaddr)->sa_family; |
4374 | - |
4375 | - ret = gf_rdma_client_bind (this, |
4376 | - (struct sockaddr *)&this->myinfo.sockaddr, |
4377 | - &this->myinfo.sockaddr_len, |
4378 | - peer->cm_id); |
4379 | - if (ret != 0) { |
4380 | - gf_log (this->name, GF_LOG_WARNING, |
4381 | - "client bind failed: %s", strerror (errno)); |
4382 | - goto unlock; |
4383 | - } |
4384 | - |
4385 | - ret = rdma_resolve_addr (peer->cm_id, NULL, &sock_union.sa, |
4386 | - 2000); |
4387 | - if (ret != 0) { |
4388 | - gf_log (this->name, GF_LOG_WARNING, |
4389 | - "rdma_resolve_addr failed (%s)", |
4390 | - strerror (errno)); |
4391 | - goto unlock; |
4392 | - } |
4393 | - |
4394 | - priv->connected = 0; |
4395 | - } |
4396 | -unlock: |
4397 | - pthread_mutex_unlock (&priv->write_mutex); |
4398 | - |
4399 | -out: |
4400 | - if (ret != 0) { |
4401 | - if (!connected) { |
4402 | - gf_rdma_teardown (this); |
4403 | - } |
4404 | - |
4405 | - rpc_transport_unref (this); |
4406 | - } |
4407 | - |
4408 | - return ret; |
4409 | -} |
4410 | - |
4411 | - |
4412 | -static int32_t |
4413 | -gf_rdma_listen (rpc_transport_t *this) |
4414 | -{ |
4415 | - union gf_sock_union sock_union = {{0, }, }; |
4416 | - socklen_t sockaddr_len = 0; |
4417 | - gf_rdma_private_t *priv = NULL; |
4418 | - gf_rdma_peer_t *peer = NULL; |
4419 | - int ret = 0; |
4420 | - gf_rdma_ctx_t *rdma_ctx = NULL; |
4421 | - char service[NI_MAXSERV], host[NI_MAXHOST]; |
4422 | - |
4423 | - priv = this->private; |
4424 | - peer = &priv->peer; |
4425 | - |
4426 | - priv->entity = GF_RDMA_SERVER_LISTENER; |
4427 | - |
4428 | - rdma_ctx = this->ctx->ib; |
4429 | - |
4430 | - ret = gf_rdma_server_get_local_sockaddr (this, &sock_union.sa, |
4431 | - &sockaddr_len); |
4432 | - if (ret != 0) { |
4433 | - gf_log (this->name, GF_LOG_WARNING, |
4434 | - "cannot find network address of server to bind to"); |
4435 | - goto err; |
4436 | - } |
4437 | - |
4438 | - ret = rdma_create_id (rdma_ctx->rdma_cm_event_channel, |
4439 | - &peer->cm_id, this, RDMA_PS_TCP); |
4440 | - if (ret != 0) { |
4441 | - gf_log (this->name, GF_LOG_WARNING, |
4442 | - "creation of rdma_cm_id failed (%s)", |
4443 | - strerror (errno)); |
4444 | - goto err; |
4445 | - } |
4446 | - |
4447 | - memcpy (&this->myinfo.sockaddr, &sock_union.storage, |
4448 | - sockaddr_len); |
4449 | - this->myinfo.sockaddr_len = sockaddr_len; |
4450 | - |
4451 | - ret = getnameinfo ((struct sockaddr *)&this->myinfo.sockaddr, |
4452 | - this->myinfo.sockaddr_len, host, sizeof (host), |
4453 | - service, sizeof (service), |
4454 | - NI_NUMERICHOST); |
4455 | - if (ret != 0) { |
4456 | - gf_log (this->name, GF_LOG_ERROR, |
4457 | - "getnameinfo failed (%s)", gai_strerror (ret)); |
4458 | - goto err; |
4459 | - } |
4460 | - |
4461 | - sprintf (this->myinfo.identifier, "%s:%s", host, service); |
4462 | - |
4463 | - ret = rdma_bind_addr (peer->cm_id, &sock_union.sa); |
4464 | - if (ret != 0) { |
4465 | - gf_log (this->name, GF_LOG_WARNING, |
4466 | - "rdma_bind_addr failed (%s)", strerror (errno)); |
4467 | - goto err; |
4468 | - } |
4469 | - |
4470 | - ret = rdma_listen (peer->cm_id, 10); |
4471 | - if (ret != 0) { |
4472 | - gf_log (this->name, GF_LOG_WARNING, |
4473 | - "rdma_listen failed (%s)", strerror (errno)); |
4474 | - goto err; |
4475 | - } |
4476 | - |
4477 | - rpc_transport_ref (this); |
4478 | - |
4479 | - ret = 0; |
4480 | -err: |
4481 | - if (ret < 0) { |
4482 | - if (peer->cm_id != NULL) { |
4483 | - rdma_destroy_id (peer->cm_id); |
4484 | - peer->cm_id = NULL; |
4485 | - } |
4486 | - } |
4487 | - |
4488 | - return ret; |
4489 | -} |
4490 | - |
4491 | - |
4492 | -struct rpc_transport_ops tops = { |
4493 | - .submit_request = gf_rdma_submit_request, |
4494 | - .submit_reply = gf_rdma_submit_reply, |
4495 | - .connect = gf_rdma_connect, |
4496 | - .disconnect = gf_rdma_disconnect, |
4497 | - .listen = gf_rdma_listen, |
4498 | -}; |
4499 | - |
4500 | -int32_t |
4501 | -init (rpc_transport_t *this) |
4502 | -{ |
4503 | - gf_rdma_private_t *priv = NULL; |
4504 | - |
4505 | - priv = GF_CALLOC (1, sizeof (*priv), gf_common_mt_rdma_private_t); |
4506 | - if (!priv) |
4507 | - return -1; |
4508 | - |
4509 | - this->private = priv; |
4510 | - |
4511 | - if (gf_rdma_init (this)) { |
4512 | - gf_log (this->name, GF_LOG_ERROR, |
4513 | - "Failed to initialize IB Device"); |
4514 | - return -1; |
4515 | - } |
4516 | - |
4517 | - return 0; |
4518 | -} |
4519 | - |
4520 | -void |
4521 | -fini (struct rpc_transport *this) |
4522 | -{ |
4523 | - /* TODO: verify this function does graceful finish */ |
4524 | - gf_rdma_private_t *priv = NULL; |
4525 | - |
4526 | - priv = this->private; |
4527 | - |
4528 | - this->private = NULL; |
4529 | - |
4530 | - if (priv) { |
4531 | - pthread_mutex_destroy (&priv->recv_mutex); |
4532 | - pthread_mutex_destroy (&priv->write_mutex); |
4533 | - |
4534 | - gf_log (this->name, GF_LOG_TRACE, |
4535 | - "called fini on transport: %p", this); |
4536 | - GF_FREE (priv); |
4537 | - } |
4538 | - return; |
4539 | -} |
4540 | - |
4541 | -/* TODO: expand each option */ |
4542 | -struct volume_options options[] = { |
4543 | - { .key = {"transport.rdma.port", |
4544 | - "rdma-port"}, |
4545 | - .type = GF_OPTION_TYPE_INT, |
4546 | - .min = 1, |
4547 | - .max = 4, |
4548 | - .description = "check the option by 'ibv_devinfo'" |
4549 | - }, |
4550 | - { .key = {"transport.rdma.mtu", |
4551 | - "rdma-mtu"}, |
4552 | - .type = GF_OPTION_TYPE_INT, |
4553 | - }, |
4554 | - { .key = {"transport.rdma.device-name", |
4555 | - "rdma-device-name"}, |
4556 | - .type = GF_OPTION_TYPE_ANY, |
4557 | - .description = "check by 'ibv_devinfo'" |
4558 | - }, |
4559 | - { .key = {"transport.rdma.work-request-send-count", |
4560 | - "rdma-work-request-send-count"}, |
4561 | - .type = GF_OPTION_TYPE_INT, |
4562 | - }, |
4563 | - { .key = {"transport.rdma.work-request-recv-count", |
4564 | - "rdma-work-request-recv-count"}, |
4565 | - .type = GF_OPTION_TYPE_INT, |
4566 | - }, |
4567 | - { .key = {"remote-port", |
4568 | - "transport.remote-port", |
4569 | - "transport.rdma.remote-port"}, |
4570 | - .type = GF_OPTION_TYPE_INT |
4571 | - }, |
4572 | - { .key = {"transport.rdma.attr-timeout", |
4573 | - "rdma-attr-timeout"}, |
4574 | - .type = GF_OPTION_TYPE_INT |
4575 | - }, |
4576 | - { .key = {"transport.rdma.attr-retry-cnt", |
4577 | - "rdma-attr-retry-cnt"}, |
4578 | - .type = GF_OPTION_TYPE_INT |
4579 | - }, |
4580 | - { .key = {"transport.rdma.attr-rnr-retry", |
4581 | - "rdma-attr-rnr-retry"}, |
4582 | - .type = GF_OPTION_TYPE_INT |
4583 | - }, |
4584 | - { .key = {"transport.rdma.listen-port", "listen-port"}, |
4585 | - .type = GF_OPTION_TYPE_INT |
4586 | - }, |
4587 | - { .key = {"transport.rdma.connect-path", "connect-path"}, |
4588 | - .type = GF_OPTION_TYPE_ANY |
4589 | - }, |
4590 | - { .key = {"transport.rdma.bind-path", "bind-path"}, |
4591 | - .type = GF_OPTION_TYPE_ANY |
4592 | - }, |
4593 | - { .key = {"transport.rdma.listen-path", "listen-path"}, |
4594 | - .type = GF_OPTION_TYPE_ANY |
4595 | - }, |
4596 | - { .key = {"transport.address-family", |
4597 | - "address-family"}, |
4598 | - .value = {"inet", "inet6", "inet/inet6", "inet6/inet", |
4599 | - "unix", "inet-sdp" }, |
4600 | - .type = GF_OPTION_TYPE_STR |
4601 | - }, |
4602 | - { .key = {"transport.socket.lowlat"}, |
4603 | - .type = GF_OPTION_TYPE_BOOL |
4604 | - }, |
4605 | - { .key = {NULL} } |
4606 | -}; |
4607 | |
4608 | === removed directory '.pc/01-spelling-error.diff/xlators' |
4609 | === removed directory '.pc/01-spelling-error.diff/xlators/mgmt' |
4610 | === removed directory '.pc/01-spelling-error.diff/xlators/mgmt/glusterd' |
4611 | === removed directory '.pc/01-spelling-error.diff/xlators/mgmt/glusterd/src' |
4612 | === removed file '.pc/01-spelling-error.diff/xlators/mgmt/glusterd/src/glusterd-store.c' |
4613 | --- .pc/01-spelling-error.diff/xlators/mgmt/glusterd/src/glusterd-store.c 2013-09-27 21:40:14 +0000 |
4614 | +++ .pc/01-spelling-error.diff/xlators/mgmt/glusterd/src/glusterd-store.c 1970-01-01 00:00:00 +0000 |
4615 | @@ -1,2530 +0,0 @@ |
4616 | -/* |
4617 | - Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com> |
4618 | - This file is part of GlusterFS. |
4619 | - |
4620 | - This file is licensed to you under your choice of the GNU Lesser |
4621 | - General Public License, version 3 or any later version (LGPLv3 or |
4622 | - later), or the GNU General Public License, version 2 (GPLv2), in all |
4623 | - cases as published by the Free Software Foundation. |
4624 | -*/ |
4625 | - |
4626 | -#ifndef _CONFIG_H |
4627 | -#define _CONFIG_H |
4628 | -#include "config.h" |
4629 | -#endif |
4630 | - |
4631 | -#include "glusterd-op-sm.h" |
4632 | -#include <inttypes.h> |
4633 | - |
4634 | - |
4635 | -#include "globals.h" |
4636 | -#include "glusterfs.h" |
4637 | -#include "compat.h" |
4638 | -#include "dict.h" |
4639 | -#include "protocol-common.h" |
4640 | -#include "xlator.h" |
4641 | -#include "logging.h" |
4642 | -#include "timer.h" |
4643 | -#include "defaults.h" |
4644 | -#include "compat.h" |
4645 | -#include "compat-errno.h" |
4646 | -#include "statedump.h" |
4647 | -#include "glusterd-mem-types.h" |
4648 | -#include "glusterd.h" |
4649 | -#include "glusterd-sm.h" |
4650 | -#include "glusterd-op-sm.h" |
4651 | -#include "glusterd-utils.h" |
4652 | -#include "glusterd-hooks.h" |
4653 | -#include "store.h" |
4654 | -#include "glusterd-store.h" |
4655 | - |
4656 | -#include "rpc-clnt.h" |
4657 | -#include "common-utils.h" |
4658 | - |
4659 | -#include <sys/resource.h> |
4660 | -#include <inttypes.h> |
4661 | -#include <dirent.h> |
4662 | - |
4663 | -void |
4664 | -glusterd_replace_slash_with_hipen (char *str) |
4665 | -{ |
4666 | - char *ptr = NULL; |
4667 | - |
4668 | - ptr = strchr (str, '/'); |
4669 | - |
4670 | - while (ptr) { |
4671 | - *ptr = '-'; |
4672 | - ptr = strchr (str, '/'); |
4673 | - } |
4674 | -} |
4675 | - |
4676 | -int32_t |
4677 | -glusterd_store_create_brick_dir (glusterd_volinfo_t *volinfo) |
4678 | -{ |
4679 | - int32_t ret = -1; |
4680 | - char brickdirpath[PATH_MAX] = {0,}; |
4681 | - glusterd_conf_t *priv = NULL; |
4682 | - |
4683 | - GF_ASSERT (volinfo); |
4684 | - |
4685 | - priv = THIS->private; |
4686 | - GF_ASSERT (priv); |
4687 | - |
4688 | - GLUSTERD_GET_BRICK_DIR (brickdirpath, volinfo, priv); |
4689 | - ret = gf_store_mkdir (brickdirpath); |
4690 | - |
4691 | - return ret; |
4692 | -} |
4693 | - |
4694 | -static void |
4695 | -glusterd_store_key_vol_brick_set (glusterd_brickinfo_t *brickinfo, |
4696 | - char *key_vol_brick, size_t len) |
4697 | -{ |
4698 | - GF_ASSERT (brickinfo); |
4699 | - GF_ASSERT (key_vol_brick); |
4700 | - GF_ASSERT (len >= PATH_MAX); |
4701 | - |
4702 | - snprintf (key_vol_brick, len, "%s", brickinfo->path); |
4703 | - glusterd_replace_slash_with_hipen (key_vol_brick); |
4704 | -} |
4705 | - |
4706 | -static void |
4707 | -glusterd_store_brickinfofname_set (glusterd_brickinfo_t *brickinfo, |
4708 | - char *brickfname, size_t len) |
4709 | -{ |
4710 | - char key_vol_brick[PATH_MAX] = {0}; |
4711 | - |
4712 | - GF_ASSERT (brickfname); |
4713 | - GF_ASSERT (brickinfo); |
4714 | - GF_ASSERT (len >= PATH_MAX); |
4715 | - |
4716 | - glusterd_store_key_vol_brick_set (brickinfo, key_vol_brick, |
4717 | - sizeof (key_vol_brick)); |
4718 | - snprintf (brickfname, len, "%s:%s", brickinfo->hostname, key_vol_brick); |
4719 | -} |
4720 | - |
4721 | -static void |
4722 | -glusterd_store_brickinfopath_set (glusterd_volinfo_t *volinfo, |
4723 | - glusterd_brickinfo_t *brickinfo, |
4724 | - char *brickpath, size_t len) |
4725 | -{ |
4726 | - char brickfname[PATH_MAX] = {0}; |
4727 | - char brickdirpath[PATH_MAX] = {0,}; |
4728 | - glusterd_conf_t *priv = NULL; |
4729 | - |
4730 | - GF_ASSERT (brickpath); |
4731 | - GF_ASSERT (brickinfo); |
4732 | - GF_ASSERT (len >= PATH_MAX); |
4733 | - |
4734 | - priv = THIS->private; |
4735 | - GF_ASSERT (priv); |
4736 | - |
4737 | - GLUSTERD_GET_BRICK_DIR (brickdirpath, volinfo, priv); |
4738 | - glusterd_store_brickinfofname_set (brickinfo, brickfname, |
4739 | - sizeof (brickfname)); |
4740 | - snprintf (brickpath, len, "%s/%s", brickdirpath, brickfname); |
4741 | -} |
4742 | - |
4743 | -gf_boolean_t |
4744 | -glusterd_store_is_valid_brickpath (char *volname, char *brick) |
4745 | -{ |
4746 | - char brickpath[PATH_MAX] = {0}; |
4747 | - glusterd_brickinfo_t *brickinfo = NULL; |
4748 | - glusterd_volinfo_t *volinfo = NULL; |
4749 | - int32_t ret = 0; |
4750 | - size_t volname_len = strlen (volname); |
4751 | - xlator_t *this = NULL; |
4752 | - |
4753 | - this = THIS; |
4754 | - GF_ASSERT (this); |
4755 | - |
4756 | - ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo); |
4757 | - if (ret) { |
4758 | - gf_log (this->name, GF_LOG_WARNING, "Failed to create brick " |
4759 | - "info for brick %s", brick); |
4760 | - ret = 0; |
4761 | - goto out; |
4762 | - } |
4763 | - ret = glusterd_volinfo_new (&volinfo); |
4764 | - if (ret) { |
4765 | - gf_log (this->name, GF_LOG_WARNING, "Failed to create volinfo"); |
4766 | - ret = 0; |
4767 | - goto out; |
4768 | - } |
4769 | - if (volname_len >= sizeof (volinfo->volname)) { |
4770 | - gf_log (this->name, GF_LOG_WARNING, "volume name too long"); |
4771 | - ret = 0; |
4772 | - goto out; |
4773 | - } |
4774 | - memcpy (volinfo->volname, volname, volname_len+1); |
4775 | - glusterd_store_brickinfopath_set (volinfo, brickinfo, brickpath, |
4776 | - sizeof (brickpath)); |
4777 | - |
4778 | - ret = (strlen (brickpath) < _POSIX_PATH_MAX); |
4779 | - |
4780 | -out: |
4781 | - if (brickinfo) |
4782 | - glusterd_brickinfo_delete (brickinfo); |
4783 | - if (volinfo) |
4784 | - glusterd_volinfo_delete (volinfo); |
4785 | - |
4786 | - return ret; |
4787 | -} |
4788 | - |
4789 | -int32_t |
4790 | -glusterd_store_volinfo_brick_fname_write (int vol_fd, |
4791 | - glusterd_brickinfo_t *brickinfo, |
4792 | - int32_t brick_count) |
4793 | -{ |
4794 | - char key[PATH_MAX] = {0,}; |
4795 | - char brickfname[PATH_MAX] = {0,}; |
4796 | - int32_t ret = -1; |
4797 | - |
4798 | - snprintf (key, sizeof (key), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK, |
4799 | - brick_count); |
4800 | - glusterd_store_brickinfofname_set (brickinfo, brickfname, |
4801 | - sizeof (brickfname)); |
4802 | - ret = gf_store_save_value (vol_fd, key, brickfname); |
4803 | - if (ret) |
4804 | - goto out; |
4805 | - |
4806 | -out: |
4807 | - return ret; |
4808 | -} |
4809 | - |
4810 | -int32_t |
4811 | -glusterd_store_create_brick_shandle_on_absence (glusterd_volinfo_t *volinfo, |
4812 | - glusterd_brickinfo_t *brickinfo) |
4813 | -{ |
4814 | - char brickpath[PATH_MAX] = {0,}; |
4815 | - int32_t ret = 0; |
4816 | - |
4817 | - GF_ASSERT (volinfo); |
4818 | - GF_ASSERT (brickinfo); |
4819 | - |
4820 | - glusterd_store_brickinfopath_set (volinfo, brickinfo, brickpath, |
4821 | - sizeof (brickpath)); |
4822 | - ret = gf_store_handle_create_on_absence (&brickinfo->shandle, |
4823 | - brickpath); |
4824 | - return ret; |
4825 | -} |
4826 | - |
4827 | -int32_t |
4828 | -glusterd_store_brickinfo_write (int fd, glusterd_brickinfo_t *brickinfo) |
4829 | -{ |
4830 | - char value[256] = {0,}; |
4831 | - int32_t ret = 0; |
4832 | - |
4833 | - GF_ASSERT (brickinfo); |
4834 | - GF_ASSERT (fd > 0); |
4835 | - |
4836 | - ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_HOSTNAME, |
4837 | - brickinfo->hostname); |
4838 | - if (ret) |
4839 | - goto out; |
4840 | - |
4841 | - ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PATH, |
4842 | - brickinfo->path); |
4843 | - if (ret) |
4844 | - goto out; |
4845 | - |
4846 | - snprintf (value, sizeof(value), "%d", brickinfo->port); |
4847 | - ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PORT, value); |
4848 | - |
4849 | - snprintf (value, sizeof(value), "%d", brickinfo->rdma_port); |
4850 | - ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT, |
4851 | - value); |
4852 | - |
4853 | - snprintf (value, sizeof(value), "%d", brickinfo->decommissioned); |
4854 | - ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED, |
4855 | - value); |
4856 | - if (ret) |
4857 | - goto out; |
4858 | - |
4859 | -out: |
4860 | - gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); |
4861 | - return ret; |
4862 | -} |
4863 | - |
4864 | -int32_t |
4865 | -glusterd_store_perform_brick_store (glusterd_brickinfo_t *brickinfo) |
4866 | -{ |
4867 | - int fd = -1; |
4868 | - int32_t ret = -1; |
4869 | - GF_ASSERT (brickinfo); |
4870 | - |
4871 | - fd = gf_store_mkstemp (brickinfo->shandle); |
4872 | - if (fd <= 0) { |
4873 | - ret = -1; |
4874 | - goto out; |
4875 | - } |
4876 | - |
4877 | - ret = glusterd_store_brickinfo_write (fd, brickinfo); |
4878 | - if (ret) |
4879 | - goto out; |
4880 | - |
4881 | -out: |
4882 | - if (ret && (fd > 0)) |
4883 | - gf_store_unlink_tmppath (brickinfo->shandle); |
4884 | - if (fd > 0) |
4885 | - close (fd); |
4886 | - gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); |
4887 | - return ret; |
4888 | -} |
4889 | - |
4890 | -int32_t |
4891 | -glusterd_store_brickinfo (glusterd_volinfo_t *volinfo, |
4892 | - glusterd_brickinfo_t *brickinfo, int32_t brick_count, |
4893 | - int vol_fd) |
4894 | -{ |
4895 | - int32_t ret = -1; |
4896 | - |
4897 | - GF_ASSERT (volinfo); |
4898 | - GF_ASSERT (brickinfo); |
4899 | - |
4900 | - ret = glusterd_store_volinfo_brick_fname_write (vol_fd, brickinfo, |
4901 | - brick_count); |
4902 | - if (ret) |
4903 | - goto out; |
4904 | - |
4905 | - ret = glusterd_store_create_brick_dir (volinfo); |
4906 | - if (ret) |
4907 | - goto out; |
4908 | - |
4909 | - ret = glusterd_store_create_brick_shandle_on_absence (volinfo, |
4910 | - brickinfo); |
4911 | - if (ret) |
4912 | - goto out; |
4913 | - |
4914 | - ret = glusterd_store_perform_brick_store (brickinfo); |
4915 | -out: |
4916 | - gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); |
4917 | - return ret; |
4918 | -} |
4919 | - |
4920 | -int32_t |
4921 | -glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, |
4922 | - glusterd_brickinfo_t *brickinfo) |
4923 | -{ |
4924 | - int32_t ret = -1; |
4925 | - glusterd_conf_t *priv = NULL; |
4926 | - char path[PATH_MAX] = {0,}; |
4927 | - char brickpath[PATH_MAX] = {0,}; |
4928 | - char *ptr = NULL; |
4929 | - char *tmppath = NULL; |
4930 | - xlator_t *this = NULL; |
4931 | - |
4932 | - this = THIS; |
4933 | - GF_ASSERT (this); |
4934 | - GF_ASSERT (volinfo); |
4935 | - GF_ASSERT (brickinfo); |
4936 | - |
4937 | - priv = this->private; |
4938 | - |
4939 | - GF_ASSERT (priv); |
4940 | - |
4941 | - GLUSTERD_GET_BRICK_DIR (path, volinfo, priv); |
4942 | - |
4943 | - tmppath = gf_strdup (brickinfo->path); |
4944 | - |
4945 | - ptr = strchr (tmppath, '/'); |
4946 | - |
4947 | - while (ptr) { |
4948 | - *ptr = '-'; |
4949 | - ptr = strchr (tmppath, '/'); |
4950 | - } |
4951 | - |
4952 | - snprintf (brickpath, sizeof (brickpath), "%s/%s:%s", |
4953 | - path, brickinfo->hostname, tmppath); |
4954 | - |
4955 | - GF_FREE (tmppath); |
4956 | - |
4957 | - ret = unlink (brickpath); |
4958 | - |
4959 | - if ((ret < 0) && (errno != ENOENT)) { |
4960 | - gf_log (this->name, GF_LOG_ERROR, "Unlink failed on %s, " |
4961 | - "reason: %s", brickpath, strerror(errno)); |
4962 | - ret = -1; |
4963 | - goto out; |
4964 | - } else { |
4965 | - ret = 0; |
4966 | - } |
4967 | - |
4968 | -out: |
4969 | - if (brickinfo->shandle) { |
4970 | - gf_store_handle_destroy (brickinfo->shandle); |
4971 | - brickinfo->shandle = NULL; |
4972 | - } |
4973 | - gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); |
4974 | - return ret; |
4975 | -} |
4976 | - |
4977 | -int32_t |
4978 | -glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo) |
4979 | -{ |
4980 | - int32_t ret = 0; |
4981 | - glusterd_brickinfo_t *tmp = NULL; |
4982 | - glusterd_conf_t *priv = NULL; |
4983 | - char brickdir [PATH_MAX] = {0,}; |
4984 | - DIR *dir = NULL; |
4985 | - struct dirent *entry = NULL; |
4986 | - char path[PATH_MAX] = {0,}; |
4987 | - xlator_t *this = NULL; |
4988 | - |
4989 | - this = THIS; |
4990 | - GF_ASSERT (this); |
4991 | - |
4992 | - GF_ASSERT (volinfo); |
4993 | - |
4994 | - list_for_each_entry (tmp, &volinfo->bricks, brick_list) { |
4995 | - ret = glusterd_store_delete_brick (volinfo, tmp); |
4996 | - if (ret) |
4997 | - goto out; |
4998 | - } |
4999 | - |
5000 | - priv = this->private; |
The diff has been truncated for viewing.