00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 FILE_LICENCE ( GPL2_OR_LATER );
00020
00021 #include <stdint.h>
00022 #include <stdio.h>
00023 #include <unistd.h>
00024 #include <string.h>
00025 #include <byteswap.h>
00026 #include <errno.h>
00027 #include <gpxe/errortab.h>
00028 #include <gpxe/if_arp.h>
00029 #include <gpxe/iobuf.h>
00030 #include <gpxe/netdevice.h>
00031 #include <gpxe/infiniband.h>
00032 #include <gpxe/ib_pathrec.h>
00033 #include <gpxe/ib_mcast.h>
00034 #include <gpxe/ipoib.h>
00035
00036
00037
00038
00039
00040
00041
/** Number of send work queue entries requested when ipoib_open() creates the queue pair */
00042 #define IPOIB_NUM_SEND_WQES 2
00043
00044
/** Number of receive work queue entries requested when ipoib_open() creates the queue pair */
00045 #define IPOIB_NUM_RECV_WQES 4
00046
00047
/** Number of completion queue entries requested when ipoib_open() creates the completion queue */
00048 #define IPOIB_NUM_CQES 8
00049
00050
00051 struct ipoib_device {
00052
00053 struct net_device *netdev;
00054
00055 struct ib_device *ibdev;
00056
00057 struct ib_completion_queue *cq;
00058
00059 struct ib_queue_pair *qp;
00060
00061 struct ipoib_mac broadcast;
00062
00063
00064
00065
00066
00067 int broadcast_joined;
00068
00069 struct ib_mc_membership broadcast_membership;
00070 };
00071
00072
00073 static struct ipoib_mac ipoib_broadcast = {
00074 .flags__qpn = htonl ( IB_QPN_BROADCAST ),
00075 .gid.u.bytes = { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
00076 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff },
00077 };
00078
00079
/** Link status code reported by ipoib_link_state_changed() while the broadcast group join has not yet completed */
00080 #define EINPROGRESS_JOINING ( EINPROGRESS | EUNIQ_01 )
00081
00082
00083 struct errortab ipoib_errors[] __errortab = {
00084 { EINPROGRESS_JOINING, "Joining" },
00085 };
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104 struct ipoib_peer {
00105
00106 uint8_t key;
00107
00108 struct ipoib_mac mac;
00109 };
00110
00111
00112
00113
00114
/** Number of entries in the IPoIB peer cache (also the modulus used to pick the slot for a new entry) */
00115 #define IPOIB_NUM_CACHED_PEERS 4
00116
00117
/** IPoIB peer cache; statically allocated, so every slot starts out zero-filled (key 0, all-zero MAC) */
00118 static struct ipoib_peer ipoib_peer_cache[IPOIB_NUM_CACHED_PEERS];
00119
00120
/** Counter from which ipoib_cache_peer() derives the key and (modulo IPOIB_NUM_CACHED_PEERS) the slot of the next new cache entry; starts at 1 */
00121 static unsigned int ipoib_peer_cache_idx = 1;
00122
00123
00124
00125
00126
00127
00128
00129 static struct ipoib_peer * ipoib_lookup_peer_by_key ( unsigned int key ) {
00130 struct ipoib_peer *peer;
00131 unsigned int i;
00132
00133 for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
00134 peer = &ipoib_peer_cache[i];
00135 if ( peer->key == key )
00136 return peer;
00137 }
00138
00139 if ( key != 0 ) {
00140 DBG ( "IPoIB warning: peer cache lost track of key %x while "
00141 "still in use\n", key );
00142 }
00143 return NULL;
00144 }
00145
00146
00147
00148
00149
00150
00151
00152 static struct ipoib_peer * ipoib_cache_peer ( const struct ipoib_mac *mac ) {
00153 struct ipoib_peer *peer;
00154 unsigned int key;
00155 unsigned int i;
00156
00157
00158 for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
00159 peer = &ipoib_peer_cache[i];
00160 if ( memcmp ( &peer->mac, mac, sizeof ( peer->mac ) ) == 0 )
00161 return peer;
00162 }
00163
00164
00165 key = ipoib_peer_cache_idx++;
00166 peer = &ipoib_peer_cache[ key % IPOIB_NUM_CACHED_PEERS ];
00167 if ( peer->key )
00168 DBG ( "IPoIB peer %x evicted from cache\n", peer->key );
00169
00170 memset ( peer, 0, sizeof ( *peer ) );
00171 peer->key = key;
00172 memcpy ( &peer->mac, mac, sizeof ( peer->mac ) );
00173 DBG ( "IPoIB peer %x has MAC %s\n",
00174 peer->key, ipoib_ntoa ( &peer->mac ) );
00175 return peer;
00176 }
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195 static int ipoib_push ( struct net_device *netdev __unused,
00196 struct io_buffer *iobuf, const void *ll_dest,
00197 const void *ll_source __unused, uint16_t net_proto ) {
00198 struct ipoib_hdr *ipoib_hdr =
00199 iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
00200 const struct ipoib_mac *dest_mac = ll_dest;
00201 const struct ipoib_mac *src_mac = ll_source;
00202 struct ipoib_peer *dest;
00203 struct ipoib_peer *src;
00204
00205
00206 dest = ipoib_cache_peer ( dest_mac );
00207 src = ipoib_cache_peer ( src_mac );
00208
00209
00210 ipoib_hdr->proto = net_proto;
00211 ipoib_hdr->u.peer.dest = dest->key;
00212 ipoib_hdr->u.peer.src = src->key;
00213
00214 return 0;
00215 }
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227 static int ipoib_pull ( struct net_device *netdev,
00228 struct io_buffer *iobuf, const void **ll_dest,
00229 const void **ll_source, uint16_t *net_proto ) {
00230 struct ipoib_device *ipoib = netdev->priv;
00231 struct ipoib_hdr *ipoib_hdr = iobuf->data;
00232 struct ipoib_peer *dest;
00233 struct ipoib_peer *source;
00234
00235
00236 if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
00237 DBG ( "IPoIB packet too short for link-layer header\n" );
00238 DBG_HD ( iobuf->data, iob_len ( iobuf ) );
00239 return -EINVAL;
00240 }
00241
00242
00243 iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
00244
00245
00246
00247
00248 dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
00249 source = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.src );
00250 ipoib_hdr->u.reserved = 0;
00251
00252
00253 *ll_dest = ( dest ? &dest->mac : &ipoib->broadcast );
00254 *ll_source = ( source ? &source->mac : &ipoib->broadcast );
00255 *net_proto = ipoib_hdr->proto;
00256
00257 return 0;
00258 }
00259
00260
00261
00262
00263
00264
00265
00266 static void ipoib_init_addr ( const void *hw_addr, void *ll_addr ) {
00267 const struct ib_gid_half *guid = hw_addr;
00268 struct ipoib_mac *mac = ll_addr;
00269
00270 memset ( mac, 0, sizeof ( *mac ) );
00271 memcpy ( &mac->gid.u.half[1], guid, sizeof ( mac->gid.u.half[1] ) );
00272 }
00273
00274
00275
00276
00277
00278
00279
00280 const char * ipoib_ntoa ( const void *ll_addr ) {
00281 static char buf[45];
00282 const struct ipoib_mac *mac = ll_addr;
00283
00284 snprintf ( buf, sizeof ( buf ), "%08x:%08x:%08x:%08x:%08x",
00285 htonl ( mac->flags__qpn ), htonl ( mac->gid.u.dwords[0] ),
00286 htonl ( mac->gid.u.dwords[1] ),
00287 htonl ( mac->gid.u.dwords[2] ),
00288 htonl ( mac->gid.u.dwords[3] ) );
00289 return buf;
00290 }
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300 static int ipoib_mc_hash ( unsigned int af __unused,
00301 const void *net_addr __unused,
00302 void *ll_addr __unused ) {
00303
00304 return -ENOTSUP;
00305 }
00306
00307
00308
00309
00310
00311
00312
00313 static int ipoib_mlx_eth_addr ( const struct ib_gid_half *guid,
00314 uint8_t *eth_addr ) {
00315 eth_addr[0] = ( ( guid->u.bytes[3] == 2 ) ? 0x00 : 0x02 );
00316 eth_addr[1] = guid->u.bytes[1];
00317 eth_addr[2] = guid->u.bytes[2];
00318 eth_addr[3] = guid->u.bytes[5];
00319 eth_addr[4] = guid->u.bytes[6];
00320 eth_addr[5] = guid->u.bytes[7];
00321 return 0;
00322 }
00323
00324
/** An IPoIB Ethernet-compatible compressed link-layer address generator
 *
 * ipoib_eth_addr() selects a generator by comparing bytes 1 and 2 of
 * the GUID against @c byte1 and @c byte2.
 */
struct ipoib_eth_addr_handler {
	/** Value required in GUID byte 1 */
	uint8_t byte1;
	/** Value required in GUID byte 2 */
	uint8_t byte2;
	/** Generate Ethernet-compatible compressed link-layer address
	 *
	 * @v guid		GUID (one GID half)
	 * @v eth_addr		Ethernet-compatible address to fill in
	 * @ret rc		Return status code
	 */
	int ( * eth_addr ) ( const struct ib_gid_half *guid,
			     uint8_t *eth_addr );
};
00334
00335
00336 static struct ipoib_eth_addr_handler ipoib_eth_addr_handlers[] = {
00337 { 0x02, 0xc9, ipoib_mlx_eth_addr },
00338 };
00339
00340
00341
00342
00343
00344
00345
00346 static int ipoib_eth_addr ( const void *ll_addr, void *eth_addr ) {
00347 const struct ipoib_mac *ipoib_addr = ll_addr;
00348 const struct ib_gid_half *guid = &ipoib_addr->gid.u.half[1];
00349 struct ipoib_eth_addr_handler *handler;
00350 unsigned int i;
00351
00352 for ( i = 0 ; i < ( sizeof ( ipoib_eth_addr_handlers ) /
00353 sizeof ( ipoib_eth_addr_handlers[0] ) ) ; i++ ) {
00354 handler = &ipoib_eth_addr_handlers[i];
00355 if ( ( handler->byte1 == guid->u.bytes[1] ) &&
00356 ( handler->byte2 == guid->u.bytes[2] ) ) {
00357 return handler->eth_addr ( guid, eth_addr );
00358 }
00359 }
00360 return -ENOTSUP;
00361 }
00362
00363
00364 struct ll_protocol ipoib_protocol __ll_protocol = {
00365 .name = "IPoIB",
00366 .ll_proto = htons ( ARPHRD_INFINIBAND ),
00367 .hw_addr_len = sizeof ( struct ib_gid_half ),
00368 .ll_addr_len = IPOIB_ALEN,
00369 .ll_header_len = IPOIB_HLEN,
00370 .push = ipoib_push,
00371 .pull = ipoib_pull,
00372 .init_addr = ipoib_init_addr,
00373 .ntoa = ipoib_ntoa,
00374 .mc_hash = ipoib_mc_hash,
00375 .eth_addr = ipoib_eth_addr,
00376 };
00377
00378
00379
00380
00381
00382
00383
00384 struct net_device * alloc_ipoibdev ( size_t priv_size ) {
00385 struct net_device *netdev;
00386
00387 netdev = alloc_netdev ( priv_size );
00388 if ( netdev ) {
00389 netdev->ll_protocol = &ipoib_protocol;
00390 netdev->ll_broadcast = ( uint8_t * ) &ipoib_broadcast;
00391 netdev->max_pkt_len = IB_MAX_PAYLOAD_SIZE;
00392 }
00393 return netdev;
00394 }
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410 static int ipoib_transmit ( struct net_device *netdev,
00411 struct io_buffer *iobuf ) {
00412 struct ipoib_device *ipoib = netdev->priv;
00413 struct ib_device *ibdev = ipoib->ibdev;
00414 struct ipoib_hdr *ipoib_hdr;
00415 struct ipoib_peer *dest;
00416 struct ib_address_vector av;
00417 int rc;
00418
00419
00420 if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
00421 DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
00422 return -EINVAL;
00423 }
00424 ipoib_hdr = iobuf->data;
00425
00426
00427
00428
00429 if ( ! ib_link_ok ( ibdev ) )
00430 return -ENETUNREACH;
00431
00432
00433 dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
00434 if ( ! dest )
00435 return -ENXIO;
00436 ipoib_hdr->u.reserved = 0;
00437
00438
00439 memset ( &av, 0, sizeof ( av ) );
00440 av.qpn = ( ntohl ( dest->mac.flags__qpn ) & IB_QPN_MASK );
00441 av.gid_present = 1;
00442 memcpy ( &av.gid, &dest->mac.gid, sizeof ( av.gid ) );
00443 if ( ( rc = ib_resolve_path ( ibdev, &av ) ) != 0 ) {
00444
00445 return rc;
00446 }
00447
00448 return ib_post_send ( ibdev, ipoib->qp, &av, iobuf );
00449 }
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459 static void ipoib_complete_send ( struct ib_device *ibdev __unused,
00460 struct ib_queue_pair *qp,
00461 struct io_buffer *iobuf, int rc ) {
00462 struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
00463
00464 netdev_tx_complete_err ( ipoib->netdev, iobuf, rc );
00465 }
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476 static void ipoib_complete_recv ( struct ib_device *ibdev __unused,
00477 struct ib_queue_pair *qp,
00478 struct ib_address_vector *av,
00479 struct io_buffer *iobuf, int rc ) {
00480 struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
00481 struct net_device *netdev = ipoib->netdev;
00482 struct ipoib_hdr *ipoib_hdr;
00483 struct ipoib_mac ll_src;
00484 struct ipoib_peer *src;
00485
00486 if ( rc != 0 ) {
00487 netdev_rx_err ( netdev, iobuf, rc );
00488 return;
00489 }
00490
00491
00492 if ( iob_len ( iobuf ) < sizeof ( struct ipoib_hdr ) ) {
00493 DBGC ( ipoib, "IPoIB %p received packet too short to "
00494 "contain IPoIB header\n", ipoib );
00495 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
00496 netdev_rx_err ( netdev, iobuf, -EIO );
00497 return;
00498 }
00499 ipoib_hdr = iobuf->data;
00500
00501
00502 if ( av->gid_present ) {
00503 ll_src.flags__qpn = htonl ( av->qpn );
00504 memcpy ( &ll_src.gid, &av->gid, sizeof ( ll_src.gid ) );
00505 src = ipoib_cache_peer ( &ll_src );
00506 ipoib_hdr->u.peer.src = src->key;
00507 }
00508
00509
00510 netdev_rx ( netdev, iobuf );
00511 }
00512
00513
00514 static struct ib_completion_queue_operations ipoib_cq_op = {
00515 .complete_send = ipoib_complete_send,
00516 .complete_recv = ipoib_complete_recv,
00517 };
00518
00519
00520
00521
00522
00523
00524 static void ipoib_poll ( struct net_device *netdev ) {
00525 struct ipoib_device *ipoib = netdev->priv;
00526 struct ib_device *ibdev = ipoib->ibdev;
00527
00528 ib_poll_eq ( ibdev );
00529 }
00530
00531
00532
00533
00534
00535
00536
00537 static void ipoib_irq ( struct net_device *netdev __unused,
00538 int enable __unused ) {
00539
00540 }
00541
00542
00543
00544
00545
00546
00547
00548
00549
00550
00551 void ipoib_join_complete ( struct ib_device *ibdev __unused,
00552 struct ib_queue_pair *qp __unused,
00553 struct ib_mc_membership *membership, int rc,
00554 union ib_mad *mad __unused ) {
00555 struct ipoib_device *ipoib = container_of ( membership,
00556 struct ipoib_device, broadcast_membership );
00557
00558
00559 netdev_link_err ( ipoib->netdev, rc );
00560 }
00561
00562
00563
00564
00565
00566
00567
00568 static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
00569 int rc;
00570
00571 if ( ( rc = ib_mcast_join ( ipoib->ibdev, ipoib->qp,
00572 &ipoib->broadcast_membership,
00573 &ipoib->broadcast.gid,
00574 ipoib_join_complete ) ) != 0 ) {
00575 DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
00576 ipoib, strerror ( rc ) );
00577 return rc;
00578 }
00579 ipoib->broadcast_joined = 1;
00580
00581 return 0;
00582 }
00583
00584
00585
00586
00587
00588
00589 static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
00590
00591 if ( ipoib->broadcast_joined ) {
00592 ib_mcast_leave ( ipoib->ibdev, ipoib->qp,
00593 &ipoib->broadcast_membership );
00594 ipoib->broadcast_joined = 0;
00595 }
00596 }
00597
00598
00599
00600
00601
00602
00603
00604 static int ipoib_open ( struct net_device *netdev ) {
00605 struct ipoib_device *ipoib = netdev->priv;
00606 struct ib_device *ibdev = ipoib->ibdev;
00607 struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
00608 int rc;
00609
00610
00611 if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
00612 DBGC ( ipoib, "IPoIB %p could not open device: %s\n",
00613 ipoib, strerror ( rc ) );
00614 goto err_ib_open;
00615 }
00616
00617
00618 ipoib->cq = ib_create_cq ( ibdev, IPOIB_NUM_CQES, &ipoib_cq_op );
00619 if ( ! ipoib->cq ) {
00620 DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n",
00621 ipoib );
00622 rc = -ENOMEM;
00623 goto err_create_cq;
00624 }
00625
00626
00627 ipoib->qp = ib_create_qp ( ibdev, IB_QPT_UD,
00628 IPOIB_NUM_SEND_WQES, ipoib->cq,
00629 IPOIB_NUM_RECV_WQES, ipoib->cq );
00630 if ( ! ipoib->qp ) {
00631 DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n",
00632 ipoib );
00633 rc = -ENOMEM;
00634 goto err_create_qp;
00635 }
00636 ib_qp_set_ownerdata ( ipoib->qp, ipoib );
00637
00638
00639 mac->flags__qpn = htonl ( ipoib->qp->qpn );
00640
00641
00642 ib_refill_recv ( ibdev, ipoib->qp );
00643
00644
00645 ipoib_link_state_changed ( ibdev );
00646
00647 return 0;
00648
00649 ib_destroy_qp ( ibdev, ipoib->qp );
00650 err_create_qp:
00651 ib_destroy_cq ( ibdev, ipoib->cq );
00652 err_create_cq:
00653 ib_close ( ibdev );
00654 err_ib_open:
00655 return rc;
00656 }
00657
00658
00659
00660
00661
00662
00663 static void ipoib_close ( struct net_device *netdev ) {
00664 struct ipoib_device *ipoib = netdev->priv;
00665 struct ib_device *ibdev = ipoib->ibdev;
00666 struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
00667
00668
00669 ipoib_leave_broadcast_group ( ipoib );
00670
00671
00672 mac->flags__qpn = 0;
00673
00674
00675 ib_destroy_qp ( ibdev, ipoib->qp );
00676 ib_destroy_cq ( ibdev, ipoib->cq );
00677
00678
00679 ib_close ( ibdev );
00680 }
00681
00682
00683 static struct net_device_operations ipoib_operations = {
00684 .open = ipoib_open,
00685 .close = ipoib_close,
00686 .transmit = ipoib_transmit,
00687 .poll = ipoib_poll,
00688 .irq = ipoib_irq,
00689 };
00690
00691
00692
00693
00694
00695
00696 void ipoib_link_state_changed ( struct ib_device *ibdev ) {
00697 struct net_device *netdev = ib_get_ownerdata ( ibdev );
00698 struct ipoib_device *ipoib = netdev->priv;
00699 struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
00700 int rc;
00701
00702
00703 ipoib_leave_broadcast_group ( ipoib );
00704
00705
00706 memcpy ( &mac->gid.u.half[0], &ibdev->gid.u.half[0],
00707 sizeof ( mac->gid.u.half[0] ) );
00708
00709
00710 ipoib->broadcast.gid.u.words[2] =
00711 htons ( ibdev->pkey | IB_PKEY_FULL );
00712
00713
00714 rc = ib_link_rc ( ibdev );
00715 netdev_link_err ( netdev, ( rc ? rc : -EINPROGRESS_JOINING ) );
00716
00717
00718 if ( ib_link_ok ( ibdev ) &&
00719 ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) ) {
00720 DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
00721 "%s\n", ipoib, strerror ( rc ) );
00722 netdev_link_err ( netdev, rc );
00723 return;
00724 }
00725 }
00726
00727
00728
00729
00730
00731
00732
00733 int ipoib_probe ( struct ib_device *ibdev ) {
00734 struct net_device *netdev;
00735 struct ipoib_device *ipoib;
00736 int rc;
00737
00738
00739 netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
00740 if ( ! netdev )
00741 return -ENOMEM;
00742 netdev_init ( netdev, &ipoib_operations );
00743 ipoib = netdev->priv;
00744 ib_set_ownerdata ( ibdev, netdev );
00745 netdev->dev = ibdev->dev;
00746 memset ( ipoib, 0, sizeof ( *ipoib ) );
00747 ipoib->netdev = netdev;
00748 ipoib->ibdev = ibdev;
00749
00750
00751 memcpy ( netdev->hw_addr, &ibdev->gid.u.half[1],
00752 sizeof ( ibdev->gid.u.half[1] ) );
00753
00754
00755 memcpy ( &ipoib->broadcast, &ipoib_broadcast,
00756 sizeof ( ipoib->broadcast ) );
00757 netdev->ll_broadcast = ( ( uint8_t * ) &ipoib->broadcast );
00758
00759
00760 if ( ( rc = register_netdev ( netdev ) ) != 0 )
00761 goto err_register_netdev;
00762
00763 return 0;
00764
00765 err_register_netdev:
00766 netdev_nullify ( netdev );
00767 netdev_put ( netdev );
00768 return rc;
00769 }
00770
00771
00772
00773
00774
00775
/**
 * Remove IPoIB device
 *
 * @v ibdev		Infiniband device
 *
 * Unregisters and releases the network device that ipoib_probe()
 * attached to the Infiniband device.
 */
void ipoib_remove ( struct ib_device *ibdev ) {
	struct net_device *owner = ib_get_ownerdata ( ibdev );

	unregister_netdev ( owner );
	netdev_nullify ( owner );
	netdev_put ( owner );
}