ipoib.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU General Public License as
00006  * published by the Free Software Foundation; either version 2 of the
00007  * License, or any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but
00010  * WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012  * General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software
00016  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00017  */
00018 
00019 FILE_LICENCE ( GPL2_OR_LATER );
00020 
00021 #include <stdint.h>
00022 #include <stdio.h>
00023 #include <unistd.h>
00024 #include <string.h>
00025 #include <byteswap.h>
00026 #include <errno.h>
00027 #include <gpxe/errortab.h>
00028 #include <gpxe/if_arp.h>
00029 #include <gpxe/iobuf.h>
00030 #include <gpxe/netdevice.h>
00031 #include <gpxe/infiniband.h>
00032 #include <gpxe/ib_pathrec.h>
00033 #include <gpxe/ib_mcast.h>
00034 #include <gpxe/ipoib.h>
00035 
00036 /** @file
00037  *
00038  * IP over Infiniband
00039  */
00040 
/** Number of send work queue entries requested for the IPoIB queue pair */
#define IPOIB_NUM_SEND_WQES 2

/** Number of receive work queue entries requested for the IPoIB queue pair */
#define IPOIB_NUM_RECV_WQES 4

/** Number of entries requested for the IPoIB completion queue
 *
 * The same completion queue is used for both the send and the receive
 * side of the queue pair.
 */
#define IPOIB_NUM_CQES 8
00049 
/** An IPoIB device
 *
 * This structure is held as the private data of the network device
 * (it is obtained from netdev->priv throughout this file).
 */
struct ipoib_device {
        /** Network device */
        struct net_device *netdev;
        /** Underlying Infiniband device */
        struct ib_device *ibdev;
        /** Completion queue (created on open, destroyed on close) */
        struct ib_completion_queue *cq;
        /** Queue pair (created on open, destroyed on close) */
        struct ib_queue_pair *qp;
        /** Broadcast MAC
         *
         * Per-device copy of the default broadcast address; part of
         * its GID is rewritten from the partition key whenever the
         * link state changes.
         */
        struct ipoib_mac broadcast;
        /** Joined to IPv4 broadcast multicast group
         *
         * This flag indicates whether or not we have initiated the
         * join to the IPv4 broadcast multicast group.  It is set once
         * the join request has been issued successfully and cleared
         * when the group is left.
         */
        int broadcast_joined;
        /** IPv4 broadcast multicast group membership */
        struct ib_mc_membership broadcast_membership;
};
00071 
00072 /** Broadcast IPoIB address */
00073 static struct ipoib_mac ipoib_broadcast = {
00074         .flags__qpn = htonl ( IB_QPN_BROADCAST ),
00075         .gid.u.bytes =  { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
00076                           0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff },
00077 };
00078 
/** Link status for "broadcast join in progress"
 *
 * Reported (negated) as the network device link status while the
 * Infiniband link is up but the broadcast group join has not yet
 * completed.
 */
#define EINPROGRESS_JOINING ( EINPROGRESS | EUNIQ_01 )

/** Human-readable message for the link status */
struct errortab ipoib_errors[] __errortab = {
        { EINPROGRESS_JOINING, "Joining" },
};
00086 
00087 /****************************************************************************
00088  *
00089  * IPoIB peer cache
00090  *
00091  ****************************************************************************
00092  */
00093 
/**
 * IPoIB peer address
 *
 * The IPoIB link-layer header is only four bytes long and so does not
 * have sufficient room to store IPoIB MAC address(es).  We therefore
 * maintain a cache of MAC addresses identified by a single-byte key,
 * and abuse the spare two bytes within the link-layer header to
 * communicate these MAC addresses between the link-layer code and the
 * netdevice driver.
 *
 * A key of zero is what an unused (zero-filled) cache entry contains.
 */
struct ipoib_peer {
        /** Key */
        uint8_t key;
        /** MAC address */
        struct ipoib_mac mac;
};
00110 
/** Number of IPoIB peer cache entries
 *
 * Must be a power of two.
 */
#define IPOIB_NUM_CACHED_PEERS 4

/** IPoIB peer address cache
 *
 * Shared by all IPoIB devices; entries are zero-filled until first
 * used.
 */
static struct ipoib_peer ipoib_peer_cache[IPOIB_NUM_CACHED_PEERS];

/** Next IPoIB peer cache key to allocate
 *
 * The cache slot that receives a new entry is this value modulo
 * IPOIB_NUM_CACHED_PEERS, so the slot being overwritten is always the
 * one that was filled least recently.  Starts at 1.
 */
static unsigned int ipoib_peer_cache_idx = 1;
00122 
00123 /**
00124  * Look up cached peer by key
00125  *
00126  * @v key               Peer cache key
00127  * @ret peer            Peer cache entry, or NULL
00128  */
00129 static struct ipoib_peer * ipoib_lookup_peer_by_key ( unsigned int key ) {
00130         struct ipoib_peer *peer;
00131         unsigned int i;
00132 
00133         for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
00134                 peer = &ipoib_peer_cache[i];
00135                 if ( peer->key == key )
00136                         return peer;
00137         }
00138 
00139         if ( key != 0 ) {
00140                 DBG ( "IPoIB warning: peer cache lost track of key %x while "
00141                       "still in use\n", key );
00142         }
00143         return NULL;
00144 }
00145 
00146 /**
00147  * Store GID and QPN in peer cache
00148  *
00149  * @v mac               Peer MAC address
00150  * @ret peer            Peer cache entry
00151  */
00152 static struct ipoib_peer * ipoib_cache_peer ( const struct ipoib_mac *mac ) {
00153         struct ipoib_peer *peer;
00154         unsigned int key;
00155         unsigned int i;
00156 
00157         /* Look for existing cache entry */
00158         for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
00159                 peer = &ipoib_peer_cache[i];
00160                 if ( memcmp ( &peer->mac, mac, sizeof ( peer->mac ) ) == 0 )
00161                         return peer;
00162         }
00163 
00164         /* No entry found: create a new one */
00165         key = ipoib_peer_cache_idx++;
00166         peer = &ipoib_peer_cache[ key % IPOIB_NUM_CACHED_PEERS ];
00167         if ( peer->key )
00168                 DBG ( "IPoIB peer %x evicted from cache\n", peer->key );
00169 
00170         memset ( peer, 0, sizeof ( *peer ) );
00171         peer->key = key;
00172         memcpy ( &peer->mac, mac, sizeof ( peer->mac ) );
00173         DBG ( "IPoIB peer %x has MAC %s\n",
00174               peer->key, ipoib_ntoa ( &peer->mac ) );
00175         return peer;
00176 }
00177 
00178 /****************************************************************************
00179  *
00180  * IPoIB link layer
00181  *
00182  ****************************************************************************
00183  */
00184 
00185 /**
00186  * Add IPoIB link-layer header
00187  *
00188  * @v netdev            Network device
00189  * @v iobuf             I/O buffer
00190  * @v ll_dest           Link-layer destination address
00191  * @v ll_source         Source link-layer address
00192  * @v net_proto         Network-layer protocol, in network-byte order
00193  * @ret rc              Return status code
00194  */
00195 static int ipoib_push ( struct net_device *netdev __unused,
00196                         struct io_buffer *iobuf, const void *ll_dest,
00197                         const void *ll_source __unused, uint16_t net_proto ) {
00198         struct ipoib_hdr *ipoib_hdr =
00199                 iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
00200         const struct ipoib_mac *dest_mac = ll_dest;
00201         const struct ipoib_mac *src_mac = ll_source;
00202         struct ipoib_peer *dest;
00203         struct ipoib_peer *src;
00204 
00205         /* Add link-layer addresses to cache */
00206         dest = ipoib_cache_peer ( dest_mac );
00207         src = ipoib_cache_peer ( src_mac );
00208 
00209         /* Build IPoIB header */
00210         ipoib_hdr->proto = net_proto;
00211         ipoib_hdr->u.peer.dest = dest->key;
00212         ipoib_hdr->u.peer.src = src->key;
00213 
00214         return 0;
00215 }
00216 
00217 /**
00218  * Remove IPoIB link-layer header
00219  *
00220  * @v netdev            Network device
00221  * @v iobuf             I/O buffer
00222  * @ret ll_dest         Link-layer destination address
00223  * @ret ll_source       Source link-layer address
00224  * @ret net_proto       Network-layer protocol, in network-byte order
00225  * @ret rc              Return status code
00226  */
00227 static int ipoib_pull ( struct net_device *netdev,
00228                         struct io_buffer *iobuf, const void **ll_dest,
00229                         const void **ll_source, uint16_t *net_proto ) {
00230         struct ipoib_device *ipoib = netdev->priv;
00231         struct ipoib_hdr *ipoib_hdr = iobuf->data;
00232         struct ipoib_peer *dest;
00233         struct ipoib_peer *source;
00234 
00235         /* Sanity check */
00236         if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
00237                 DBG ( "IPoIB packet too short for link-layer header\n" );
00238                 DBG_HD ( iobuf->data, iob_len ( iobuf ) );
00239                 return -EINVAL;
00240         }
00241 
00242         /* Strip off IPoIB header */
00243         iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
00244 
00245         /* Identify source and destination addresses, and clear
00246          * reserved word in IPoIB header
00247          */
00248         dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
00249         source = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.src );
00250         ipoib_hdr->u.reserved = 0;
00251 
00252         /* Fill in required fields */
00253         *ll_dest = ( dest ? &dest->mac : &ipoib->broadcast );
00254         *ll_source = ( source ? &source->mac : &ipoib->broadcast );
00255         *net_proto = ipoib_hdr->proto;
00256 
00257         return 0;
00258 }
00259 
00260 /**
00261  * Initialise IPoIB link-layer address
00262  *
00263  * @v hw_addr           Hardware address
00264  * @v ll_addr           Link-layer address
00265  */
00266 static void ipoib_init_addr ( const void *hw_addr, void *ll_addr ) {
00267         const struct ib_gid_half *guid = hw_addr;
00268         struct ipoib_mac *mac = ll_addr;
00269 
00270         memset ( mac, 0, sizeof ( *mac ) );
00271         memcpy ( &mac->gid.u.half[1], guid, sizeof ( mac->gid.u.half[1] ) );
00272 }
00273 
00274 /**
00275  * Transcribe IPoIB link-layer address
00276  *
00277  * @v ll_addr   Link-layer address
00278  * @ret string  Link-layer address in human-readable format
00279  */
00280 const char * ipoib_ntoa ( const void *ll_addr ) {
00281         static char buf[45];
00282         const struct ipoib_mac *mac = ll_addr;
00283 
00284         snprintf ( buf, sizeof ( buf ), "%08x:%08x:%08x:%08x:%08x",
00285                    htonl ( mac->flags__qpn ), htonl ( mac->gid.u.dwords[0] ),
00286                    htonl ( mac->gid.u.dwords[1] ),
00287                    htonl ( mac->gid.u.dwords[2] ),
00288                    htonl ( mac->gid.u.dwords[3] ) );
00289         return buf;
00290 }
00291 
00292 /**
00293  * Hash multicast address
00294  *
00295  * @v af                Address family
00296  * @v net_addr          Network-layer address
00297  * @v ll_addr           Link-layer address to fill in
00298  * @ret rc              Return status code
00299  */
00300 static int ipoib_mc_hash ( unsigned int af __unused,
00301                            const void *net_addr __unused,
00302                            void *ll_addr __unused ) {
00303 
00304         return -ENOTSUP;
00305 }
00306 
00307 /**
00308  * Generate Mellanox Ethernet-compatible compressed link-layer address
00309  *
00310  * @v ll_addr           Link-layer address
00311  * @v eth_addr          Ethernet-compatible address to fill in
00312  */
00313 static int ipoib_mlx_eth_addr ( const struct ib_gid_half *guid,
00314                                 uint8_t *eth_addr ) {
00315         eth_addr[0] = ( ( guid->u.bytes[3] == 2 ) ? 0x00 : 0x02 );
00316         eth_addr[1] = guid->u.bytes[1];
00317         eth_addr[2] = guid->u.bytes[2];
00318         eth_addr[3] = guid->u.bytes[5];
00319         eth_addr[4] = guid->u.bytes[6];
00320         eth_addr[5] = guid->u.bytes[7];
00321         return 0;
00322 }
00323 
/** An IPoIB Ethernet-compatible compressed link-layer address generator
 *
 * A generator is selected when bytes 1 and 2 of the GUID match
 * @c byte1 and @c byte2 respectively.
 */
struct ipoib_eth_addr_handler {
        /** GUID byte 1 */
        uint8_t byte1;
        /** GUID byte 2 */
        uint8_t byte2;
        /** Handler
         *
         * @v guid      GUID half of the link-layer address
         * @v eth_addr  Ethernet-compatible address to fill in
         * @ret rc      Return status code
         */
        int ( * eth_addr ) ( const struct ib_gid_half *guid,
                             uint8_t *eth_addr );
};
00334 
00335 /** IPoIB Ethernet-compatible compressed link-layer address generators */
00336 static struct ipoib_eth_addr_handler ipoib_eth_addr_handlers[] = {
00337         { 0x02, 0xc9, ipoib_mlx_eth_addr },
00338 };
00339 
00340 /**
00341  * Generate Ethernet-compatible compressed link-layer address
00342  *
00343  * @v ll_addr           Link-layer address
00344  * @v eth_addr          Ethernet-compatible address to fill in
00345  */
00346 static int ipoib_eth_addr ( const void *ll_addr, void *eth_addr ) {
00347         const struct ipoib_mac *ipoib_addr = ll_addr;
00348         const struct ib_gid_half *guid = &ipoib_addr->gid.u.half[1];
00349         struct ipoib_eth_addr_handler *handler;
00350         unsigned int i;
00351 
00352         for ( i = 0 ; i < ( sizeof ( ipoib_eth_addr_handlers ) /
00353                             sizeof ( ipoib_eth_addr_handlers[0] ) ) ; i++ ) {
00354                 handler = &ipoib_eth_addr_handlers[i];
00355                 if ( ( handler->byte1 == guid->u.bytes[1] ) &&
00356                      ( handler->byte2 == guid->u.bytes[2] ) ) {
00357                         return handler->eth_addr ( guid, eth_addr );
00358                 }
00359         }
00360         return -ENOTSUP;
00361 }
00362 
00363 /** IPoIB protocol */
00364 struct ll_protocol ipoib_protocol __ll_protocol = {
00365         .name           = "IPoIB",
00366         .ll_proto       = htons ( ARPHRD_INFINIBAND ),
00367         .hw_addr_len    = sizeof ( struct ib_gid_half ),
00368         .ll_addr_len    = IPOIB_ALEN,
00369         .ll_header_len  = IPOIB_HLEN,
00370         .push           = ipoib_push,
00371         .pull           = ipoib_pull,
00372         .init_addr      = ipoib_init_addr,
00373         .ntoa           = ipoib_ntoa,
00374         .mc_hash        = ipoib_mc_hash,
00375         .eth_addr       = ipoib_eth_addr,
00376 };
00377 
00378 /**
00379  * Allocate IPoIB device
00380  *
00381  * @v priv_size         Size of driver private data
00382  * @ret netdev          Network device, or NULL
00383  */
00384 struct net_device * alloc_ipoibdev ( size_t priv_size ) {
00385         struct net_device *netdev;
00386 
00387         netdev = alloc_netdev ( priv_size );
00388         if ( netdev ) {
00389                 netdev->ll_protocol = &ipoib_protocol;
00390                 netdev->ll_broadcast = ( uint8_t * ) &ipoib_broadcast;
00391                 netdev->max_pkt_len = IB_MAX_PAYLOAD_SIZE;
00392         }
00393         return netdev;
00394 }
00395 
00396 /****************************************************************************
00397  *
00398  * IPoIB network device
00399  *
00400  ****************************************************************************
00401  */
00402 
00403 /**
00404  * Transmit packet via IPoIB network device
00405  *
00406  * @v netdev            Network device
00407  * @v iobuf             I/O buffer
00408  * @ret rc              Return status code
00409  */
00410 static int ipoib_transmit ( struct net_device *netdev,
00411                             struct io_buffer *iobuf ) {
00412         struct ipoib_device *ipoib = netdev->priv;
00413         struct ib_device *ibdev = ipoib->ibdev;
00414         struct ipoib_hdr *ipoib_hdr;
00415         struct ipoib_peer *dest;
00416         struct ib_address_vector av;
00417         int rc;
00418 
00419         /* Sanity check */
00420         if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
00421                 DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
00422                 return -EINVAL;
00423         }
00424         ipoib_hdr = iobuf->data;
00425 
00426         /* Attempting transmission while link is down will put the
00427          * queue pair into an error state, so don't try it.
00428          */
00429         if ( ! ib_link_ok ( ibdev ) )
00430                 return -ENETUNREACH;
00431 
00432         /* Identify destination address */
00433         dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
00434         if ( ! dest )
00435                 return -ENXIO;
00436         ipoib_hdr->u.reserved = 0;
00437 
00438         /* Construct address vector */
00439         memset ( &av, 0, sizeof ( av ) );
00440         av.qpn = ( ntohl ( dest->mac.flags__qpn ) & IB_QPN_MASK );
00441         av.gid_present = 1;
00442         memcpy ( &av.gid, &dest->mac.gid, sizeof ( av.gid ) );
00443         if ( ( rc = ib_resolve_path ( ibdev, &av ) ) != 0 ) {
00444                 /* Path not resolved yet */
00445                 return rc;
00446         }
00447 
00448         return ib_post_send ( ibdev, ipoib->qp, &av, iobuf );
00449 }
00450 
00451 /**
00452  * Handle IPoIB send completion
00453  *
00454  * @v ibdev             Infiniband device
00455  * @v qp                Queue pair
00456  * @v iobuf             I/O buffer
00457  * @v rc                Completion status code
00458  */
00459 static void ipoib_complete_send ( struct ib_device *ibdev __unused,
00460                                   struct ib_queue_pair *qp,
00461                                   struct io_buffer *iobuf, int rc ) {
00462         struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
00463 
00464         netdev_tx_complete_err ( ipoib->netdev, iobuf, rc );
00465 }
00466 
00467 /**
00468  * Handle IPoIB receive completion
00469  *
00470  * @v ibdev             Infiniband device
00471  * @v qp                Queue pair
00472  * @v av                Address vector, or NULL
00473  * @v iobuf             I/O buffer
00474  * @v rc                Completion status code
00475  */
00476 static void ipoib_complete_recv ( struct ib_device *ibdev __unused,
00477                                   struct ib_queue_pair *qp,
00478                                   struct ib_address_vector *av,
00479                                   struct io_buffer *iobuf, int rc ) {
00480         struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
00481         struct net_device *netdev = ipoib->netdev;
00482         struct ipoib_hdr *ipoib_hdr;
00483         struct ipoib_mac ll_src;
00484         struct ipoib_peer *src;
00485 
00486         if ( rc != 0 ) {
00487                 netdev_rx_err ( netdev, iobuf, rc );
00488                 return;
00489         }
00490 
00491         /* Sanity check */
00492         if ( iob_len ( iobuf ) < sizeof ( struct ipoib_hdr ) ) {
00493                 DBGC ( ipoib, "IPoIB %p received packet too short to "
00494                        "contain IPoIB header\n", ipoib );
00495                 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
00496                 netdev_rx_err ( netdev, iobuf, -EIO );
00497                 return;
00498         }
00499         ipoib_hdr = iobuf->data;
00500 
00501         /* Parse source address */
00502         if ( av->gid_present ) {
00503                 ll_src.flags__qpn = htonl ( av->qpn );
00504                 memcpy ( &ll_src.gid, &av->gid, sizeof ( ll_src.gid ) );
00505                 src = ipoib_cache_peer ( &ll_src );
00506                 ipoib_hdr->u.peer.src = src->key;
00507         }
00508 
00509         /* Hand off to network layer */
00510         netdev_rx ( netdev, iobuf );
00511 }
00512 
00513 /** IPoIB completion operations */
00514 static struct ib_completion_queue_operations ipoib_cq_op = {
00515         .complete_send = ipoib_complete_send,
00516         .complete_recv = ipoib_complete_recv,
00517 };
00518 
00519 /**
00520  * Poll IPoIB network device
00521  *
00522  * @v netdev            Network device
00523  */
00524 static void ipoib_poll ( struct net_device *netdev ) {
00525         struct ipoib_device *ipoib = netdev->priv;
00526         struct ib_device *ibdev = ipoib->ibdev;
00527 
00528         ib_poll_eq ( ibdev );
00529 }
00530 
/**
 * Enable/disable interrupts on IPoIB network device
 *
 * @v netdev            Network device
 * @v enable            Interrupts should be enabled
 *
 * Deliberately does nothing: both arguments are ignored.
 */
static void ipoib_irq ( struct net_device *netdev __unused,
                        int enable __unused ) {
        /* No implementation */
}
00541 
00542 /**
00543  * Handle IPv4 broadcast multicast group join completion
00544  *
00545  * @v ibdev             Infiniband device
00546  * @v qp                Queue pair
00547  * @v membership        Multicast group membership
00548  * @v rc                Status code
00549  * @v mad               Response MAD (or NULL on error)
00550  */
00551 void ipoib_join_complete ( struct ib_device *ibdev __unused,
00552                            struct ib_queue_pair *qp __unused,
00553                            struct ib_mc_membership *membership, int rc,
00554                            union ib_mad *mad __unused ) {
00555         struct ipoib_device *ipoib = container_of ( membership,
00556                                    struct ipoib_device, broadcast_membership );
00557 
00558         /* Record join status as link status */
00559         netdev_link_err ( ipoib->netdev, rc );
00560 }
00561 
00562 /**
00563  * Join IPv4 broadcast multicast group
00564  *
00565  * @v ipoib             IPoIB device
00566  * @ret rc              Return status code
00567  */
00568 static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
00569         int rc;
00570 
00571         if ( ( rc = ib_mcast_join ( ipoib->ibdev, ipoib->qp,
00572                                     &ipoib->broadcast_membership,
00573                                     &ipoib->broadcast.gid,
00574                                     ipoib_join_complete ) ) != 0 ) {
00575                 DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
00576                        ipoib, strerror ( rc ) );
00577                 return rc;
00578         }
00579         ipoib->broadcast_joined = 1;
00580 
00581         return 0;
00582 }
00583 
00584 /**
00585  * Leave IPv4 broadcast multicast group
00586  *
00587  * @v ipoib             IPoIB device
00588  */
00589 static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
00590 
00591         if ( ipoib->broadcast_joined ) {
00592                 ib_mcast_leave ( ipoib->ibdev, ipoib->qp,
00593                                  &ipoib->broadcast_membership );
00594                 ipoib->broadcast_joined = 0;
00595         }
00596 }
00597 
00598 /**
00599  * Open IPoIB network device
00600  *
00601  * @v netdev            Network device
00602  * @ret rc              Return status code
00603  */
00604 static int ipoib_open ( struct net_device *netdev ) {
00605         struct ipoib_device *ipoib = netdev->priv;
00606         struct ib_device *ibdev = ipoib->ibdev;
00607         struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
00608         int rc;
00609 
00610         /* Open IB device */
00611         if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
00612                 DBGC ( ipoib, "IPoIB %p could not open device: %s\n",
00613                        ipoib, strerror ( rc ) );
00614                 goto err_ib_open;
00615         }
00616 
00617         /* Allocate completion queue */
00618         ipoib->cq = ib_create_cq ( ibdev, IPOIB_NUM_CQES, &ipoib_cq_op );
00619         if ( ! ipoib->cq ) {
00620                 DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n",
00621                        ipoib );
00622                 rc = -ENOMEM;
00623                 goto err_create_cq;
00624         }
00625 
00626         /* Allocate queue pair */
00627         ipoib->qp = ib_create_qp ( ibdev, IB_QPT_UD,
00628                                    IPOIB_NUM_SEND_WQES, ipoib->cq,
00629                                    IPOIB_NUM_RECV_WQES, ipoib->cq );
00630         if ( ! ipoib->qp ) {
00631                 DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n",
00632                        ipoib );
00633                 rc = -ENOMEM;
00634                 goto err_create_qp;
00635         }
00636         ib_qp_set_ownerdata ( ipoib->qp, ipoib );
00637 
00638         /* Update MAC address with QPN */
00639         mac->flags__qpn = htonl ( ipoib->qp->qpn );
00640 
00641         /* Fill receive rings */
00642         ib_refill_recv ( ibdev, ipoib->qp );
00643 
00644         /* Fake a link status change to join the broadcast group */
00645         ipoib_link_state_changed ( ibdev );
00646 
00647         return 0;
00648 
00649         ib_destroy_qp ( ibdev, ipoib->qp );
00650  err_create_qp:
00651         ib_destroy_cq ( ibdev, ipoib->cq );
00652  err_create_cq:
00653         ib_close ( ibdev );
00654  err_ib_open:
00655         return rc;
00656 }
00657 
00658 /**
00659  * Close IPoIB network device
00660  *
00661  * @v netdev            Network device
00662  */
00663 static void ipoib_close ( struct net_device *netdev ) {
00664         struct ipoib_device *ipoib = netdev->priv;
00665         struct ib_device *ibdev = ipoib->ibdev;
00666         struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
00667 
00668         /* Leave broadcast group */
00669         ipoib_leave_broadcast_group ( ipoib );
00670 
00671         /* Remove QPN from MAC address */
00672         mac->flags__qpn = 0;
00673 
00674         /* Tear down the queues */
00675         ib_destroy_qp ( ibdev, ipoib->qp );
00676         ib_destroy_cq ( ibdev, ipoib->cq );
00677 
00678         /* Close IB device */
00679         ib_close ( ibdev );
00680 }
00681 
00682 /** IPoIB network device operations */
00683 static struct net_device_operations ipoib_operations = {
00684         .open           = ipoib_open,
00685         .close          = ipoib_close,
00686         .transmit       = ipoib_transmit,
00687         .poll           = ipoib_poll,
00688         .irq            = ipoib_irq,
00689 };
00690 
00691 /**
00692  * Handle link status change
00693  *
00694  * @v ibdev             Infiniband device
00695  */
00696 void ipoib_link_state_changed ( struct ib_device *ibdev ) {
00697         struct net_device *netdev = ib_get_ownerdata ( ibdev );
00698         struct ipoib_device *ipoib = netdev->priv;
00699         struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
00700         int rc;
00701 
00702         /* Leave existing broadcast group */
00703         ipoib_leave_broadcast_group ( ipoib );
00704 
00705         /* Update MAC address based on potentially-new GID prefix */
00706         memcpy ( &mac->gid.u.half[0], &ibdev->gid.u.half[0],
00707                  sizeof ( mac->gid.u.half[0] ) );
00708 
00709         /* Update broadcast GID based on potentially-new partition key */
00710         ipoib->broadcast.gid.u.words[2] =
00711                 htons ( ibdev->pkey | IB_PKEY_FULL );
00712 
00713         /* Set net device link state to reflect Infiniband link state */
00714         rc = ib_link_rc ( ibdev );
00715         netdev_link_err ( netdev, ( rc ? rc : -EINPROGRESS_JOINING ) );
00716 
00717         /* Join new broadcast group */
00718         if ( ib_link_ok ( ibdev ) &&
00719              ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) ) {
00720                 DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
00721                        "%s\n", ipoib, strerror ( rc ) );
00722                 netdev_link_err ( netdev, rc );
00723                 return;
00724         }
00725 }
00726 
00727 /**
00728  * Probe IPoIB device
00729  *
00730  * @v ibdev             Infiniband device
00731  * @ret rc              Return status code
00732  */
00733 int ipoib_probe ( struct ib_device *ibdev ) {
00734         struct net_device *netdev;
00735         struct ipoib_device *ipoib;
00736         int rc;
00737 
00738         /* Allocate network device */
00739         netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
00740         if ( ! netdev )
00741                 return -ENOMEM;
00742         netdev_init ( netdev, &ipoib_operations );
00743         ipoib = netdev->priv;
00744         ib_set_ownerdata ( ibdev, netdev );
00745         netdev->dev = ibdev->dev;
00746         memset ( ipoib, 0, sizeof ( *ipoib ) );
00747         ipoib->netdev = netdev;
00748         ipoib->ibdev = ibdev;
00749 
00750         /* Extract hardware address */
00751         memcpy ( netdev->hw_addr, &ibdev->gid.u.half[1],
00752                  sizeof ( ibdev->gid.u.half[1] ) );
00753 
00754         /* Set default broadcast address */
00755         memcpy ( &ipoib->broadcast, &ipoib_broadcast,
00756                  sizeof ( ipoib->broadcast ) );
00757         netdev->ll_broadcast = ( ( uint8_t * ) &ipoib->broadcast );
00758 
00759         /* Register network device */
00760         if ( ( rc = register_netdev ( netdev ) ) != 0 )
00761                 goto err_register_netdev;
00762 
00763         return 0;
00764 
00765  err_register_netdev:
00766         netdev_nullify ( netdev );
00767         netdev_put ( netdev );
00768         return rc;
00769 }
00770 
/**
 * Remove IPoIB device
 *
 * @v ibdev             Infiniband device
 *
 * Unregisters and releases the network device that was recorded as
 * the Infiniband device's owner data.
 */
void ipoib_remove ( struct ib_device *ibdev ) {
        struct net_device *owner = ib_get_ownerdata ( ibdev );

        /* Unregister first, then nullify and drop the reference */
        unregister_netdev ( owner );
        netdev_nullify ( owner );
        netdev_put ( owner );
}

Generated on Tue Apr 6 20:01:01 2010 for gPXE by  doxygen 1.5.7.1