myri10ge.c

Go to the documentation of this file.
00001 /************************************************* -*- linux-c -*-
00002  * Myricom 10Gb Network Interface Card Software
00003  * Copyright 2009, Myricom, Inc.
00004  *
00005  * This program is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU General Public License,
00007  * version 2, as published by the Free Software Foundation.
00008  *
00009  * This program is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  * GNU General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software
00016  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00017  ****************************************************************/
00018 
00019 FILE_LICENCE ( GPL2_ONLY );
00020 
00021 /*
00022  * Author: Glenn Brown <glenn@myri.com>
00023  */
00024 
00025 /*
00026  * General Theory of Operation
00027  *
00028  * This is a minimal Myricom 10 gigabit Ethernet driver for network
00029  * boot.
00030  *
00031  * Initialization
00032  *
00033  * myri10ge_pci_probe() is called by gPXE during initialization.
00034  * Minimal NIC initialization is performed to minimize resources
00035  * consumed when the driver is resident but unused.
00036  *
00037  * Network Boot
00038  *
00039  * myri10ge_net_open() is called by gPXE before attempting to network
00040  * boot from the card.  Packet buffers are allocated and the NIC
00041  * interface is initialized.
00042  *
00043  * Transmit
00044  *
00045  * myri10ge_net_transmit() enqueues frames for transmission by writing
00046  * descriptors to the NIC's tx ring.  For simplicity and to avoid
00047  * copies, we always have the NIC DMA up the packet.  The sent I/O
00048  * buffer is released once the NIC signals myri10ge_interrupt_handler()
00049  * that the send has completed.
00050  *
00051  * Receive
00052  *
00053  * Receives are posted to the NIC's receive ring.  The NIC fills a
00054  * DMAable receive_completion ring with completion notifications.
00055  * myri10ge_net_poll() polls for these receive notifications, posts
00056  * replacement receive buffers to the NIC, and passes received frames
00057  * to netdev_rx().
00058  */
00059 
00060 /*
00061  * Debugging levels:
00062  *      - DBG() is for any errors, i.e. failed alloc_iob(), malloc_dma(),
00063  *        TX overflow, corrupted packets, ...
00064  *      - DBG2() is for successful events, like packet received,
00065  *        packet transmitted, and other general notifications.
00066  *      - DBGP() prints the name of each called function on entry
00067  */
00068 
00069 #include <stdint.h>
00070 
00071 #include <byteswap.h>
00072 #include <errno.h>
00073 #include <gpxe/ethernet.h>
00074 #include <gpxe/if_ether.h>
00075 #include <gpxe/iobuf.h>
00076 #include <gpxe/malloc.h>
00077 #include <gpxe/netdevice.h>
00078 #include <gpxe/pci.h>
00079 #include <gpxe/timer.h>
00080 
00081 #include "myri10ge_mcp.h"
00082 
00083 /****************************************************************
00084  * Forward declarations
00085  ****************************************************************/
00086 
/* PCI driver entry points: device probe and removal. */

static int	myri10ge_pci_probe ( struct pci_device *pci,
				     const struct pci_device_id *id );
static void	myri10ge_pci_remove ( struct pci_device *pci );

/* Network device operations; myri10ge_pci_probe() installs these in
   its net_device_operations table. */

static int	myri10ge_net_open ( struct net_device *netdev );
static void	myri10ge_net_close ( struct net_device *netdev );
static int	myri10ge_net_transmit ( struct net_device *netdev,
					struct io_buffer *iobuf );
static void	myri10ge_net_poll ( struct net_device *netdev );
static void	myri10ge_net_irq ( struct net_device *netdev, int enable );
00100 
00101 /****************************************************************
00102  * Constants
00103  ****************************************************************/
00104 
/*
 * Ring index masks.  A running counter is turned into a ring index by
 * ANDing it with the matching mask, so every value here must have the
 * form 2**N-1.
 */

#define MYRI10GE_TRANSMIT_WRAP			( ( 1U << 1 ) - 1U )
#define MYRI10GE_RECEIVE_WRAP			( ( 1U << 3 ) - 1U )
#define MYRI10GE_RECEIVE_COMPLETION_WRAP	( ( 1U << 5 ) - 1U )
00110 
00111 /****************************************************************
00112  * Driver internal data types.
00113  ****************************************************************/
00114 
00115 /* Structure holding all DMA buffers for a NIC, which we will
00116    allocated as contiguous read/write DMAable memory when the NIC is
00117    initialized. */
00118 
00119 struct myri10ge_dma_buffers
00120 {
00121         /* The NIC DMAs receive completion notifications into this ring */
00122 
00123         mcp_slot_t receive_completion[1+MYRI10GE_RECEIVE_COMPLETION_WRAP];
00124 
00125         /* Interrupt details are DMAd here before interrupting. */
00126 
00127         mcp_irq_data_t irq_data; /* 64B */
00128 
00129         /* NIC command completion status is DMAd here. */
00130 
00131         mcp_cmd_response_t command_response; /* 8B */
00132 };
00133 
00134 struct myri10ge_private
00135 {
00136         /* Interrupt support */
00137 
00138         uint32  *irq_claim;     /* in NIC SRAM */
00139         uint32  *irq_deassert;  /* in NIC SRAM */
00140 
00141         /* DMA buffers. */
00142 
00143         struct myri10ge_dma_buffers     *dma;
00144 
00145         /*
00146          * Transmit state.
00147          *
00148          * The counts here are uint32 for easy comparison with
00149          * priv->dma->irq_data.send_done_count and with each other.
00150          */
00151 
00152         mcp_kreq_ether_send_t   *transmit_ring; /* in NIC SRAM */
00153         uint32                   transmit_ring_wrap;
00154         uint32                   transmits_posted;
00155         uint32                   transmits_done;
00156         struct io_buffer        *transmit_iob[1 + MYRI10GE_TRANSMIT_WRAP];
00157 
00158         /*
00159          * Receive state.
00160          */
00161 
00162         mcp_kreq_ether_recv_t   *receive_post_ring;     /* in NIC SRAM */
00163         unsigned int             receive_post_ring_wrap;
00164         unsigned int             receives_posted;
00165         unsigned int             receives_done;
00166         struct io_buffer        *receive_iob[1 + MYRI10GE_RECEIVE_WRAP];
00167 
00168         /* Address for writing commands to the firmware.
00169            BEWARE: the value must be written 32 bits at a time. */
00170 
00171         mcp_cmd_t       *command;
00172 };
00173 
00174 /****************************************************************
00175  * Driver internal functions.
00176  ****************************************************************/
00177 
/*
 * Debug aid: print the transmit and receive counters (done/posted)
 * followed by the name of the calling function.  To keep the log
 * readable, use it only after one of the printed values has changed.
 */

#define DBG2_RINGS( priv )						\
	DBG2 ( "tx %x/%x rx %x/%x in %s() \n",				\
	       ( priv )->transmits_done,				\
	       ( priv )->transmits_posted,				\
	       ( priv )->receives_done,					\
	       ( priv )->receives_posted,				\
	       __FUNCTION__ )
00186 
00187 /*
00188  * Return a pointer to the driver private data for a network device.
00189  *
00190  * @v netdev    Network device created by this driver.
00191  * @ret priv    The corresponding driver private data.
00192  */
00193 static inline struct myri10ge_private *myri10ge_priv ( struct net_device *nd )
00194 {
00195         /* Our private data always follows the network device in memory,
00196            since we use alloc_netdev() to allocate the storage. */
00197 
00198         return ( struct myri10ge_private * ) ( nd + 1 );
00199 }
00200 
00201 /*
00202  * Pass a receive buffer to the NIC to be filled.
00203  *
00204  * @v priv      The network device to receive the buffer.
00205  * @v iob       The I/O buffer to fill.
00206  *
00207  * Receive buffers are filled in FIFO order.
00208  */
00209 static void myri10ge_post_receive ( struct myri10ge_private *priv,
00210                                     struct io_buffer *iob )
00211 {
00212         unsigned int             receives_posted;
00213         mcp_kreq_ether_recv_t   *request;
00214 
00215         /* Record the posted I/O buffer, to be passed to netdev_rx() on
00216            receive. */
00217 
00218         receives_posted = priv->receives_posted;
00219         priv->receive_iob[receives_posted & MYRI10GE_RECEIVE_WRAP] = iob;
00220 
00221         /* Post the receive. */
00222 
00223         request = &priv->receive_post_ring[receives_posted
00224                                            & priv->receive_post_ring_wrap];
00225         request->addr_high = 0;
00226         wmb();
00227         request->addr_low = htonl ( virt_to_bus ( iob->data ) );
00228         priv->receives_posted = ++receives_posted;
00229 }
00230 
00231 /*
00232  * Execute a command on the NIC.
00233  *
00234  * @v priv      NIC to perform the command.
00235  * @v cmd       The command to perform.
00236  * @v data      I/O copy buffer for parameters/results
00237  * @ret rc      0 on success, else an error code.
00238  */
00239 static int myri10ge_command ( struct myri10ge_private *priv,
00240                               uint32 cmd,
00241                               uint32 data[3] )
00242 {
00243         int                              i;
00244         mcp_cmd_t                       *command;
00245         uint32                           result;
00246         unsigned int                     slept_ms;
00247         volatile mcp_cmd_response_t     *response;
00248 
00249         DBGP ( "myri10ge_command ( ,%d, ) \n", cmd );
00250         command = priv->command;
00251         response = &priv->dma->command_response;
00252 
00253         /* Mark the command as incomplete. */
00254 
00255         response->result = 0xFFFFFFFF;
00256 
00257         /* Pass the command to the NIC. */
00258 
00259         command->cmd                = htonl ( cmd );
00260         command->data0              = htonl ( data[0] );
00261         command->data1              = htonl ( data[1] );
00262         command->data2              = htonl ( data[2] );
00263         command->response_addr.high = 0;
00264         command->response_addr.low
00265                 = htonl ( virt_to_bus ( &priv->dma->command_response ) );
00266         for ( i=0; i<36; i+=4 )
00267                 * ( uint32 * ) &command->pad[i] = 0;
00268         wmb();
00269         * ( uint32 * ) &command->pad[36] = 0;
00270 
00271         /* Wait up to 2 seconds for a response. */
00272 
00273         for ( slept_ms=0; slept_ms<2000; slept_ms++ ) {
00274                 result = response->result;
00275                 if ( result == 0 ) {
00276                         data[0] = ntohl ( response->data );
00277                         return 0;
00278                 } else if ( result != 0xFFFFFFFF ) {
00279                         DBG ( "cmd%d:0x%x\n",
00280                               cmd,
00281                               ntohl ( response->result ) );
00282                         return -EIO;
00283                 }
00284                 udelay ( 1000 );
00285                 rmb();
00286         }
00287         DBG ( "cmd%d:timed out\n", cmd );
00288         return -ETIMEDOUT;
00289 }
00290 
00291 /*
00292  * Handle any pending interrupt.
00293  *
00294  * @v netdev            Device being polled for interrupts.
00295  *
00296  * This is called periodically to let the driver check for interrupts.
00297  */
00298 static void myri10ge_interrupt_handler ( struct net_device *netdev )
00299 {
00300         struct myri10ge_private *priv;
00301         mcp_irq_data_t          *irq_data;
00302         uint8                    valid;
00303 
00304         priv = myri10ge_priv ( netdev );
00305         irq_data = &priv->dma->irq_data;
00306 
00307         /* Return if there was no interrupt. */
00308 
00309         rmb();
00310         valid = irq_data->valid;
00311         if ( !valid )
00312                 return;
00313         DBG2 ( "irq " );
00314 
00315         /* Tell the NIC to deassert the interrupt and clear
00316            irq_data->valid.*/
00317 
00318         *priv->irq_deassert = 0;        /* any value is OK. */
00319         mb();
00320 
00321         /* Handle any new receives. */
00322 
00323         if ( valid & 1 ) {
00324 
00325                 /* Pass the receive interrupt token back to the NIC. */
00326 
00327                 DBG2 ( "rx " );
00328                 *priv->irq_claim = htonl ( 3 );
00329                 wmb();
00330         }
00331 
00332         /* Handle any sent packet by freeing its I/O buffer, now that
00333            we know it has been DMAd. */
00334 
00335         if ( valid & 2 ) {
00336                 unsigned int nic_done_count;
00337 
00338                 DBG2 ( "snt " );
00339                 nic_done_count = ntohl ( priv->dma->irq_data.send_done_count );
00340                 while ( priv->transmits_done != nic_done_count ) {
00341                         struct io_buffer *iob;
00342 
00343                         iob = priv->transmit_iob [priv->transmits_done
00344                                                   & MYRI10GE_TRANSMIT_WRAP];
00345                         DBG2 ( "%p ", iob );
00346                         netdev_tx_complete ( netdev, iob );
00347                         ++priv->transmits_done;
00348                 }
00349         }
00350 
00351         /* Record any statistics update. */
00352 
00353         if ( irq_data->stats_updated ) {
00354 
00355                 /* Update the link status. */
00356 
00357                 DBG2 ( "stats " );
00358                 if ( ntohl ( irq_data->link_up ) == MXGEFW_LINK_UP )
00359                         netdev_link_up ( netdev );
00360                 else
00361                         netdev_link_down ( netdev );
00362 
00363                 /* Ignore all error counters from the NIC. */
00364         }
00365 
00366         /* Wait for the interrupt to be deasserted, as indicated by
00367            irq_data->valid, which is set by the NIC after the deassert. */
00368 
00369         DBG2 ( "wait " );
00370         do {
00371                 mb();
00372         } while ( irq_data->valid );
00373 
00374         /* Claim the interrupt to enable future interrupt generation. */
00375 
00376         DBG2 ( "claim\n" );
00377         * ( priv->irq_claim + 1 ) = htonl ( 3 );
00378         mb();
00379 }
00380 
/*
 * Register offsets and values of the Myricom vendor-specific PCI
 * configuration space capability, as used to read the STRING_SPECS.
 * The register macros expect a variable named `vs' holding the
 * capability's configuration space offset to be in scope.
 */

#define VS_MODE ( vs + 0x10 )
#define		VS_MODE_READ32 0x3
#define		VS_MODE_LOCATE 0x8
#define			VS_LOCATE_STRING_SPECS 0x3
#define VS_DATA ( vs + 0x14 )
#define VS_ADDR ( vs + 0x18 )
00390 
00391 /*
00392  * Read MAC address from its 'string specs' via the vendor-specific
00393  * capability.  (This capability allows NIC SRAM and ROM to be read
00394  * before it is mapped.)
00395  *
00396  * @v pci               The device.
00397  * @v mac               Buffer to store the MAC address.
00398  * @ret rc              Returns 0 on success, else an error code.
00399  */
00400 static int mac_address_from_string_specs ( struct pci_device *pci,
00401                                                    uint8 mac[ETH_ALEN] )
00402 {
00403         char string_specs[256];
00404         char *ptr, *limit;
00405         char *to = string_specs;
00406         uint32 addr;
00407         uint32 len;
00408         unsigned int vs;
00409         int mac_set = 0;
00410 
00411         /* Find the "vendor specific" capability. */
00412 
00413         vs = pci_find_capability ( pci, 9 );
00414         if ( vs == 0 ) {
00415                 DBG ( "no VS\n" );
00416                 return -ENOTSUP;
00417         }
00418 
00419         /* Locate the String specs in LANai SRAM. */
00420 
00421         pci_write_config_byte ( pci, VS_MODE, VS_MODE_LOCATE );
00422         pci_write_config_dword ( pci, VS_ADDR, VS_LOCATE_STRING_SPECS );
00423         pci_read_config_dword ( pci, VS_ADDR, &addr );
00424         pci_read_config_dword ( pci, VS_DATA, &len );
00425         DBG2 ( "ss@%x,%x\n", addr, len );
00426 
00427         /* Copy in the string specs.  Use 32-bit reads for performance. */
00428 
00429         if ( len > sizeof ( string_specs ) || ( len & 3 ) ) {
00430                 DBG ( "SS too big\n" );
00431                 return -ENOTSUP;
00432         }
00433 
00434         pci_write_config_byte ( pci, VS_MODE, VS_MODE_READ32 );
00435         while ( len >= 4 ) {
00436                 uint32 tmp;
00437 
00438                 pci_write_config_byte ( pci, VS_ADDR, addr );
00439                 pci_read_config_dword ( pci, VS_DATA, &tmp );
00440                 tmp = ntohl ( tmp );
00441                 memcpy ( to, &tmp, 4 );
00442                 to += 4;
00443                 addr += 4;
00444                 len -= 4;
00445         }
00446         pci_write_config_byte ( pci, VS_MODE, 0 );
00447 
00448         /* Parse the string specs. */
00449 
00450         DBG2 ( "STRING_SPECS:\n" );
00451         ptr = string_specs;
00452         limit = string_specs + sizeof ( string_specs );
00453         while ( *ptr != '\0' && ptr < limit ) {
00454                 DBG2 ( "%s\n", ptr );
00455                 if ( memcmp ( ptr, "MAC=", 4 ) == 0 ) {
00456                         unsigned int i;
00457 
00458                         ptr += 4;
00459                         for ( i=0; i<6; i++ ) {
00460                                 if ( ( ptr + 2 ) > limit ) {
00461                                         DBG ( "bad MAC addr\n" );
00462                                         return -ENOTSUP;
00463                                 }
00464                                 mac[i] = strtoul ( ptr, &ptr, 16 );
00465                                 ptr += 1;
00466                         }
00467                         mac_set = 1;
00468                 }
00469                 else
00470                         while ( ptr < limit && *ptr++ );
00471         }
00472 
00473         /* Verify we parsed all we need. */
00474 
00475         if ( !mac_set ) {
00476                 DBG ( "no MAC addr\n" );
00477                 return -ENOTSUP;
00478         }
00479 
00480         DBG2 ( "MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
00481                mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] );
00482 
00483         return 0;
00484 }
00485 
00486 /****************************************************************
00487  * gPXE PCI Device Driver API functions
00488  ****************************************************************/
00489 
00490 /*
00491  * Initialize the PCI device.
00492  *
00493  * @v pci               The device's associated pci_device structure.
00494  * @v id                The PCI device + vendor id.
00495  * @ret rc              Returns zero if successfully initialized.
00496  *
00497  * This function is called very early on, while gPXE is initializing.
00498  * This is a gPXE PCI Device Driver API function.
00499  */
00500 static int myri10ge_pci_probe ( struct pci_device *pci,
00501                                 const struct pci_device_id *id __unused )
00502 {
00503         static struct net_device_operations myri10ge_operations = {
00504                 .open     = myri10ge_net_open,
00505                 .close    = myri10ge_net_close,
00506                 .transmit = myri10ge_net_transmit,
00507                 .poll     = myri10ge_net_poll,
00508                 .irq      = myri10ge_net_irq
00509         };
00510 
00511         const char *dbg;
00512         int rc;
00513         struct net_device *netdev;
00514         struct myri10ge_private *priv;
00515 
00516         DBGP ( "myri10ge_pci_probe: " );
00517 
00518         netdev = alloc_etherdev ( sizeof ( *priv ) );
00519         if ( !netdev ) {
00520                 rc = -ENOMEM;
00521                 dbg = "alloc_etherdev";
00522                 goto abort_with_nothing;
00523         }
00524 
00525         netdev_init ( netdev, &myri10ge_operations );
00526         priv = myri10ge_priv ( netdev );
00527 
00528         pci_set_drvdata ( pci, netdev );
00529         netdev->dev = &pci->dev;
00530 
00531         /* Make sure interrupts are disabled. */
00532 
00533         myri10ge_net_irq ( netdev, 0 );
00534 
00535         /* Read the NIC HW address. */
00536 
00537         rc = mac_address_from_string_specs ( pci, netdev->hw_addr );
00538         if ( rc ) {
00539                 dbg = "mac_from_ss";
00540                 goto abort_with_netdev_init;
00541         }
00542         DBGP ( "mac " );
00543 
00544         /* Enable bus master, etc. */
00545 
00546         adjust_pci_device ( pci );
00547         DBGP ( "pci " );
00548 
00549         /* Register the initialized network device. */
00550 
00551         rc = register_netdev ( netdev );
00552         if ( rc ) {
00553                 dbg = "register_netdev";
00554                 goto abort_with_netdev_init;
00555         }
00556 
00557         DBGP ( "done\n" );
00558 
00559         return 0;
00560 
00561 abort_with_netdev_init:
00562         netdev_nullify ( netdev );
00563         netdev_put ( netdev );
00564 abort_with_nothing:
00565         DBG ( "%s:%s\n", dbg, strerror ( rc ) );
00566         return rc;
00567 }
00568 
/*
 * Detach the driver from a PCI device.
 *
 * @v pci		PCI device to remove.
 *
 * Unregisters the network device that myri10ge_pci_probe() attached to
 * this PCI device and drops the driver's reference to it.
 * This is a PCI Device Driver API function.
 */
static void myri10ge_pci_remove ( struct pci_device *pci )
{
	struct net_device	*attached;

	DBGP ( "myri10ge_pci_remove\n" );

	/* The probe stored the network device as the PCI driver data. */

	attached = pci_get_drvdata ( pci );

	unregister_netdev ( attached );
	netdev_nullify ( attached );
	netdev_put ( attached );
}
00587 
00588 /****************************************************************
00589  * gPXE Network Device Driver Operations
00590  ****************************************************************/
00591 
00592 /*
00593  * Close a network device.
00594  *
00595  * @v netdev            Device to close.
00596  *
00597  * This is a gPXE Network Device Driver API function.
00598  */
00599 static void myri10ge_net_close ( struct net_device *netdev )
00600 {
00601         struct myri10ge_private *priv;
00602         uint32                   data[3];
00603 
00604         DBGP ( "myri10ge_net_close\n" );
00605         priv = myri10ge_priv ( netdev );
00606 
00607         /* disable interrupts */
00608 
00609         myri10ge_net_irq ( netdev, 0 );
00610 
00611         /* Reset the NIC interface, so we won't get any more events from
00612            the NIC. */
00613 
00614         myri10ge_command ( priv, MXGEFW_CMD_RESET, data );
00615 
00616         /* Free receive buffers that were never filled. */
00617 
00618         while ( priv->receives_done != priv->receives_posted ) {
00619                 free_iob ( priv->receive_iob[priv->receives_done
00620                                              & MYRI10GE_RECEIVE_WRAP] );
00621                 ++priv->receives_done;
00622         }
00623 
00624         /* Release DMAable memory. */
00625 
00626         free_dma ( priv->dma, sizeof ( *priv->dma ) );
00627 
00628         /* Erase all state from the open. */
00629 
00630         memset ( priv, 0, sizeof ( *priv ) );
00631 
00632         DBG2_RINGS ( priv );
00633 }
00634 
00635 /*
00636  * Enable or disable IRQ masking.
00637  *
00638  * @v netdev            Device to control.
00639  * @v enable            Zero to mask off IRQ, non-zero to enable IRQ.
00640  *
00641  * This is a gPXE Network Driver API function.
00642  */
00643 static void myri10ge_net_irq ( struct net_device *netdev, int enable )
00644 {
00645         struct pci_device       *pci_dev;
00646         uint16                   val;
00647 
00648         DBGP ( "myri10ge_net_irq\n" );
00649         pci_dev = ( struct pci_device * ) netdev->dev;
00650 
00651         /* Adjust the Interrupt Disable bit in the Command register of the
00652            PCI Device. */
00653 
00654         pci_read_config_word ( pci_dev, PCI_COMMAND, &val );
00655         if ( enable )
00656                 val &= ~PCI_COMMAND_INTX_DISABLE;
00657         else
00658                 val |= PCI_COMMAND_INTX_DISABLE;
00659         pci_write_config_word ( pci_dev, PCI_COMMAND, val );
00660 }
00661 
00662 /*
00663  * Opens a network device.
00664  *
00665  * @v netdev            Device to be opened.
00666  * @ret rc              Non-zero if failed to open.
00667  *
00668  * This enables tx and rx on the device.
00669  * This is a gPXE Network Device Driver API function.
00670  */
00671 static int myri10ge_net_open ( struct net_device *netdev )
00672 {
00673         const char              *dbg;   /* printed upon error return */
00674         int                      rc;
00675         struct io_buffer        *iob;
00676         struct myri10ge_private *priv;
00677         uint32                   data[3];
00678         struct pci_device       *pci_dev;
00679         void                    *membase;
00680 
00681         DBGP ( "myri10ge_net_open\n" );
00682         priv    = myri10ge_priv ( netdev );
00683         pci_dev = ( struct pci_device * ) netdev->dev;
00684         membase = phys_to_virt ( pci_dev->membase );
00685 
00686         /* Compute address for passing commands to the firmware. */
00687 
00688         priv->command = membase + MXGEFW_ETH_CMD;
00689 
00690         /* Ensure interrupts are disabled. */
00691 
00692         myri10ge_net_irq ( netdev, 0 );
00693 
00694         /* Allocate cleared DMAable buffers. */
00695 
00696         priv->dma = malloc_dma ( sizeof ( *priv->dma ) , 128 );
00697         if ( !priv->dma ) {
00698                 rc = -ENOMEM;
00699                 dbg = "DMA";
00700                 goto abort_with_nothing;
00701         }
00702         memset ( priv->dma, 0, sizeof ( *priv->dma ) );
00703 
00704         /* Simplify following code. */
00705 
00706 #define TRY( prefix, base, suffix ) do {                \
00707                 rc = myri10ge_command ( priv,           \
00708                                         MXGEFW_         \
00709                                         ## prefix       \
00710                                         ## base         \
00711                                         ## suffix,      \
00712                                         data );         \
00713                 if ( rc ) {                             \
00714                         dbg = #base;                    \
00715                         goto abort_with_dma;            \
00716                 }                                       \
00717         } while ( 0 )
00718 
00719         /* Send a reset command to the card to see if it is alive,
00720            and to reset its queue state. */
00721 
00722         TRY ( CMD_, RESET , );
00723 
00724         /* Set the interrupt queue size. */
00725 
00726         data[0] = ( sizeof ( priv->dma->receive_completion )
00727                     | MXGEFW_CMD_SET_INTRQ_SIZE_FLAG_NO_STRICT_SIZE_CHECK );
00728         TRY ( CMD_SET_ , INTRQ_SIZE , );
00729 
00730         /* Set the interrupt queue DMA address. */
00731 
00732         data[0] = virt_to_bus ( &priv->dma->receive_completion );
00733         data[1] = 0;
00734         TRY ( CMD_SET_, INTRQ_DMA, );
00735 
00736         /* Get the NIC interrupt claim address. */
00737 
00738         TRY ( CMD_GET_, IRQ_ACK, _OFFSET );
00739         priv->irq_claim = membase + data[0];
00740 
00741         /* Get the NIC interrupt assert address. */
00742 
00743         TRY ( CMD_GET_, IRQ_DEASSERT, _OFFSET );
00744         priv->irq_deassert = membase + data[0];
00745 
00746         /* Disable interrupt coalescing, which is inappropriate for the
00747            minimal buffering we provide. */
00748 
00749         TRY ( CMD_GET_, INTR_COAL, _DELAY_OFFSET );
00750         * ( ( uint32 * ) ( membase + data[0] ) ) = 0;
00751 
00752         /* Set the NIC mac address. */
00753 
00754         data[0] = ( netdev->ll_addr[0] << 24
00755                     | netdev->ll_addr[1] << 16
00756                     | netdev->ll_addr[2] << 8
00757                     | netdev->ll_addr[3] );
00758         data[1] = ( ( netdev->ll_addr[4] << 8 )
00759                      | netdev->ll_addr[5] );
00760         TRY ( SET_ , MAC_ADDRESS , );
00761 
00762         /* Enable multicast receives, because some gPXE clients don't work
00763            without multicast. . */
00764 
00765         TRY ( ENABLE_ , ALLMULTI , );
00766 
00767         /* Disable Ethernet flow control, so the NIC cannot deadlock the
00768            network under any circumstances. */
00769 
00770         TRY ( DISABLE_ , FLOW , _CONTROL );
00771 
00772         /* Compute transmit ring sizes. */
00773 
00774         data[0] = 0;            /* slice 0 */
00775         TRY ( CMD_GET_, SEND_RING, _SIZE );
00776         priv->transmit_ring_wrap
00777                 = data[0] / sizeof ( mcp_kreq_ether_send_t ) - 1;
00778         if ( priv->transmit_ring_wrap
00779              & ( priv->transmit_ring_wrap + 1 ) ) {
00780                 rc = -EPROTO;
00781                 dbg = "TX_RING";
00782                 goto abort_with_dma;
00783         }
00784 
00785         /* Compute receive ring sizes. */
00786 
00787         data[0] = 0;            /* slice 0 */
00788         TRY ( CMD_GET_ , RX_RING , _SIZE );
00789         priv->receive_post_ring_wrap = data[0] / sizeof ( mcp_dma_addr_t ) - 1;
00790         if ( priv->receive_post_ring_wrap
00791              & ( priv->receive_post_ring_wrap + 1 ) ) {
00792                 rc = -EPROTO;
00793                 dbg = "RX_RING";
00794                 goto abort_with_dma;
00795         }
00796 
00797         /* Get NIC transmit ring address. */
00798 
00799         data[0] = 0;            /* slice 0. */
00800         TRY ( CMD_GET_, SEND, _OFFSET );
00801         priv->transmit_ring = membase + data[0];
00802 
00803         /* Get the NIC receive ring address. */
00804 
00805         data[0] = 0;            /* slice 0. */
00806         TRY ( CMD_GET_, SMALL_RX, _OFFSET );
00807         priv->receive_post_ring = membase + data[0];
00808 
00809         /* Set the Nic MTU. */
00810 
00811         data[0] = ETH_FRAME_LEN;
00812         TRY ( CMD_SET_, MTU, );
00813 
00814         /* Tell the NIC our buffer sizes. ( We use only small buffers, so we
00815            set both buffer sizes to the same value, which will force all
00816            received frames to use small buffers. ) */
00817 
00818         data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
00819         TRY ( CMD_SET_, SMALL_BUFFER, _SIZE );
00820         data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
00821         TRY ( CMD_SET_, BIG_BUFFER, _SIZE );
00822 
00823         /* Tell firmware where to DMA IRQ data */
00824 
00825         data[0] = virt_to_bus ( &priv->dma->irq_data );
00826         data[1] = 0;
00827         data[2] = sizeof ( priv->dma->irq_data );
00828         TRY ( CMD_SET_, STATS_DMA_V2, );
00829 
00830         /* Post receives. */
00831 
00832         while ( priv->receives_posted <= MYRI10GE_RECEIVE_WRAP ) {
00833 
00834                 /* Reserve 2 extra bytes at the start of packets, since
00835                    the firmware always skips the first 2 bytes of the buffer
00836                    so TCP headers will be aligned. */
00837 
00838                 iob = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
00839                 if ( !iob ) {
00840                         rc = -ENOMEM;
00841                         dbg = "alloc_iob";
00842                         goto abort_with_receives_posted;
00843                 }
00844                 iob_reserve ( iob, MXGEFW_PAD );
00845                 myri10ge_post_receive ( priv, iob );
00846         }
00847 
00848         /* Bring up the link. */
00849 
00850         TRY ( CMD_, ETHERNET_UP, );
00851 
00852         DBG2_RINGS ( priv );
00853         return 0;
00854 
00855 abort_with_receives_posted:
00856         while ( priv->receives_posted-- )
00857                 free_iob ( priv->receive_iob[priv->receives_posted] );
00858 abort_with_dma:
00859         /* Because the link is not up, we don't have to reset the NIC here. */
00860         free_dma ( priv->dma, sizeof ( *priv->dma ) );
00861 abort_with_nothing:
00862         /* Erase all signs of the failed open. */
00863         memset ( priv, 0, sizeof ( *priv ) );
00864         DBG ( "%s: %s\n", dbg, strerror ( rc ) );
00865         return ( rc );
00866 }
00867 
00868 /*
00869  * Poll the NIC: run the interrupt handler, then pass up received frames.
00870  *
00871  * @v netdev            Device being polled.
00872  *
00873  * This is called periodically by gPXE to let the driver check the status of
00874  * transmitted packets and to allow the driver to check for received packets.
00875  * This is a gPXE Network Device Driver API function.
00876  */
00877 static void myri10ge_net_poll ( struct net_device *netdev )
00878 {
00879         struct io_buffer                *iob;                   /* Completed receive. */
00880         struct io_buffer                *replacement;           /* Buffer to re-post. */
00881         struct myri10ge_dma_buffers     *dma;
00882         struct myri10ge_private         *priv;
00883         unsigned int                     length;                /* Frame length. */
00884         unsigned int                     orig_receives_posted;  /* Loop bound. */
00885 
00886         DBGP ( "myri10ge_net_poll\n" );
00887         priv = myri10ge_priv ( netdev );
00888         dma  = priv->dma;
00889 
00890         /* Process any pending interrupt. */
00891 
00892         myri10ge_interrupt_handler ( netdev );
00893 
00894         /* Pass up received frames, but limit ourselves to receives posted
00895            before this function was called, so we cannot livelock if
00896            receives are arriving faster than we process them. */
00897 
00898         orig_receives_posted = priv->receives_posted;
00899         while ( priv->receives_done != orig_receives_posted ) {
00900 
00901                 /* Stop if there is no pending receive.  A zero length marks
00902                    an empty slot; each slot is zeroed below once consumed. */
00903                 length = ntohs ( dma->receive_completion
00904                                  [priv->receives_done
00905                                   & MYRI10GE_RECEIVE_COMPLETION_WRAP]
00906                                  .length );
00907                 if ( length == 0 )
00908                         break;
00909 
00910                 /* Allocate a replacement buffer.  If none is available,
00911                    stop passing up packets until a buffer is available.
00912                    The completion is left intact, so a later poll retries it.
00913                    Reserve 2 extra bytes at the start of packets, since
00914                    the firmware always skips the first 2 bytes of the buffer
00915                    so TCP headers will be aligned. */
00916 
00917                 replacement = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
00918                 if ( !replacement ) {
00919                         DBG ( "NO RX BUF\n" );
00920                         break;
00921                 }
00922                 iob_reserve ( replacement, MXGEFW_PAD );
00923 
00924                 /* Pass up the received frame, sized per the completion entry. */
00925 
00926                 iob = priv->receive_iob[priv->receives_done
00927                                         & MYRI10GE_RECEIVE_WRAP];
00928                 iob_put ( iob, length );
00929                 netdev_rx ( netdev, iob );
00930 
00931                 /* We have consumed the packet, so clear the receive
00932                    notification. */
00933 
00934                 dma->receive_completion [priv->receives_done
00935                                          & MYRI10GE_RECEIVE_COMPLETION_WRAP]
00936                         .length = 0;
00937                 wmb();          /* Order the clear before the re-post below. */
00938 
00939                 /* Replace the passed-up I/O buffer, then advance to the next
00940                    completion slot. */
00941                 myri10ge_post_receive ( priv, replacement );
00942                 ++priv->receives_done;
00943                 DBG2_RINGS ( priv );
00944         }
00945 }
00946 
00947 /*
00948  * Queue a packet for transmission by writing a send descriptor to the NIC.
00949  *
00950  * @v netdev            Device to transmit from.
00951  * @v iobuf             Data to transmit; padded in place to ETH_ZLEN if short.
00952  * @ret rc              0 on success, or -ENOBUFS if the send ring is full.
00953  *
00954  * This is a gPXE Network Driver API function.
00955  */
00956 static int myri10ge_net_transmit ( struct net_device *netdev,
00957                                    struct io_buffer *iobuf )
00958 {
00959         mcp_kreq_ether_send_t   *kreq;
00960         size_t                   len;
00961         struct myri10ge_private *priv;
00962         uint32                   transmits_posted;
00963 
00964         DBGP ( "myri10ge_net_transmit\n" );
00965         priv = myri10ge_priv ( netdev );
00966 
00967         /* Confirm space in the send ring. */
00968 
00969         transmits_posted = priv->transmits_posted;
00970         if ( transmits_posted - priv->transmits_done
00971              > MYRI10GE_TRANSMIT_WRAP ) {
00972                 DBG ( "TX ring full\n" );
00973                 return -ENOBUFS;
00974         }
00975         /* iob_len() yields a size_t, so print it with the z length modifier. */
00976         DBG2 ( "TX %p+%zd ", iobuf->data, iob_len ( iobuf ) );
00977         DBG2_HD ( iobuf->data, 14 );
00978 
00979         /* Record the packet being transmitted, so we can later report
00980            send completion. */
00981 
00982         priv->transmit_iob[transmits_posted & MYRI10GE_TRANSMIT_WRAP] = iobuf;
00983 
00984         /* Copy and pad undersized frames, because the NIC does not pad,
00985            and we would rather copy small frames than do a gather. */
00986 
00987         len = iob_len ( iobuf );
00988         if ( len < ETH_ZLEN ) {
00989                 iob_pad ( iobuf, ETH_ZLEN );
00990                 len = ETH_ZLEN;
00991         }
00992 
00993         /* Enqueue the packet by writing a descriptor to the NIC.
00994            This is a bit tricky because the HW requires 32-bit writes,
00995            but the structure has smaller fields. */
00996 
00997         kreq = &priv->transmit_ring[transmits_posted
00998                                     & priv->transmit_ring_wrap];
00999         kreq->addr_high = 0;
01000         kreq->addr_low = htonl ( virt_to_bus ( iobuf->data ) );
01001         ( ( uint32 * ) kreq ) [2] = htonl (
01002                 0x0000 << 16     /* pseudo_header_offset */
01003                 | ( len & 0xFFFF ) /* length */
01004                 );
01005         wmb();          /* Order words 0-2 before the final (flags) word. */
01006         ( ( uint32 * ) kreq ) [3] = htonl (
01007                 0x00 << 24      /* pad */
01008                 | 0x01 << 16    /* rdma_count */
01009                 | 0x00 << 8     /* cksum_offset */
01010                 | ( MXGEFW_FLAGS_SMALL
01011                     | MXGEFW_FLAGS_FIRST
01012                     | MXGEFW_FLAGS_NO_TSO ) /* flags */
01013                 );
01014         wmb();          /* Descriptor is fully written before we return. */
01015 
01016         /* Mark the slot as consumed and return. */
01017 
01018         priv->transmits_posted = ++transmits_posted;
01019         DBG2_RINGS ( priv );
01020         return 0;
01021 }
01022 
01023 static struct pci_device_id myri10ge_nics[] = {  /* PCI device table used as myri10ge_driver.ids. */
01024         /* Each of these macros must be a single line to satisfy a script. */
01025         PCI_ROM ( 0x14c1, 0x0008, "myri10ge", "Myricom 10Gb Ethernet Adapter", 0 ) ,
01026 };
01027 
01028 struct pci_driver myri10ge_driver __pci_driver = {     /* Binds the ID table to probe/remove. */
01029         .probe    = myri10ge_pci_probe,
01030         .remove   = myri10ge_pci_remove,
01031         .ids      = myri10ge_nics,
01032         .id_count = sizeof ( myri10ge_nics ) / sizeof ( *myri10ge_nics ),
01033 };
01034 
01035 /*
01036  * Local variables:
01037  *  c-basic-offset: 8
01038  *  c-indent-level: 8
01039  *  tab-width: 8
01040  * End:
01041  */

Generated on Tue Apr 6 20:01:01 2010 for gPXE by  doxygen 1.5.7.1