view e1000_initialize_reception.patch @ 80:0354cceee710

WIP on e1000, try to add the code to free the RX ring when shutting the interface down
author Louis Opter <louis@lse.epitech.net>
date Sun, 15 Apr 2012 02:53:38 +0200
parents f07f6c6d6cd4
children
line wrap: on
line source

# HG changeset patch
# Parent d8fce7ee2b630bf8bd681fa4d66832b82c5e6de2
rathaxes: initialize reception on the e1000 sample:

- This is documented in detail in sections 14.4 and 3.2 of the
  Intel Gigabit Controller Software Developer manual;
- "Address filtering" is set up: address filters just tell the hardware
  which packets it should accept (unicast/multicast/vlan/promisc); we
  simply configure the hardware to accept packets for its own MAC
  address (receive address);
- It involves setting up a ring of receive descriptors (their format is
  documented in section 3.2.3) and an internal data structure to keep
  track of the ring;
- Each descriptor of the ring corresponds to an skbuff (skbuffs are
  allocated individually).

diff --git a/rathaxes/samples/e1000/e1000.blt b/rathaxes/samples/e1000/e1000.blt
--- a/rathaxes/samples/e1000/e1000.blt
+++ b/rathaxes/samples/e1000/e1000.blt
@@ -1,5 +1,81 @@
 with e1000, Ethernet, Socket, PCI, LKM, Log
 {
+    template type   e1000::RxDescriptor()
+    {
+        chunk   LKM::includes()
+        {
+            typedef int ${e1000::RxDescriptor};
+
+            #include <linux/types.h>
+
+            static const ${e1000::RxDescriptor} force_rtx_e1000_rx_descriptor_decl;
+        }
+
+        chunk   ::decl()
+        {
+            typedef struct rtx_e1000_rx_descriptor
+            {
+                /* Receive descriptor; the actual kernel types are in comments. NOTE(review): unsigned long is 64 bits wide only on LP64 targets, confirm buff_addr (__le64) for 32-bit builds. */
+                unsigned long int   /* __le64 */    buff_addr;
+                unsigned short      /* __le16 */    length;
+                unsigned short      /* __le16 */    csum;
+                unsigned char                       status;
+                unsigned char                       errors;
+                unsigned short      /* __le16 */    special;
+            } *rtx_e1000_rx_descriptor_p;
+        }
+
+        chunk   ::init()
+        {
+        }
+
+        map
+        {
+        }
+    }
+
+    /*
+     * Ring of e1000::RxDescriptors and their corresponding skbuffs.
+     *
+     * - size: total size of the descriptor ring in bytes;
+     * - base: address of the ring (we can't use the typedef here until we get
+     *   CNorm unstrict);
+     * - dma_base: (physical) address of the ring where the device can access
+     *   the different descriptors (kept as a void* in place of dma_addr_t);
+     * - skbuffs: array of the skbuffs associated with each descriptor;
+     * - dma_skbuffs: (physical) address of each skbuff where the device can
+     *   write the received packets (both arrays are hard-coded to 256 entries).
+     */
+    template type   e1000::RxRing()
+    {
+        chunk   LKM::includes()
+        {
+            typedef int ${e1000::RxRing};
+
+            static const ${e1000::RxRing}   force_rtx_e1000_rx_ring_decl;
+        }
+
+        chunk   ::decl()
+        {
+            struct rtx_e1000_rx_ring
+            {
+                unsigned int                    size;
+                struct rtx_e1000_rx_descriptor  *base;
+                void*   /* dma_addr_t */        dma_base;
+                struct sk_buff                  *skbuffs[256 /* ${config.rx_ring_size} */];
+                void*   /* dma_addr_t */        dma_skbuffs[256 /* ${config.rx_ring_size} */];
+            };
+        }
+
+        chunk   ::init()
+        {
+        }
+
+        map
+        {
+        }
+    }
+
     template type   e1000::Context()
     {
         chunk   LKM::includes()
@@ -21,9 +97,12 @@
              */
             typedef struct rtx_e1000_ctx
             {
-                int                             bars;
-                unsigned char /* __iomem */     *ioaddr;
-                int                             irq;
+                int                         bars;
+                unsigned char /* __iomem */ *ioaddr;
+                int                         irq;
+
+                /* we can't use the Rathaxes type here (#8) */
+                struct rtx_e1000_rx_ring    rx_ring;
             } *rtx_e1000_ctx_p;
         }
 
@@ -34,6 +113,7 @@
         map
         {
         }
+
     }
 
     template type   e1000::Register()
@@ -64,6 +144,9 @@
                 E1000_FCT           = 0x00030, /* Flow Control Type */
                 E1000_FCTTV         = 0x00170, /* Flow Control Transmit Timer Value */
                 E1000_CRCERRS       = 0x04000, /* CRC Error Count (base address of the statistic register spaces) */
+                E1000_RAL           = 0x05400, /* Receive Address Low */
+                E1000_RAH           = 0x05404, /* Receive Address High */
+                E1000_MTA           = 0x05200, /* Multicast Table Array */
             };
         }
 
@@ -132,7 +215,8 @@
                 E1000_INTR_RXDMT0               = 0x00000010, /* rx desc min. threshold (0) */
                 E1000_INTR_RXO                  = 0x00000040, /* rx overrun */
                 E1000_INTR_RXT0                 = 0x00000080, /* rx timer intr (ring 0) */
-                E1000_INTR_MDAC                 = 0x00000200  /* MDIO access complete */
+                E1000_INTR_MDAC                 = 0x00000200, /* MDIO access complete */
+                E1000_RAH_AV                    = (1 << 31),  /* Set the MAC Address as Valid */
             };
         }
 
@@ -457,15 +541,27 @@
     {
         chunk  ::CALL()
         {
+            typedef unsigned long int   dma_addr_t;
+
             /*
-             * This is documented in the Intel Gigabit Ethernet Controller
-             * Software Developper manual.
+             * This part is documented in the Intel Gigabit Ethernet Controller
+             * Software Developer manual. (You can find it in the doc/hardware
+             * directory).
              *
              * Since this part is actually completely device specific it should
              * not be written here. (but in the front-end).
              */
 
             /*
+             * shortcut hw_ctx... maybe we should directly take an
+             * e1000::Context? (but we would need to make it point back to
+             * the struct net_device)
+             */
+            typedef int       ${e1000::Context};
+            ${e1000::Context} hw_ctx;
+            hw_ctx = &${ctx}->hw_ctx;
+
+            /*
              * "General Configuration" (section 14.3):
              *
              * - CTRL.ASDE/CTRL.SLU: Let the PHY handle the speed detection &
@@ -478,22 +574,181 @@
              * - Finally, initialize all the statistic registers from
              *   E1000_CRCERRS to E1000_TSCTFC.
              */
-            rtx_e1000_register_set32(&${ctx}->hw_ctx, E1000_CTRL,
+
+            rtx_e1000_register_set32(hw_ctx, E1000_CTRL,
                                      E1000_CMD_ASDE |
                                      E1000_CMD_SLU);
-            rtx_e1000_register_unset32(&${ctx}->hw_ctx, E1000_CTRL,
+            rtx_e1000_register_unset32(hw_ctx, E1000_CTRL,
                                        E1000_CMD_LRST    |
                                        E1000_CMD_FRCSPD  |
                                        E1000_CMD_PHY_RST |
                                        E1000_CMD_ILOS    |
                                        E1000_CMD_VME);
-            rtx_e1000_register_write32(&${ctx}->hw_ctx, E1000_FCAH, 0);
-            rtx_e1000_register_write32(&${ctx}->hw_ctx, E1000_FCAL, 0);
-            rtx_e1000_register_write32(&${ctx}->hw_ctx, E1000_FCT, 0);
-            rtx_e1000_register_write32(&${ctx}->hw_ctx, E1000_FCTTV, 0);
+            rtx_e1000_register_write32(hw_ctx, E1000_FCAH, 0);
+            rtx_e1000_register_write32(hw_ctx, E1000_FCAL, 0);
+            rtx_e1000_register_write32(hw_ctx, E1000_FCT, 0);
+            rtx_e1000_register_write32(hw_ctx, E1000_FCTTV, 0);
+            int i = 0; /* CNorm workaround, the init part of for isn't generated */
+            for (i = 0; i != 64; ++i)
+                rtx_e1000_register_write32(hw_ctx, E1000_CRCERRS + i * 4, 0);
+
+            {
+                ${Log::info("setup_device: general configuration done")};
+            }
+
+            /*
+             * Receive initialization:
+             *
+             * 1. Program the receive address, in RAL/RAH;
+             * 2. Initialize the Multicast Table Array;
+             * 3. Program the interrupt mask register (done in
+             *    e1000::activate_device_interruption);
+             * 4. Allocate the receive descriptor ring and map it to make it
+             *    accessible by the device.
+             *
+             * The ugly casts in here are caused by the lack of CNorm unstrict.
+             */
+
+            /* 1. Program the receive address */
+
+            /* (We should use uint{32,16}_t but CNorm doesn't know them yet) */
+            rtx_e1000_register_write32(hw_ctx, E1000_RAL,
+                    *(unsigned int *)(${ctx}->net_dev->dev_addr));
+            /*
+             * The 16 upper bits of RAH also store the AS bits (which should be
+             * 0) and the AV bit (should be 1 to set the address as valid).
+             */
+            rtx_e1000_register_write32(hw_ctx, E1000_RAH,
+                    *(unsigned short *)(&${ctx}->net_dev->dev_addr[4]));
+            rtx_e1000_register_set32(hw_ctx, E1000_RAH, E1000_RAH_AV);
+
+            {
+                ${Log::info("setup_device: program receieve address done")};
+            }
+
+            /* 2. Initialize the MTA */
+
+            i = 0; /* CNorm workaround, the init part of for isn't generated */
+            for (i = 0; i != 128; ++i)
+                rtx_e1000_register_write32(hw_ctx, E1000_MTA + i * 4, 0);
+
+            {
+                ${Log::info("setup_device: MTA init done")};
+            }
+
+            /* 4. Setup the receive descriptor ring */
+
+            /* Allocate the descriptors */
+            hw_ctx->rx_ring.size = ${config.rx_ring_size} * sizeof(*hw_ctx->rx_ring.base);
+            hw_ctx->rx_ring.size = ALIGN(hw_ctx->rx_ring.size, 4096);
+            hw_ctx->rx_ring.base = dma_alloc_coherent(
+                    &${ctx}->pci_dev->dev,
+                    hw_ctx->rx_ring.size,
+                    (dma_addr_t *)&hw_ctx->rx_ring.dma_base,
+                    GFP_KERNEL);
+            if (!hw_ctx->rx_ring.base)
+            {
+                ${Log::info("cannot allocate the descriptors for the rx ring")};
+                goto err_rx_ring_alloc;
+            }
+
+            {
+                ${Log::info("setup_device: descriptors allocated")};
+            }
+
+            /*
+             * Allocate the skbuffs, map them for DMA, and write their address
+             * in the corresponding descriptor.
+             */
+            i = 0;
+            for (i = 0; i != ${config.rx_ring_size}; ++i)
+            {
+                hw_ctx->rx_ring.skbuffs[i] = netdev_alloc_skb(
+                        ${ctx}->net_dev,
+                        ${config.rx_buffer_len});
+                if (!hw_ctx->rx_ring.skbuffs[i])
+                {
+                    ${Log::info("cannot allocate a skbuff for the rx ring")};
+                    goto err_skbuffs_alloc;
+                }
+                hw_ctx->rx_ring.dma_skbuffs[i] = (void *)dma_map_single(
+                        &${ctx}->pci_dev->dev,
+                        hw_ctx->rx_ring.skbuffs[i]->data,
+                        ${config.rx_buffer_len},
+                        DMA_FROM_DEVICE);
+                /*
+                 * Either this fails when compiling with gcc, because the last
+                 * argument is not of the correct type (dma_addr_t), or it fails
+                 * because of the lack of CNorm Unstrict.
+                 */
+                if (dma_mapping_error(&${ctx}->pci_dev->dev, (dma_addr_t)hw_ctx->rx_ring.dma_skbuffs[i]))
+                {
+                    ${Log::info("cannot dma-map a skbuff for the rx ring")};
+                    goto err_skbuffs_map;
+                }
+                hw_ctx->rx_ring.base[i].buff_addr = (unsigned long int)cpu_to_le64(
+                        hw_ctx->rx_ring.dma_skbuffs[i]);
+            }
+
+            {
+                ${Log::info("setup_device: skbuffs allocated")};
+            }
+
+            /*
+             * XXX: We can't return here since we are not in a function but in
+             * a chunk of code (injected in a function).
+             */
+            goto ok;
+
+            /*
+             * Likewise, always the same problem with error handling, we don't
+             * know where we are at in the "parent context":
+             */
+        err_skbuffs_alloc:
+            while (i--)
+            {
+                dma_unmap_single(
+                        &${ctx}->pci_dev->dev,
+                        (dma_addr_t)hw_ctx->rx_ring.dma_skbuffs[i],
+                        ${config.rx_buffer_len},
+                        DMA_FROM_DEVICE);
+        err_skbuffs_map:
+                dev_kfree_skb(hw_ctx->rx_ring.skbuffs[i]);
+            }
+
+            dma_free_coherent(&${ctx}->pci_dev->dev, hw_ctx->rx_ring.size,
+                    hw_ctx->rx_ring.base, (dma_addr_t)hw_ctx->rx_ring.dma_base);
+        err_rx_ring_alloc:
+            return -ENOMEM;
+
+        ok:
+        }
+    }
+
+    template sequence   e1000::free_rx_tx(Ethernet::Device ctx)
+    {
+        chunk   ::CALL()
+        {
+            typedef unsigned long int   dma_addr_t;
+
+            typedef int       ${e1000::Context};
+            ${e1000::Context} hw_ctx;
+            hw_ctx = &${ctx}->hw_ctx;
+
+            /* Free the rx ring: unmap and free the skbuff of every slot, then release the descriptor memory. Only the rx side is handled here. */
             int i = 0;
-            for (i = 0; i != 64; ++i)
-                rtx_e1000_register_write32(&${ctx}->hw_ctx, E1000_CRCERRS + i * 4, 0);
+            for (i = 0; i != ${config.rx_ring_size}; ++i)
+            {
+                dma_unmap_single(
+                        &${ctx}->pci_dev->dev,
+                        (dma_addr_t)hw_ctx->rx_ring.dma_skbuffs[i],
+                        ${config.rx_buffer_len},
+                        DMA_FROM_DEVICE);
+                dev_kfree_skb(hw_ctx->rx_ring.skbuffs[i]);
+            }
+            dma_free_coherent(&${ctx}->pci_dev->dev, hw_ctx->rx_ring.size,
+                    hw_ctx->rx_ring.base, (dma_addr_t)hw_ctx->rx_ring.dma_base);
+
         }
     }
 
diff --git a/rathaxes/samples/e1000/e1000.rti b/rathaxes/samples/e1000/e1000.rti
--- a/rathaxes/samples/e1000/e1000.rti
+++ b/rathaxes/samples/e1000/e1000.rti
@@ -1,6 +1,9 @@
 interface e1000 : Socket, Ethernet, PCI, LKM
 {
     provided type   e1000::Context;
+    provided type   e1000::RxDescriptor;
+    provided type   e1000::RxRing;
+
     /*
      * These two types should actually be registers definitions in the frontend:
      */
@@ -55,6 +58,11 @@
         provided chunk  ::CALL();
     }
 
+    provided sequence   e1000::free_rx_tx(Ethernet::Device dev)
+    {
+        provided chunk  ::CALL();
+    }
+
     provided sequence   e1000::handle_interrupt(Ethernet::Device)
     {
         provided chunk  ::CALL();
diff --git a/rathaxes/samples/e1000/ethernet.blt b/rathaxes/samples/e1000/ethernet.blt
--- a/rathaxes/samples/e1000/ethernet.blt
+++ b/rathaxes/samples/e1000/ethernet.blt
@@ -64,7 +64,6 @@
             static int  rtx_ethernet_open(struct net_device *dev)
             {
                 struct rtx_ethernet_dev* rtx_ether_dev = netdev_priv(dev);
-                struct rtx_e1000_ctx* ctx = &rtx_ether_dev->hw_ctx;
 
                 ${cast local.rtx_ether_dev as Ethernet::Device};
                 ${pointcut ::IMPLEMENTATION(local.rtx_ether_dev)};
@@ -105,8 +104,7 @@
         {
             static int  rtx_ethernet_close(struct net_device *dev)
             {
-                struct rtx_ethernet_dev* rtx_ether_dev =  netdev_priv(dev);
-                struct rtx_e1000_ctx* ctx = &rtx_ether_dev->hw_ctx;
+                struct rtx_ethernet_dev* rtx_ether_dev = netdev_priv(dev);
 
                 ${cast local.rtx_ether_dev as Ethernet::Device};
                 ${pointcut ::IMPLEMENTATION(local.rtx_ether_dev)};
diff --git a/rathaxes/samples/e1000/lkm.rtx b/rathaxes/samples/e1000/lkm.rtx
--- a/rathaxes/samples/e1000/lkm.rtx
+++ b/rathaxes/samples/e1000/lkm.rtx
@@ -24,7 +24,17 @@
     Ethernet::close(Ethernet::Device dev)
     {
         Log::info("closing the device");
+
+        /*
+         * Note: some calls that release resources must be made while IRQs are
+         * enabled (dma_free_coherent() for example), so we have to clean up
+         * the rx ring before calling free_interrupt_handler().
+         */
+        e1000::free_rx_tx(dev);
+        Log::info("free'ed up skbuffs");
+
         e1000::free_interrupt_handler(dev);
+        Log::info("interrupt handler free'ed");
     }
 
     Ethernet::interrupt_handler(Ethernet::Device dev)
@@ -61,4 +71,11 @@
     PCI::set_master = true;
 
     Ethernet::ifname = "rtx%d";
+
+    e1000::rx_ring_size = 256; /* Number of incoming packets we can buffer */
+    /*
+     * The e1000 supports seven receive buffer sizes: 256, 512, 1024, 2048,
+     * 4096, 8192 and 16384 bytes:
+     */
+    e1000::rx_buffer_len = 2048;
 }
diff --git a/rathaxes/samples/e1000/socket.blt b/rathaxes/samples/e1000/socket.blt
--- a/rathaxes/samples/e1000/socket.blt
+++ b/rathaxes/samples/e1000/socket.blt
@@ -4,7 +4,11 @@
     {
         chunk LKM::includes()
         {
+            typedef int ${Socket::SKBuff};
+
             #include <linux/skbuff.h>
+
+            static const ${Socket::SKBuff} force_rtx_lnux_skbuf_decl;
         }
 
         chunk ::decl()