changeset 88:8ffcdd6aa410

WIP on refactoring + tx
author Louis Opter <louis@lse.epitech.net>
date Mon, 01 Oct 2012 01:20:35 +0200
parents e9736ab70995
children a9b47a2f8b98
files e1000_implement_the_frame_transmission_chunk.patch
diffstat 1 files changed, 509 insertions(+), 69 deletions(-) [+]
line wrap: on
line diff
--- a/e1000_implement_the_frame_transmission_chunk.patch	Mon Sep 24 09:13:15 2012 +0200
+++ b/e1000_implement_the_frame_transmission_chunk.patch	Mon Oct 01 01:20:35 2012 +0200
@@ -78,27 +78,126 @@
 diff --git a/rathaxes/samples/e1000/e1000.blt b/rathaxes/samples/e1000/e1000.blt
 --- a/rathaxes/samples/e1000/e1000.blt
 +++ b/rathaxes/samples/e1000/e1000.blt
-@@ -110,9 +110,9 @@
+@@ -11,16 +11,15 @@
+ 
+         chunk   ::decl()
+         {
+-            typedef struct rtx_e1000_rx_descriptor
++            struct rtx_e1000_rx_descriptor
+             {
+-                /* actual types are in comments */
+-                unsigned long int   /* __le64 */    buff_addr;
+-                unsigned short      /* __le16 */    length;
+-                unsigned short      /* __le16 */    csum;
+-                unsigned char                       status;
+-                unsigned char                       errors;
+-                unsigned short      /* __le16 */    special;
+-            } *rtx_e1000_rx_descriptor_p;
++                __le64          buff_addr;
++                __le16          length;
++                __le16          csum;
++                unsigned char   status;
++                unsigned char   errors;
++                __le16          special;
++            };
+         }
+ 
+         chunk   ::init()
+@@ -50,30 +49,30 @@
+ 
+         chunk   ::decl()
+         {
+-            typedef struct rtx_e1000_tx_descriptor
++            struct rtx_e1000_tx_descriptor
+             {
+-                unsigned long int   /* __le64 */    buff_addr;
++                __le64                  buff_addr;
+                 union
+                 {
+-                    unsigned int    /* __le32 */    data;
++                    __le32              data;
+                     struct
+                     {
+-                        unsigned short /* __le16 */ length;
+-                        unsigned char               csum_offset; /* CSO */
+-                        unsigned char               cmd;
+-                    }                               fields;
+-                }                                   lower;
++                        __le16          length;
++                        unsigned char   csum_offset; /* CSO */
++                        unsigned char   cmd;
++                    }                   fields;
++                }                       lower;
+                 union
+                 {
+-                    unsigned int    /* __le32 */    data;
++                    __le32              data;
+                     struct
+                     {
+-                        unsigned char               status;
+-                        unsigned char               csum_start; /* CSS */
+-                        unsigned short /* __le16 */ special;
+-                    }                               fields;
+-                }                                   upper;
+-            } *rtx_e1000_tx_descriptor_p;
++                        unsigned char   status;
++                        unsigned char   csum_start; /* CSS */
++                        __le16          special;
++                    }                   fields;
++                }                       upper;
++            };
+         }
+ 
+         chunk   ::init()
+@@ -93,9 +92,8 @@
+      *   CNorm unstrict);
+      * - dma_base: (physical) address of the ring where the device can access
+      *   the different descriptors;
+-     * - skbuffs: array of the skbuffs associated with each descriptor;
+-     * - dma_skbuffs: (physical) address of each skbuff where the device can
+-     *   write the received packets;
++     * - skbuffs: array of the skbuffs and their dma (physical) address
++     *   associated with each descriptor.
+      */
+     template type   e1000::RxRing()
+     {
+@@ -110,9 +108,8 @@
              {
                  unsigned int                    size;
                  struct rtx_e1000_rx_descriptor  *base;
 -                void*   /* dma_addr_t */        dma_base;
+-                struct sk_buff                  *skbuffs[256 /* ${config.rx_ring_size} */];
+-                void*   /* dma_addr_t */        dma_skbuffs[256 /* ${config.rx_ring_size} */];
 +                dma_addr_t                      dma_base;
-                 struct sk_buff                  *skbuffs[256 /* ${config.rx_ring_size} */];
--                void*   /* dma_addr_t */        dma_skbuffs[256 /* ${config.rx_ring_size} */];
-+                dma_addr_t                      dma_skbuffs[256 /* ${config.rx_ring_size} */];
++                ${Socket::SKBuff}               skbuffs[${config.rx_ring_size}];
              };
          }
  
-@@ -148,11 +148,59 @@
+@@ -134,7 +131,13 @@
+      * - base: address of the ring (we can't use the typedef here until we get
+      *   CNorm unstrict);
+      * - dma_base: (physical) address of the ring where the device can access
+-     *   the different descriptors.
++     *   the different descriptors;
++     * - skbuffs: the skbuffs associated with each descriptor of the ring;
++     * - head: index on the head of the ring;
++     * - tail: index on the tail of the ring.
++     *
++     * Keep in mind that the head and tail fields are, obviously, not
++     * synchronized with TDT/TDH on the device.
+      */
+     template type   e1000::TxRing()
+     {
+@@ -148,11 +151,88 @@
              struct rtx_e1000_tx_ring
              {
                  unsigned int                    size;
-+                /* We should probably use ${e1000::TxDescriptor} here: */
-                 struct rtx_e1000_tx_descriptor  *base;
+-                struct rtx_e1000_tx_descriptor  *base;
 -                void*   /* dma_addr_t */        dma_base;
++                /* XXX: can't use ${e1000::TxDescriptor} here: */
++                struct rtx_e1000_tx_descriptor  *base; /* rename to descs */
 +                dma_addr_t                      dma_base;
-+                /* indexes on base */
++                ${Socket::SKBuff}               skbuffs[${config.tx_ring_size}];
 +                unsigned int                    head;
 +                unsigned int                    tail;
              };
@@ -108,7 +207,8 @@
 +        {
 +            static unsigned int rtx_e1000_tx_ring_descriptors_remaining(struct rtx_e1000_tx_ring *);
 +            static int          rtx_e1000_tx_ring_tso_cksum_offload(struct rtx_e1000_tx_ring *, struct rtx_socket_skbuff *);
-+            static int          rtx_e1000_tx_ring_put(struct rtx_e1000_tx_ring *, struct rtx_socket_skbuff *);
++            static void         rtx_e1000_tx_ring_put(struct rtx_e1000_tx_ring *, struct rtx_socket_skbuff *);
++            static void         rtx_e1000_tx_ring_start_xmit(struct rtx_e1000_tx_ring *, const struct rtx_e1000_ctx *);
 +        }
 +
 +        chunk   LKM::code()
@@ -128,9 +228,32 @@
 +                return skb_is_gso(skb->skbuff) || skb->skbuff->ip_summed == CHECKSUM_PARTIAL;
 +            }
 +
-+            static int          rtx_e1000_tx_ring_put(struct rtx_e1000_tx_ring *self, struct rtx_socket_skbuff *skb)
++            static void         rtx_e1000_tx_ring_put(struct rtx_e1000_tx_ring *self, struct rtx_socket_skbuff *skb)
 +            {
-+                return NETDEV_TX_OK;
++                WARN_ON(!skb);
++
++                /*
++                 * Mark it as the last buffer (EOP) and ask the card to
++                 * insert the Ethernet FCS (Frame Check Sequence).
++                 *
++                 * XXX: it sucks to use skb_headlen() here (this part of the
++                 * code shouldn't be aware of it and should use something
++                 * more abstract).
++                 */
++                struct rtx_e1000_tx_descriptor *tx_desc = &self->base[self->tail];
++                tx_desc->lower.data = cpu_to_le32(
++                        E1000_TXD_CMD_EOP  |
++                        E1000_TXD_CMD_IFCS |
++                        skb_headlen(skb->skbuff));
++                tx_desc->upper.data = 0;
++                tx_desc->buff_addr = cpu_to_le64(skb->dma_handle);
++                memcpy(&self->skbuffs[self->tail], skb, sizeof(*skb));
++                self->tail = (self->tail + 1) % 256 /* XXX: ${config.tx_ring_size} */;
++            }
++
++            static void         rtx_e1000_tx_ring_start_xmit(struct rtx_e1000_tx_ring *self, const struct rtx_e1000_ctx *hw_ctx)
++            {
++                rtx_e1000_register_write32(hw_ctx, E1000_TDT, self->tail);
 +            }
 +        }
 +
@@ -148,10 +271,15 @@
 +            rtx_e1000_tx_ring_put(${self}, &${skb});
 +        }
 +
++        chunk   start_xmit(e1000::Context ctx)
++        {
++            rtx_e1000_tx_ring_start_xmit(${self}, ${ctx});
++        }
++
          chunk   ::init()
          {
          }
-@@ -183,7 +231,7 @@
+@@ -183,7 +263,7 @@
              struct rtx_e1000_ctx
              {
                  int                         bars;
@@ -160,26 +288,41 @@
                  int                         irq;
  
                  /* we can't use the Rathaxes type here (#8) */
-@@ -334,6 +382,30 @@
+@@ -334,6 +414,45 @@
          }
      }
  
-+    template type   e1000::TxFlags()
++    template type   e1000::TxDescriptorFlags()
 +    {
 +        chunk LKM::includes()
 +        {
-+            static const ${e1000::TxFlags}  force_enum_rtx_e1000_tx_flags_decl;
++            static const ${e1000::TxDescriptorFlags}  force_enum_rtx_e1000_tx_descriptor_flags_decl;
 +        }
 +
 +        chunk ::decl()
 +        {
-+            enum    rtx_e1000_tx_flags
++            enum    rtx_e1000_tx_descriptor_flags
 +            {
-+                E1000_TX_FLAGS_CSUM     = 0x00000001,
-+                E1000_TX_FLAGS_VLAN     = 0x00000002,
-+                E1000_TX_FLAGS_TSO      = 0x00000004,
-+                E1000_TX_FLAGS_IPV4     = 0x00000008,
-+                E1000_TX_FLAGS_NO_FCS   = 0x00000010,
++                E1000_TXD_DTYP_D        = 0x00100000, /* Data Descriptor */
++                E1000_TXD_DTYP_C        = 0x00000000, /* Context Descriptor */
++                E1000_TXD_POPTS_IXSM    = 0x01,       /* Insert IP checksum */
++                E1000_TXD_POPTS_TXSM    = 0x02,       /* Insert TCP/UDP checksum */
++                E1000_TXD_CMD_EOP       = 0x01000000, /* End of Packet */
++                E1000_TXD_CMD_IFCS      = 0x02000000, /* Insert FCS (Ethernet CRC) */
++                E1000_TXD_CMD_IC        = 0x04000000, /* Insert Checksum */
++                E1000_TXD_CMD_RS        = 0x08000000, /* Report Status */
++                E1000_TXD_CMD_RPS       = 0x10000000, /* Report Packet Sent */
++                E1000_TXD_CMD_DEXT      = 0x20000000, /* Descriptor extension (0 = legacy) */
++                E1000_TXD_CMD_VLE       = 0x40000000, /* Add VLAN tag */
++                E1000_TXD_CMD_IDE       = 0x80000000, /* Enable Tidv register */
++                E1000_TXD_STAT_DD       = 0x00000001, /* Descriptor Done */
++                E1000_TXD_STAT_EC       = 0x00000002, /* Excess Collisions */
++                E1000_TXD_STAT_LC       = 0x00000004, /* Late Collisions */
++                E1000_TXD_STAT_TU       = 0x00000008, /* Transmit underrun */
++                E1000_TXD_CMD_TCP       = 0x01000000, /* TCP packet */
++                E1000_TXD_CMD_IP        = 0x02000000, /* IP packet */
++                E1000_TXD_CMD_TSE       = 0x04000000, /* TCP Seg enable */
++                E1000_TXD_STAT_TC       = 0x00000004, /* Tx Underrun */
 +            };
 +        }
 +
@@ -191,18 +334,104 @@
      template sequence   e1000::create_device()
      {
          chunk Ethernet::create_device(PCI::Device pdev, Ethernet::Device rtx_ether_ctx)
-@@ -358,8 +430,8 @@
-                 ${Log::info("e1000::create: pci_enable_device_mem failed")};
-             if (pci_request_selected_regions(${pdev}, ${rtx_ether_ctx}->hw_ctx.bars, ${config.name}))
-                 ${Log::info("e1000::create: pci_request_selected_regions failed")};
--            if (${config.set_master})
--                pci_set_master(${pdev});
-+// XXX      if (${config.set_master})
-+// XXX          pci_set_master(${pdev});
+@@ -376,8 +495,7 @@
+             udelay(10);
+ 
+             /* Now we can load its mac address (thanks minix code) */
+-            int i = 0;
+-            for (i = 0 /* < this is not generated! (cnorm bug) */; i < 3; ++i)
++            for (int i = 0; i < 3; ++i)
+             {
+                 rtx_e1000_register_write32(&${rtx_ether_ctx}->hw_ctx, E1000_EEPROM_READ, (i << 8) | 1);
+ 
+@@ -420,6 +538,7 @@
+         }
+     }
+ 
++    /* TODO: make that a method of e1000::Context */
+     template sequence   e1000::print_status(Ethernet::Device ctx)
+     {
+         chunk   LKM::prototypes()
+@@ -466,17 +585,19 @@
+      * ${e1000.init(E1000_STATUS); // didn't work, so we used the next line
+      * reg_status = E1000_STATUS;
+      * ${e1000::register_read32(rtx_ether_ctx->hw_ctx, reg_status)};
++     *
++     * TODO: make them methods of e1000::Context
+      */
+     template sequence   e1000::register_read32(e1000::Context ctx, e1000::Register reg_offset)
+     {
+         chunk   LKM::prototypes()
+         {
+-            static unsigned int    rtx_e1000_register_read32(struct rtx_e1000_ctx *, unsigned int);
++            static unsigned int    rtx_e1000_register_read32(const struct rtx_e1000_ctx *, unsigned int);
+         }
+ 
+         chunk   LKM::code()
+         {
+-            static unsigned int    rtx_e1000_register_read32(struct rtx_e1000_ctx *ctx, unsigned int reg_offset)
++            static unsigned int    rtx_e1000_register_read32(const struct rtx_e1000_ctx *ctx, unsigned int reg_offset)
+             {
+                 return ioread32(ctx->ioaddr + reg_offset);
+             }
+@@ -492,12 +613,12 @@
+     {
+         chunk   LKM::prototypes()
+         {
+-            static void rtx_e1000_register_write32(struct rtx_e1000_ctx *, unsigned int, unsigned int);
++            static void rtx_e1000_register_write32(const struct rtx_e1000_ctx *, unsigned int, unsigned int);
+         }
  
-             /* 0 here is for BAR_0: */
-             ${rtx_ether_ctx}->hw_ctx.ioaddr = pci_ioremap_bar(${pdev}, 0);
-@@ -630,8 +702,6 @@
+         chunk   LKM::code()
+         {
+-            static void rtx_e1000_register_write32(struct rtx_e1000_ctx *ctx, unsigned int reg_offset, unsigned int value)
++            static void rtx_e1000_register_write32(const struct rtx_e1000_ctx *ctx, unsigned int reg_offset, unsigned int value)
+             {
+                 iowrite32(value, ctx->ioaddr + reg_offset);
+             }
+@@ -513,12 +634,12 @@
+     {
+         chunk   LKM::prototypes()
+         {
+-            static void rtx_e1000_register_set32(struct rtx_e1000_ctx *, unsigned int, unsigned int);
++            static void rtx_e1000_register_set32(const struct rtx_e1000_ctx *, unsigned int, unsigned int);
+         }
+ 
+         chunk   LKM::code()
+         {
+-            static void rtx_e1000_register_set32(struct rtx_e1000_ctx *ctx, unsigned int reg_offset, unsigned int value)
++            static void rtx_e1000_register_set32(const struct rtx_e1000_ctx *ctx, unsigned int reg_offset, unsigned int value)
+             {
+                 iowrite32(rtx_e1000_register_read32(ctx, reg_offset) | value, ctx->ioaddr + reg_offset);
+             }
+@@ -534,12 +655,12 @@
+     {
+         chunk   LKM::prototypes()
+         {
+-            static void rtx_e1000_register_unset32(struct rtx_e1000_ctx *, unsigned int, unsigned int);
++            static void rtx_e1000_register_unset32(const struct rtx_e1000_ctx *, unsigned int, unsigned int);
+         }
+ 
+         chunk   LKM::code()
+         {
+-            static void rtx_e1000_register_unset32(struct rtx_e1000_ctx *ctx, unsigned int reg_offset, unsigned int value)
++            static void rtx_e1000_register_unset32(const struct rtx_e1000_ctx *ctx, unsigned int reg_offset, unsigned int value)
+             {
+                 iowrite32(rtx_e1000_register_read32(ctx, reg_offset) & ~value, ctx->ioaddr + reg_offset);
+             }
+@@ -626,12 +747,18 @@
+         }
+     }
+ 
++    /* TODO: refactor
++     *
++     * Split into two methods:
++     * - e1000::RxRing::init_rx()
++     * - e1000::TxRing::init_tx()
++     *
++     * Also it should use the new methods in Socket::SKBuff.
++     */
+     template sequence   set_up_device(Ethernet::Device ctx)
      {
          chunk  ::CALL()
          {
@@ -211,7 +440,32 @@
              /*
               * This part is documented in the Intel Gigabit Ethernet Controller
               * Software Developper manual. (You can find it in the doc/hardware
-@@ -733,7 +803,7 @@
+@@ -663,6 +790,8 @@
+              *   E1000_CRCERRS to E1000_TSCTFC.
+              */
+ 
++            int i;
++
+             rtx_e1000_register_set32(hw_ctx, E1000_CTRL,
+                                      E1000_CMD_ASDE |
+                                      E1000_CMD_SLU);
+@@ -676,7 +805,6 @@
+             rtx_e1000_register_write32(hw_ctx, E1000_FCAL, 0);
+             rtx_e1000_register_write32(hw_ctx, E1000_FCT, 0);
+             rtx_e1000_register_write32(hw_ctx, E1000_FCTTV, 0);
+-            int i = 0; /* CNorm workaround, the init part of for isn't generated */
+             for (i = 0; i != 64; ++i)
+                 rtx_e1000_register_write32(hw_ctx, E1000_CRCERRS + i * 4, 0);
+             
+@@ -719,7 +847,6 @@
+ 
+             /* 2. Initialize the MTA */
+ 
+-            i = 0; /* CNorm workaround, the init part of for isn't generated */
+             for (i = 0; i != 128; ++i)
+                 rtx_e1000_register_write32(hw_ctx, E1000_MTA + i * 4, 0);
+ 
+@@ -733,7 +860,7 @@
              hw_ctx->rx_ring.base = dma_alloc_coherent(
                      &${ctx}->pci_dev->dev,
                      hw_ctx->rx_ring.size,
@@ -220,16 +474,60 @@
                      GFP_KERNEL);
              if (!hw_ctx->rx_ring.base)
              {
-@@ -758,7 +828,7 @@
+@@ -747,41 +874,37 @@
+              * Allocate the skbuffs, map them for DMA, and write their address
+              * in the corresponding descriptor.
+              */
+-            i = 0;
+             for (i = 0; i != ${config.rx_ring_size}; ++i)
+             {
+-                hw_ctx->rx_ring.skbuffs[i] = netdev_alloc_skb(
++                hw_ctx->rx_ring.skbuffs[i].skbuff = netdev_alloc_skb(
+                         ${ctx}->net_dev,
+                         ${config.rx_buffer_len});
+-                if (!hw_ctx->rx_ring.skbuffs[i])
++                if (!hw_ctx->rx_ring.skbuffs[i].skbuff)
+                 {
                      ${Log::info("cannot allocate a skbuff for the rx ring")};
                      goto err_skbuffs_alloc;
                  }
 -                hw_ctx->rx_ring.dma_skbuffs[i] = (void *)dma_map_single(
-+                hw_ctx->rx_ring.dma_skbuffs[i] = dma_map_single(
++                hw_ctx->rx_ring.skbuffs[i].dma_handle = dma_map_single(
                          &${ctx}->pci_dev->dev,
-                         hw_ctx->rx_ring.skbuffs[i]->data,
+-                        hw_ctx->rx_ring.skbuffs[i]->data,
++                        hw_ctx->rx_ring.skbuffs[i].skbuff->data,
                          ${config.rx_buffer_len},
-@@ -820,7 +890,7 @@
+                         DMA_FROM_DEVICE);
+-                /*
+-                 * Either this fails because, when compiling with gcc because
+-                 * the last argument is not of the correct type (dma_addr_t).
+-                 * Or it fails because of the lack of CNorm Unstrict.
+-                 */
+-                if (dma_mapping_error(&${ctx}->pci_dev->dev, (dma_addr_t)hw_ctx->rx_ring.dma_skbuffs[i]))
++                int dma_error = dma_mapping_error(&${ctx}->pci_dev->dev,
++                        hw_ctx->rx_ring.skbuffs[i].dma_handle);
++                if (dma_error)
+                 {
+                     ${Log::info("cannot dma-map a skbuff for the rx ring")};
+                     goto err_skbuffs_map;
+                 }
+-                hw_ctx->rx_ring.base[i].buff_addr = (unsigned long int)cpu_to_le64(
+-                        hw_ctx->rx_ring.dma_skbuffs[i]);
++                hw_ctx->rx_ring.base[i].buff_addr = cpu_to_le64(
++                        hw_ctx->rx_ring.skbuffs[i].skbuff);
+             }
+ 
+             ${Log::info("setup_device: skbuffs allocated")};
+ 
+             /* 5. Save the emplacement and the size of the ring in RDBA/RDLEN */
+-            rtx_e1000_register_write32(hw_ctx, E1000_RDBAL, (dma_addr_t)hw_ctx->rx_ring.dma_base & 0xffffffff);
+-            rtx_e1000_register_write32(hw_ctx, E1000_RDBAH, (dma_addr_t)hw_ctx->rx_ring.dma_base >> 32);
++            rtx_e1000_register_write32(hw_ctx, E1000_RDBAL, hw_ctx->rx_ring.dma_base & 0xffffffff);
++            rtx_e1000_register_write32(hw_ctx, E1000_RDBAH, hw_ctx->rx_ring.dma_base >> 32);
+             rtx_e1000_register_write32(hw_ctx, E1000_RDLEN, hw_ctx->rx_ring.size);
+ 
+             /* 6. Setup RDH/RDT */
+@@ -820,7 +943,7 @@
              hw_ctx->tx_ring.base = dma_alloc_coherent(
                      &${ctx}->pci_dev->dev,
                      hw_ctx->tx_ring.size,
@@ -238,7 +536,16 @@
                      GFP_KERNEL);
              if (!hw_ctx->rx_ring.base)
              {
-@@ -838,6 +908,8 @@
+@@ -831,13 +954,15 @@
+             ${Log::info("setup_device: tx descriptors allocated")};
+ 
+             /* 2. Save the emplacement and the size of the ring in TDBA/TDLEN */
+-            rtx_e1000_register_write32(hw_ctx, E1000_TDBAL, (dma_addr_t)hw_ctx->tx_ring.dma_base & 0xffffffff);
+-            rtx_e1000_register_write32(hw_ctx, E1000_TDBAH, (dma_addr_t)hw_ctx->tx_ring.dma_base >> 32);
++            rtx_e1000_register_write32(hw_ctx, E1000_TDBAL, hw_ctx->tx_ring.dma_base & 0xffffffff);
++            rtx_e1000_register_write32(hw_ctx, E1000_TDBAH, hw_ctx->tx_ring.dma_base >> 32);
+             rtx_e1000_register_write32(hw_ctx, E1000_TDLEN, hw_ctx->tx_ring.size);
+ 
              /* 3. Setup TDH/TDT to zero: the queue is empty */
              rtx_e1000_register_write32(hw_ctx, E1000_TDH, 0);
              rtx_e1000_register_write32(hw_ctx, E1000_TDT, 0);
@@ -247,7 +554,76 @@
  
              /* 4. Set TCTL.PSP and enable the transmitter */
              rtx_e1000_register_set32(hw_ctx, E1000_TCTL, E1000_TCTL_PSP|E1000_TCTL_PSP);
-@@ -930,4 +1002,81 @@
+@@ -860,15 +985,15 @@
+             {
+                 dma_unmap_single(
+                         &${ctx}->pci_dev->dev,
+-                        (dma_addr_t)hw_ctx->rx_ring.dma_skbuffs[i],
++                        hw_ctx->rx_ring.skbuffs[i].dma_handle,
+                         ${config.rx_buffer_len},
+                         DMA_FROM_DEVICE);
+         err_skbuffs_map:
+-                dev_kfree_skb(hw_ctx->rx_ring.skbuffs[i]);
++                dev_kfree_skb(hw_ctx->rx_ring.skbuffs[i].skbuff);
+             }
+ 
+             dma_free_coherent(&${ctx}->pci_dev->dev, hw_ctx->rx_ring.size,
+-                    hw_ctx->rx_ring.base, (dma_addr_t)hw_ctx->rx_ring.dma_base);
++                    hw_ctx->rx_ring.base, hw_ctx->rx_ring.dma_base);
+         err_rx_ring_alloc:
+             return -ENOMEM;
+ 
+@@ -876,12 +1001,15 @@
+         }
+     }
+ 
++    /* TODO:
++     *
++     * Refactor into two methods (one in RxRing and one in TxRing) and make use
++     * of the new methods in Socket::SKBuff.
++     */
+     template sequence   free_rx_tx(Ethernet::Device ctx)
+     {
+         chunk   ::CALL()
+         {
+-            typedef unsigned long int   dma_addr_t;
+-
+             ${e1000::Context} *hw_ctx;
+             hw_ctx = &${ctx}->hw_ctx;
+ 
+@@ -890,18 +1018,17 @@
+              * - Unmap and free the skbuffs;
+              * - Free the descriptors array.
+              */
+-            int i = 0;
+-            for (i = 0; i != ${config.rx_ring_size}; ++i)
++            for (int i = 0; i != ${config.rx_ring_size}; ++i)
+             {
+                 dma_unmap_single(
+                         &${ctx}->pci_dev->dev,
+-                        (dma_addr_t)hw_ctx->rx_ring.dma_skbuffs[i],
++                        (dma_addr_t)hw_ctx->rx_ring.skbuffs[i].dma_handle,
+                         ${config.rx_buffer_len},
+                         DMA_FROM_DEVICE);
+-                dev_kfree_skb(hw_ctx->rx_ring.skbuffs[i]);
++                dev_kfree_skb(hw_ctx->rx_ring.skbuffs[i].skbuff);
+             }
+             dma_free_coherent(&${ctx}->pci_dev->dev, hw_ctx->rx_ring.size,
+-                    hw_ctx->rx_ring.base, (dma_addr_t)hw_ctx->rx_ring.dma_base);
++                    hw_ctx->rx_ring.base, hw_ctx->rx_ring.dma_base);
+             ${Log::info("free_rx_tx: rx ring free'ed")};
+ 
+             /*
+@@ -909,7 +1036,7 @@
+              * - Free the descriptors array.
+              */
+             dma_free_coherent(&${ctx}->pci_dev->dev, hw_ctx->tx_ring.size,
+-                    hw_ctx->tx_ring.base, (dma_addr_t)hw_ctx->tx_ring.dma_base);
++                    hw_ctx->tx_ring.base, hw_ctx->tx_ring.dma_base);
+             ${Log::info("free_rx_tx: tx ring free'ed")};
+         }
+     }
+@@ -930,4 +1057,92 @@
              }
          }
      }
@@ -268,19 +644,15 @@
 +             * NETDEV_TX_BUSY.
 +             */
 +
-+            /*
-+             * XXX: This leaves a placeholder if I cast local.tx_ring as
-+             * e1000::TxRing below.
-+             */
 +            ${Socket::SKBuff} skb;
-+            ${local.skb.init(kernel_skb)};
++            ${e1000::Context} *hw_ctx;
++            ${e1000::TxRing} *tx_ring;
++            ${Device::Device} dev;
 +
-+            /*
-+             * XXX: can't write ${e1000::TxRing} * (the placeholder isn't
-+             * resolved).
-+             */
-+            struct rtx_e1000_tx_ring *tx_ring = &${ctx}->hw_ctx.tx_ring;
-+            //${cast local.tx_ring as e1000::TxRing};
++            ${local.skb.init(kernel_skb)};
++            hw_ctx = &${ctx}->hw_ctx;
++            tx_ring = &hw_ctx->tx_ring;
++            dev = &${ctx}->pci_dev->dev;
 +
 +            ${Log::info("xmit: skbuff details:")};
 +            /*
@@ -297,7 +669,7 @@
 +            /*
 +             * The transmission is going to be several steps:
 +             * 1. TCP Segmentation Offload & Checksum Offloading: pick a
-+             *    descriptor from the tx ring and fill it as a contex
++             *    descriptor from the tx ring and fill it as a context
 +             *    descriptor to allow the card to slice into several packets
 +             *    according to the MSS;
 +             * 2. DMA Map the skbuff data as slices of 4096;
@@ -309,22 +681,37 @@
 +                return NETDEV_TX_BUSY;
 +
 +            /* 1. Offloading */
++
 +            /* XXX: ${local.tx_ring.tso_cksum_offload(skb)}; */
 +            if (rtx_e1000_tx_ring_tso_cksum_offload(tx_ring, &skb))
 +            {
 +                ${Log::info("xmit: the packet needs to be fragmented and/or checksummed but this not implemented yet!")};
-+                return NETDEV_TX_OK;
++                goto err_offload;
 +            }
 +
 +            /* 2. Map the data */
-+            ${Device::Device} dev = &${ctx}->pci_dev->dev;
-+            /* XXX: ${local.skb.dma_map(local.dev)}; */
-+            rtx_socket_dma_map(&skb, dev);
 +
-+            /* 3. Update the TX Ring */
++            /* XXX: ${local.skb.map_to(local.dev)}; */
++            if (rtx_socket_skbuff_map(&skb, dev, DMA_TO_DEVICE))
++            {
++                ${Log::info("xmit: can't DMA map a SKbuff")};
++                goto err_skb_map_to;
++            }
++
++            /* 3. Update the TX Ring and signal the hardware */
++
 +            /* XXX: ${local.tx_ring.put(skb)}; */
 +            rtx_e1000_tx_ring_put(tx_ring, &skb);
 +
++            /* XXX: ${local.tx_ring.start_xmit(hw_ctx)}; */
++            rtx_e1000_tx_ring_start_xmit(tx_ring, hw_ctx);
++
++            return NETDEV_TX_OK;
++
++        err_offload:
++        err_skb_map_to:
++            /* XXX: ${local.skb.unmap_to_and_free(local.dev)}; */
++            rtx_socket_skbuff_unmap_and_free(&skb, dev, DMA_TO_DEVICE);
 +            return NETDEV_TX_OK;
 +        }
 +    }
@@ -332,7 +719,7 @@
 diff --git a/rathaxes/samples/e1000/e1000.rti b/rathaxes/samples/e1000/e1000.rti
 --- a/rathaxes/samples/e1000/e1000.rti
 +++ b/rathaxes/samples/e1000/e1000.rti
-@@ -31,8 +31,14 @@
+@@ -31,8 +31,15 @@
      provided type   TxRing
      {
          chunk       LKM::includes();
@@ -344,14 +731,15 @@
 +        method      descriptors_remaining();
 +        method      tso_cksum_offload(Socket::SKBuff);
 +        method      put(Socket::SKBuff);
++        method      start_xmit(e1000::Context);
      }
  
      /*
-@@ -51,6 +57,12 @@
+@@ -51,6 +58,12 @@
          method      decl();
      }
  
-+    provided type   TxFlags
++    provided type   TxDescriptorFlags
 +    {
 +        chunk       LKM::includes();
 +        chunk       ::decl();
@@ -360,7 +748,7 @@
      provided sequence   create_device()
      {
          provided chunk  Ethernet::create_device(PCI::Device, Ethernet::Device);
-@@ -109,6 +121,16 @@
+@@ -109,6 +122,16 @@
          provided chunk  ::CALL();
      }
  
@@ -614,7 +1002,7 @@
 diff --git a/rathaxes/samples/e1000/socket.blt b/rathaxes/samples/e1000/socket.blt
 --- a/rathaxes/samples/e1000/socket.blt
 +++ b/rathaxes/samples/e1000/socket.blt
-@@ -1,20 +1,104 @@
+@@ -1,20 +1,153 @@
 -with Socket, LKM
 +with Socket, LKM, Device, Ethernet
  {
@@ -660,13 +1048,16 @@
 +        chunk   LKM::prototypes()
          {
 +            static void rtx_socket_skbuff_dump_infos(struct rtx_socket_skbuff *);
-+            static int  rtx_socket_dma_map(struct rtx_socket_skbuff *, struct device *);
++            static int  rtx_socket_skbuff_map(struct rtx_socket_skbuff *, struct device *, enum dma_data_direction);
++            static void rtx_socket_skbuff_unmap_and_free(struct rtx_socket_skbuff *, struct device *, enum dma_data_direction);
 +        }
 +
 +        chunk   LKM::code()
 +        {
 +            static void rtx_socket_skbuff_dump_infos(struct rtx_socket_skbuff *self)
 +            {
++                WARN_ON(!self->skbuff);
++
 +                /*
 +                 * We should use a Rathaxes log abstraction instead of pr_info here,
 +                 * but Rathaxes doesn't support functions with a variable number of
@@ -691,14 +1082,45 @@
 +                );
 +            }
 +
-+            static int rtx_socket_dma_map(struct rtx_socket_skbuff *self, struct device *dev)
++            static int rtx_socket_skbuff_map(struct rtx_socket_skbuff *self,
++                                             struct device *dev,
++                                             enum dma_data_direction direction)
 +            {
++                WARN_ON(!self->skbuff);
++                WARN_ON(!self->skbuff->data);
++                WARN_ON(self->dma_handle);
++
 +                self->dma_handle = dma_map_single(
 +                        dev,
 +                        self->skbuff->data,
 +                        skb_headlen(self->skbuff),
-+                        DMA_TO_DEVICE);
-+                return dma_mapping_error(dev, self->dma_handle);
++                        direction);
++                int err = dma_mapping_error(dev, self->dma_handle);
++                if (err)
++                {
++                    self->dma_handle = 0;
++                    return err;
++                }
++                return 0;
++            }
++
++            static void rtx_socket_skbuff_unmap_and_free(struct rtx_socket_skbuff *self,
++                                                         struct device *dev,
++                                                         enum dma_data_direction direction)
++            {
++                WARN_ON(!self->skbuff);
++                WARN_ON(!self->skbuff->data);
++
++                if (self->dma_handle)
++                {
++                    dma_unmap_single(dev,
++                            self->dma_handle,
++                            skb_headlen(self->skbuff),
++                            direction);
++                    self->dma_handle = 0;
++                }
++                dev_kfree_skb_any(self->skbuff);
++                self->skbuff = 0;
 +            }
 +        }
 +
@@ -718,16 +1140,31 @@
 +            rtx_socket_skbuff_dump_infos(${self});
 +        }
 +
-+        chunk   dma_map(Device::Device dev)
++        chunk   map_to(Device::Device dev)
++        {
++            rtx_socket_skbuff_map(${self}, ${dev}, DMA_TO_DEVICE);
++        }
++
++        chunk   map_from(Device::Device dev)
 +        {
-+            rtx_socket_dma_map(${self}, ${dev});
++            rtx_socket_skbuff_map(${self}, ${dev}, DMA_FROM_DEVICE);
++        }
++
++        chunk   unmap_to_and_free(Device::Device dev)
++        {
++            rtx_socket_skbuff_unmap_and_free(${self}, ${dev}, DMA_TO_DEVICE);
++        }
++
++        chunk   unmap_from_and_free(Device::Device dev)
++        {
++            rtx_socket_skbuff_unmap_and_free(${self}, ${dev}, DMA_FROM_DEVICE);
          }
  
          map
 diff --git a/rathaxes/samples/e1000/socket.rti b/rathaxes/samples/e1000/socket.rti
 --- a/rathaxes/samples/e1000/socket.rti
 +++ b/rathaxes/samples/e1000/socket.rti
-@@ -1,8 +1,19 @@
+@@ -1,8 +1,22 @@
 -interface Socket : LKM
 +interface Socket : LKM, Device
  {
@@ -749,6 +1186,9 @@
 +        method      decl();
 +        method      init(Socket::KernelSKBuff);
 +        method      dump_infos();
-+        method      dma_map(Device::Device);
++        method      map_to(Device::Device);
++        method      map_from(Device::Device);
++        method      unmap_to_and_free(Device::Device);
++        method      unmap_from_and_free(Device::Device);
      }
  }