view e1000_implement_the_frame_transmission_chunk.patch @ 89:a9b47a2f8b98

Wip on tx, fix ioaddr being incorrectly generated
author Louis Opter <louis@lse.epitech.net>
date Thu, 04 Oct 2012 19:33:43 +0200
parents 8ffcdd6aa410
children 4968acb39c7b
line wrap: on
line source

# HG changeset patch
# Parent 87ba2a19a59fb7be346ad40a57439b6b752b152e
rathaxes: start to queue up packets in the TX ring on the e1000 sample

diff --git a/rathaxes/samples/e1000/CMakeLists.txt b/rathaxes/samples/e1000/CMakeLists.txt
--- a/rathaxes/samples/e1000/CMakeLists.txt
+++ b/rathaxes/samples/e1000/CMakeLists.txt
@@ -1,6 +1,22 @@
-ADD_RATHAXES_SOURCES(e1000_src lkm.rtx
-                     RTI builtin.rti log.rti lkm.rti pci.rti socket.rti ethernet.rti e1000.rti
-                     BLT log.blt lkm.blt pci.blt socket.blt ethernet.blt e1000.blt)
+ADD_RATHAXES_SOURCES(e1000_src
+                     lkm.rtx
+                     RTI
+                     builtin.rti
+                     log.rti
+                     lkm.rti
+                     device.rti
+                     pci.rti
+                     socket.rti
+                     ethernet.rti
+                     e1000.rti
+                     BLT
+                     log.blt
+                     lkm.blt
+                     pci.blt
+                     device.blt
+                     socket.blt
+                     ethernet.blt
+                     e1000.blt)
 
 IF (LINUX_KBUILD_DIR)
     ADD_RATHAXES_LKM(e1000 e1000_src)
diff --git a/rathaxes/samples/e1000/device.blt b/rathaxes/samples/e1000/device.blt
new file mode 100644
--- /dev/null
+++ b/rathaxes/samples/e1000/device.blt
@@ -0,0 +1,25 @@
+with Device, LKM
+{
+    template type   Device::Device()
+    {
+        chunk   LKM::includes()
+        {
+            #include <linux/device.h>
+
+            static const ${Device::Device}  force_rtx_device_decl;
+        }
+
+        chunk   decl()
+        {
+            typedef struct device   *rtx_device_p;
+        }
+
+        chunk   init()
+        {
+        }
+
+        map
+        {
+        }
+    }
+}
diff --git a/rathaxes/samples/e1000/device.rti b/rathaxes/samples/e1000/device.rti
new file mode 100644
--- /dev/null
+++ b/rathaxes/samples/e1000/device.rti
@@ -0,0 +1,9 @@
+interface Device : LKM
+{
+    provided type   Device
+    {
+        chunk       LKM::includes();
+        method      decl();
+        method      init();
+    }
+}
diff --git a/rathaxes/samples/e1000/e1000.blt b/rathaxes/samples/e1000/e1000.blt
--- a/rathaxes/samples/e1000/e1000.blt
+++ b/rathaxes/samples/e1000/e1000.blt
@@ -11,16 +11,15 @@
 
         chunk   ::decl()
         {
-            typedef struct rtx_e1000_rx_descriptor
+            struct rtx_e1000_rx_descriptor
             {
-                /* actual types are in comments */
-                unsigned long int   /* __le64 */    buff_addr;
-                unsigned short      /* __le16 */    length;
-                unsigned short      /* __le16 */    csum;
-                unsigned char                       status;
-                unsigned char                       errors;
-                unsigned short      /* __le16 */    special;
-            } *rtx_e1000_rx_descriptor_p;
+                __le64          buff_addr;
+                __le16          length;
+                __le16          csum;
+                unsigned char   status;
+                unsigned char   errors;
+                __le16          special;
+            };
         }
 
         chunk   ::init()
@@ -50,30 +49,30 @@
 
         chunk   ::decl()
         {
-            typedef struct rtx_e1000_tx_descriptor
+            struct rtx_e1000_tx_descriptor
             {
-                unsigned long int   /* __le64 */    buff_addr;
+                __le64                  buff_addr;
                 union
                 {
-                    unsigned int    /* __le32 */    data;
+                    __le32              data;
                     struct
                     {
-                        unsigned short /* __le16 */ length;
-                        unsigned char               csum_offset; /* CSO */
-                        unsigned char               cmd;
-                    }                               fields;
-                }                                   lower;
+                        __le16          length;
+                        unsigned char   csum_offset; /* CSO */
+                        unsigned char   cmd;
+                    }                   fields;
+                }                       lower;
                 union
                 {
-                    unsigned int    /* __le32 */    data;
+                    __le32              data;
                     struct
                     {
-                        unsigned char               status;
-                        unsigned char               csum_start; /* CSS */
-                        unsigned short /* __le16 */ special;
-                    }                               fields;
-                }                                   upper;
-            } *rtx_e1000_tx_descriptor_p;
+                        unsigned char   status;
+                        unsigned char   csum_start; /* CSS */
+                        __le16          special;
+                    }                   fields;
+                }                       upper;
+            };
         }
 
         chunk   ::init()
@@ -93,9 +92,8 @@
      *   CNorm unstrict);
      * - dma_base: (physical) address of the ring where the device can access
      *   the different descriptors;
-     * - skbuffs: array of the skbuffs associated with each descriptor;
-     * - dma_skbuffs: (physical) address of each skbuff where the device can
-     *   write the received packets;
+     * - skbuffs: array of the skbuffs and their dma (physical) address
+     *   associated with each descriptor.
      */
     template type   e1000::RxRing()
     {
@@ -110,9 +108,8 @@
             {
                 unsigned int                    size;
                 struct rtx_e1000_rx_descriptor  *base;
-                void*   /* dma_addr_t */        dma_base;
-                struct sk_buff                  *skbuffs[256 /* ${config.rx_ring_size} */];
-                void*   /* dma_addr_t */        dma_skbuffs[256 /* ${config.rx_ring_size} */];
+                dma_addr_t                      dma_base;
+                ${Socket::SKBuff}               skbuffs[${config.rx_ring_size}];
             };
         }
 
@@ -134,7 +131,13 @@
      * - base: address of the ring (we can't use the typedef here until we get
      *   CNorm unstrict);
      * - dma_base: (physical) address of the ring where the device can access
-     *   the different descriptors.
+     *   the different descriptors;
+     * - skbuffs: the skbuffs associated with each descriptor of the ring;
+     * - head: index on the head of the ring;
+     * - tail: index on the tail of the ring.
+     *
+     * Keep in mind that the head and tail fields are, obviously, not
+     * synchronized with TDT/TDH on the device.
      */
     template type   e1000::TxRing()
     {
@@ -148,11 +151,88 @@
             struct rtx_e1000_tx_ring
             {
                 unsigned int                    size;
-                struct rtx_e1000_tx_descriptor  *base;
-                void*   /* dma_addr_t */        dma_base;
+                /* XXX: can't use ${e1000::TxDescriptor} here: */
+                struct rtx_e1000_tx_descriptor  *base; /* rename to descs */
+                dma_addr_t                      dma_base;
+                ${Socket::SKBuff}               skbuffs[${config.tx_ring_size}];
+                unsigned int                    head;
+                unsigned int                    tail;
             };
         }
 
+        chunk   LKM::prototypes()
+        {
+            static unsigned int rtx_e1000_tx_ring_descriptors_remaining(struct rtx_e1000_tx_ring *);
+            static int          rtx_e1000_tx_ring_tso_cksum_offload(struct rtx_e1000_tx_ring *, struct rtx_socket_skbuff *);
+            static void         rtx_e1000_tx_ring_put(struct rtx_e1000_tx_ring *, struct rtx_socket_skbuff *);
+            static void         rtx_e1000_tx_ring_start_xmit(struct rtx_e1000_tx_ring *, const struct rtx_e1000_ctx *);
+        }
+
+        chunk   LKM::code()
+        {
+            static unsigned int rtx_e1000_tx_ring_descriptors_remaining(struct rtx_e1000_tx_ring *self)
+            {
+                /*
+                 * One slot always stays unused: tail == head must only ever mean
+                 * "ring is empty". XXX: 256 is ${config.tx_ring_size}.
+                 */
+                unsigned int used = (self->tail + 256 - self->head) % 256;
+                return 256 - 1 - used;
+            }
+
+            static int          rtx_e1000_tx_ring_tso_cksum_offload(struct rtx_e1000_tx_ring *self, struct rtx_socket_skbuff *skb)
+            {
+                return skb_is_gso(skb->skbuff) || skb->skbuff->ip_summed == CHECKSUM_PARTIAL;
+            }
+
+            static void         rtx_e1000_tx_ring_put(struct rtx_e1000_tx_ring *self, struct rtx_socket_skbuff *skb)
+            {
+                WARN_ON(!skb);
+
+                /*
+                 * Mark it as the last buffer (EOP) and ask the card to
+                 * insert the Ethernet FCS (Frame Check Sequence).
+                 *
+                 * XXX: it sucks to use skb_headlen() here (this part of the
+                 * code shouldn't be aware of it and should use something more
+                 * abstract).
+                 */
+                struct rtx_e1000_tx_descriptor *tx_desc = &self->base[self->tail];
+                tx_desc->lower.data = cpu_to_le32(
+                        E1000_TXD_CMD_EOP  |
+                        E1000_TXD_CMD_IFCS |
+                        skb_headlen(skb->skbuff));
+                tx_desc->upper.data = 0;
+                tx_desc->buff_addr = cpu_to_le64(skb->dma_handle);
+                memcpy(&self->skbuffs[self->tail], skb, sizeof(*skb));
+                self->tail = (self->tail + 1) % 256 /* XXX: ${config.tx_ring_size} */;
+            }
+
+            static void         rtx_e1000_tx_ring_start_xmit(struct rtx_e1000_tx_ring *self, const struct rtx_e1000_ctx *hw_ctx)
+            {
+                rtx_e1000_register_write32(hw_ctx, E1000_TDT, self->tail);
+            }
+        }
+
+        chunk   descriptors_remaining()
+        {
+            rtx_e1000_tx_ring_descriptors_remaining(${self});
+        }
+
+        chunk   tso_cksum_offload(Socket::SKBuff skb)
+        {
+        }
+
+        chunk   put(Socket::SKBuff skb)
+        {
+            rtx_e1000_tx_ring_put(${self}, &${skb});
+        }
+
+        chunk   start_xmit(e1000::Context ctx)
+        {
+            rtx_e1000_tx_ring_start_xmit(${self}, ${ctx});
+        }
+
         chunk   ::init()
         {
         }
@@ -334,6 +414,45 @@
         }
     }
 
+    template type   e1000::TxDescriptorFlags()
+    {
+        chunk LKM::includes()
+        {
+            static const ${e1000::TxDescriptorFlags}  force_enum_rtx_e1000_tx_descriptor_flags_decl;
+        }
+
+        chunk ::decl()
+        {
+            enum    rtx_e1000_tx_descriptor_flags
+            {
+                E1000_TXD_DTYP_D        = 0x00100000, /* Data Descriptor */
+                E1000_TXD_DTYP_C        = 0x00000000, /* Context Descriptor */
+                E1000_TXD_POPTS_IXSM    = 0x01,       /* Insert IP checksum */
+                E1000_TXD_POPTS_TXSM    = 0x02,       /* Insert TCP/UDP checksum */
+                E1000_TXD_CMD_EOP       = 0x01000000, /* End of Packet */
+                E1000_TXD_CMD_IFCS      = 0x02000000, /* Insert FCS (Ethernet CRC) */
+                E1000_TXD_CMD_IC        = 0x04000000, /* Insert Checksum */
+                E1000_TXD_CMD_RS        = 0x08000000, /* Report Status */
+                E1000_TXD_CMD_RPS       = 0x10000000, /* Report Packet Sent */
+                E1000_TXD_CMD_DEXT      = 0x20000000, /* Descriptor extension (0 = legacy) */
+                E1000_TXD_CMD_VLE       = 0x40000000, /* Add VLAN tag */
+                E1000_TXD_CMD_IDE       = 0x80000000, /* Enable Tidv register */
+                E1000_TXD_STAT_DD       = 0x00000001, /* Descriptor Done */
+                E1000_TXD_STAT_EC       = 0x00000002, /* Excess Collisions */
+                E1000_TXD_STAT_LC       = 0x00000004, /* Late Collisions */
+                E1000_TXD_STAT_TU       = 0x00000008, /* Transmit underrun */
+                E1000_TXD_CMD_TCP       = 0x01000000, /* TCP packet */
+                E1000_TXD_CMD_IP        = 0x02000000, /* IP packet */
+                E1000_TXD_CMD_TSE       = 0x04000000, /* TCP Seg enable */
+                E1000_TXD_STAT_TC       = 0x00000004, /* Tx Underrun */
+            };
+        }
+
+        map
+        {
+        }
+    }
+
     template sequence   e1000::create_device()
     {
         chunk Ethernet::create_device(PCI::Device pdev, Ethernet::Device rtx_ether_ctx)
@@ -376,8 +495,7 @@
             udelay(10);
 
             /* Now we can load its mac address (thanks minix code) */
-            int i = 0;
-            for (i = 0 /* < this is not generated! (cnorm bug) */; i < 3; ++i)
+            for (int i = 0; i < 3; ++i)
             {
                 rtx_e1000_register_write32(&${rtx_ether_ctx}->hw_ctx, E1000_EEPROM_READ, (i << 8) | 1);
 
@@ -420,6 +538,7 @@
         }
     }
 
+    /* TODO: make that a method of e1000::Context */
     template sequence   e1000::print_status(Ethernet::Device ctx)
     {
         chunk   LKM::prototypes()
@@ -466,17 +585,19 @@
      * ${e1000.init(E1000_STATUS); // didn't work, so we used the next line
      * reg_status = E1000_STATUS;
      * ${e1000::register_read32(rtx_ether_ctx->hw_ctx, reg_status)};
+     *
+     * TODO: make them methods of e1000::Context
      */
     template sequence   e1000::register_read32(e1000::Context ctx, e1000::Register reg_offset)
     {
         chunk   LKM::prototypes()
         {
-            static unsigned int    rtx_e1000_register_read32(struct rtx_e1000_ctx *, unsigned int);
+            static unsigned int    rtx_e1000_register_read32(const struct rtx_e1000_ctx *, unsigned int);
         }
 
         chunk   LKM::code()
         {
-            static unsigned int    rtx_e1000_register_read32(struct rtx_e1000_ctx *ctx, unsigned int reg_offset)
+            static unsigned int    rtx_e1000_register_read32(const struct rtx_e1000_ctx *ctx, unsigned int reg_offset)
             {
                 return ioread32(ctx->ioaddr + reg_offset);
             }
@@ -492,12 +613,12 @@
     {
         chunk   LKM::prototypes()
         {
-            static void rtx_e1000_register_write32(struct rtx_e1000_ctx *, unsigned int, unsigned int);
+            static void rtx_e1000_register_write32(const struct rtx_e1000_ctx *, unsigned int, unsigned int);
         }
 
         chunk   LKM::code()
         {
-            static void rtx_e1000_register_write32(struct rtx_e1000_ctx *ctx, unsigned int reg_offset, unsigned int value)
+            static void rtx_e1000_register_write32(const struct rtx_e1000_ctx *ctx, unsigned int reg_offset, unsigned int value)
             {
                 iowrite32(value, ctx->ioaddr + reg_offset);
             }
@@ -513,12 +634,12 @@
     {
         chunk   LKM::prototypes()
         {
-            static void rtx_e1000_register_set32(struct rtx_e1000_ctx *, unsigned int, unsigned int);
+            static void rtx_e1000_register_set32(const struct rtx_e1000_ctx *, unsigned int, unsigned int);
         }
 
         chunk   LKM::code()
         {
-            static void rtx_e1000_register_set32(struct rtx_e1000_ctx *ctx, unsigned int reg_offset, unsigned int value)
+            static void rtx_e1000_register_set32(const struct rtx_e1000_ctx *ctx, unsigned int reg_offset, unsigned int value)
             {
                 iowrite32(rtx_e1000_register_read32(ctx, reg_offset) | value, ctx->ioaddr + reg_offset);
             }
@@ -534,12 +655,12 @@
     {
         chunk   LKM::prototypes()
         {
-            static void rtx_e1000_register_unset32(struct rtx_e1000_ctx *, unsigned int, unsigned int);
+            static void rtx_e1000_register_unset32(const struct rtx_e1000_ctx *, unsigned int, unsigned int);
         }
 
         chunk   LKM::code()
         {
-            static void rtx_e1000_register_unset32(struct rtx_e1000_ctx *ctx, unsigned int reg_offset, unsigned int value)
+            static void rtx_e1000_register_unset32(const struct rtx_e1000_ctx *ctx, unsigned int reg_offset, unsigned int value)
             {
                 iowrite32(rtx_e1000_register_read32(ctx, reg_offset) & ~value, ctx->ioaddr + reg_offset);
             }
@@ -626,12 +747,18 @@
         }
     }
 
+    /* TODO: refactor
+     *
+     * Split into two methods:
+     * - e1000::RxRing::init_rx()
+     * - e1000::TxRing::init_tx()
+     *
+     * Also it should use the new methods in Socket::SKbuff.
+     */
     template sequence   set_up_device(Ethernet::Device ctx)
     {
         chunk  ::CALL()
         {
-            typedef unsigned long int   dma_addr_t;
-
             /*
              * This part is documented in the Intel Gigabit Ethernet Controller
              * Software Developper manual. (You can find it in the doc/hardware
@@ -663,6 +790,8 @@
              *   E1000_CRCERRS to E1000_TSCTFC.
              */
 
+            int i;
+
             rtx_e1000_register_set32(hw_ctx, E1000_CTRL,
                                      E1000_CMD_ASDE |
                                      E1000_CMD_SLU);
@@ -676,7 +805,6 @@
             rtx_e1000_register_write32(hw_ctx, E1000_FCAL, 0);
             rtx_e1000_register_write32(hw_ctx, E1000_FCT, 0);
             rtx_e1000_register_write32(hw_ctx, E1000_FCTTV, 0);
-            int i = 0; /* CNorm workaround, the init part of for isn't generated */
             for (i = 0; i != 64; ++i)
                 rtx_e1000_register_write32(hw_ctx, E1000_CRCERRS + i * 4, 0);
             
@@ -719,7 +847,6 @@
 
             /* 2. Initialize the MTA */
 
-            i = 0; /* CNorm workaround, the init part of for isn't generated */
             for (i = 0; i != 128; ++i)
                 rtx_e1000_register_write32(hw_ctx, E1000_MTA + i * 4, 0);
 
@@ -733,7 +860,7 @@
             hw_ctx->rx_ring.base = dma_alloc_coherent(
                     &${ctx}->pci_dev->dev,
                     hw_ctx->rx_ring.size,
-                    (dma_addr_t *)&hw_ctx->rx_ring.dma_base,
+                    &hw_ctx->rx_ring.dma_base,
                     GFP_KERNEL);
             if (!hw_ctx->rx_ring.base)
             {
@@ -747,41 +874,37 @@
              * Allocate the skbuffs, map them for DMA, and write their address
              * in the corresponding descriptor.
              */
-            i = 0;
             for (i = 0; i != ${config.rx_ring_size}; ++i)
             {
-                hw_ctx->rx_ring.skbuffs[i] = netdev_alloc_skb(
+                hw_ctx->rx_ring.skbuffs[i].skbuff = netdev_alloc_skb(
                         ${ctx}->net_dev,
                         ${config.rx_buffer_len});
-                if (!hw_ctx->rx_ring.skbuffs[i])
+                if (!hw_ctx->rx_ring.skbuffs[i].skbuff)
                 {
                     ${Log::info("cannot allocate a skbuff for the rx ring")};
                     goto err_skbuffs_alloc;
                 }
-                hw_ctx->rx_ring.dma_skbuffs[i] = (void *)dma_map_single(
+                hw_ctx->rx_ring.skbuffs[i].dma_handle = dma_map_single(
                         &${ctx}->pci_dev->dev,
-                        hw_ctx->rx_ring.skbuffs[i]->data,
+                        hw_ctx->rx_ring.skbuffs[i].skbuff->data,
                         ${config.rx_buffer_len},
                         DMA_FROM_DEVICE);
-                /*
-                 * Either this fails because, when compiling with gcc because
-                 * the last argument is not of the correct type (dma_addr_t).
-                 * Or it fails because of the lack of CNorm Unstrict.
-                 */
-                if (dma_mapping_error(&${ctx}->pci_dev->dev, (dma_addr_t)hw_ctx->rx_ring.dma_skbuffs[i]))
+                int dma_error = dma_mapping_error(&${ctx}->pci_dev->dev,
+                        hw_ctx->rx_ring.skbuffs[i].dma_handle);
+                if (dma_error)
                 {
                     ${Log::info("cannot dma-map a skbuff for the rx ring")};
                     goto err_skbuffs_map;
                 }
-                hw_ctx->rx_ring.base[i].buff_addr = (unsigned long int)cpu_to_le64(
-                        hw_ctx->rx_ring.dma_skbuffs[i]);
+                hw_ctx->rx_ring.base[i].buff_addr = cpu_to_le64(
+                        hw_ctx->rx_ring.skbuffs[i].dma_handle); /* bus address, not the skb pointer */
             }
 
             ${Log::info("setup_device: skbuffs allocated")};
 
             /* 5. Save the emplacement and the size of the ring in RDBA/RDLEN */
-            rtx_e1000_register_write32(hw_ctx, E1000_RDBAL, (dma_addr_t)hw_ctx->rx_ring.dma_base & 0xffffffff);
-            rtx_e1000_register_write32(hw_ctx, E1000_RDBAH, (dma_addr_t)hw_ctx->rx_ring.dma_base >> 32);
+            rtx_e1000_register_write32(hw_ctx, E1000_RDBAL, hw_ctx->rx_ring.dma_base & 0xffffffff);
+            rtx_e1000_register_write32(hw_ctx, E1000_RDBAH, hw_ctx->rx_ring.dma_base >> 32);
             rtx_e1000_register_write32(hw_ctx, E1000_RDLEN, hw_ctx->rx_ring.size);
 
             /* 6. Setup RDH/RDT */
@@ -820,7 +943,7 @@
             hw_ctx->tx_ring.base = dma_alloc_coherent(
                     &${ctx}->pci_dev->dev,
                     hw_ctx->tx_ring.size,
-                    (dma_addr_t *)&hw_ctx->tx_ring.dma_base,
+                    &hw_ctx->tx_ring.dma_base,
                     GFP_KERNEL);
             if (!hw_ctx->rx_ring.base)
             {
@@ -831,13 +954,15 @@
             ${Log::info("setup_device: tx descriptors allocated")};
 
             /* 2. Save the emplacement and the size of the ring in TDBA/TDLEN */
-            rtx_e1000_register_write32(hw_ctx, E1000_TDBAL, (dma_addr_t)hw_ctx->tx_ring.dma_base & 0xffffffff);
-            rtx_e1000_register_write32(hw_ctx, E1000_TDBAH, (dma_addr_t)hw_ctx->tx_ring.dma_base >> 32);
+            rtx_e1000_register_write32(hw_ctx, E1000_TDBAL, hw_ctx->tx_ring.dma_base & 0xffffffff);
+            rtx_e1000_register_write32(hw_ctx, E1000_TDBAH, hw_ctx->tx_ring.dma_base >> 32);
             rtx_e1000_register_write32(hw_ctx, E1000_TDLEN, hw_ctx->tx_ring.size);
 
             /* 3. Setup TDH/TDT to zero: the queue is empty */
             rtx_e1000_register_write32(hw_ctx, E1000_TDH, 0);
             rtx_e1000_register_write32(hw_ctx, E1000_TDT, 0);
+            hw_ctx->tx_ring.head = 0;
+            hw_ctx->tx_ring.tail = 0;
 
             /* 4. Set TCTL.PSP and enable the transmitter */
             rtx_e1000_register_set32(hw_ctx, E1000_TCTL, E1000_TCTL_PSP|E1000_TCTL_PSP);
@@ -860,15 +985,15 @@
             {
                 dma_unmap_single(
                         &${ctx}->pci_dev->dev,
-                        (dma_addr_t)hw_ctx->rx_ring.dma_skbuffs[i],
+                        hw_ctx->rx_ring.skbuffs[i].dma_handle,
                         ${config.rx_buffer_len},
                         DMA_FROM_DEVICE);
         err_skbuffs_map:
-                dev_kfree_skb(hw_ctx->rx_ring.skbuffs[i]);
+                dev_kfree_skb(hw_ctx->rx_ring.skbuffs[i].skbuff);
             }
 
             dma_free_coherent(&${ctx}->pci_dev->dev, hw_ctx->rx_ring.size,
-                    hw_ctx->rx_ring.base, (dma_addr_t)hw_ctx->rx_ring.dma_base);
+                    hw_ctx->rx_ring.base, hw_ctx->rx_ring.dma_base);
         err_rx_ring_alloc:
             return -ENOMEM;
 
@@ -876,12 +1001,15 @@
         }
     }
 
+    /* TODO:
+     *
+     * Refactor into two methods (one in RxRing and one in TxRing) and make use
+     * of the new methods in Socket::SKBuff.
+     */
     template sequence   free_rx_tx(Ethernet::Device ctx)
     {
         chunk   ::CALL()
         {
-            typedef unsigned long int   dma_addr_t;
-
             ${e1000::Context} *hw_ctx;
             hw_ctx = &${ctx}->hw_ctx;
 
@@ -890,18 +1018,17 @@
              * - Unmap and free the skbuffs;
              * - Free the descriptors array.
              */
-            int i = 0;
-            for (i = 0; i != ${config.rx_ring_size}; ++i)
+            for (int i = 0; i != ${config.rx_ring_size}; ++i)
             {
                 dma_unmap_single(
                         &${ctx}->pci_dev->dev,
-                        (dma_addr_t)hw_ctx->rx_ring.dma_skbuffs[i],
+                        hw_ctx->rx_ring.skbuffs[i].dma_handle,
                         ${config.rx_buffer_len},
                         DMA_FROM_DEVICE);
-                dev_kfree_skb(hw_ctx->rx_ring.skbuffs[i]);
+                dev_kfree_skb(hw_ctx->rx_ring.skbuffs[i].skbuff);
             }
             dma_free_coherent(&${ctx}->pci_dev->dev, hw_ctx->rx_ring.size,
-                    hw_ctx->rx_ring.base, (dma_addr_t)hw_ctx->rx_ring.dma_base);
+                    hw_ctx->rx_ring.base, hw_ctx->rx_ring.dma_base);
             ${Log::info("free_rx_tx: rx ring free'ed")};
 
             /*
@@ -909,7 +1036,7 @@
              * - Free the descriptors array.
              */
             dma_free_coherent(&${ctx}->pci_dev->dev, hw_ctx->tx_ring.size,
-                    hw_ctx->tx_ring.base, (dma_addr_t)hw_ctx->tx_ring.dma_base);
+                    hw_ctx->tx_ring.base, hw_ctx->tx_ring.dma_base);
             ${Log::info("free_rx_tx: tx ring free'ed")};
         }
     }
@@ -930,4 +1057,92 @@
             }
         }
     }
+
+    template sequence   e1000::_xmit_tso_cksum_offload(Ethernet::Device ctx, Socket::SKBuff skb)
+    {
+        chunk   ::CALL()
+        {
+        }
+    }
+
+    template sequence   e1000::xmit(Ethernet::Device ctx, Socket::KernelSKBuff kernel_skb)
+    {
+        chunk   ::CALL()
+        {
+            /*
+             * Put packets on the TX ring, must return NETDEV_TX_OK or
+             * NETDEV_TX_BUSY.
+             */
+
+            ${Socket::SKBuff} skb;
+            ${e1000::Context} *hw_ctx;
+            ${e1000::TxRing} *tx_ring;
+            ${Device::Device} dev;
+
+            ${local.skb.init(kernel_skb)};
+            hw_ctx = &${ctx}->hw_ctx;
+            tx_ring = &hw_ctx->tx_ring;
+            dev = &${ctx}->pci_dev->dev;
+
+            ${Log::info("xmit: skbuff details:")};
+            /*
+             * skb is not expanded to the bound C variable (should be rtx_skbuff),
+             * which is funny because it works for the sequence template call
+             * right after.
+             */
+            /*
+             * XXX: doesn't work (I tried to pass self explicitly too):
+             * ${local.skb.dump_infos()};
+             */
+            rtx_socket_skbuff_dump_infos(&skb);
+
+            /*
+             * The transmission is going to be several steps:
+             * 1. TCP Segmentation Offload & Checksum Offloading: pick a
+             *    descriptor from the tx ring and fill it as a context
+             *    descriptor to allow the card to slice into several packets
+             *    according to the MSS;
+             * 2. DMA Map the skbuff data as slices of 4096;
+             * 3. Signal the hardware that data is available via a tx desc.
+             */
+
+            /* XXX: same thing wanted to use: ${local.tx_ring.descriptors_remaining()} */
+            if (!rtx_e1000_tx_ring_descriptors_remaining(tx_ring))
+                return NETDEV_TX_BUSY;
+
+            /* 1. Offloading */
+
+            /* XXX: ${local.tx_ring.tso_cksum_offload(skb)}; */
+            if (rtx_e1000_tx_ring_tso_cksum_offload(tx_ring, &skb))
+            {
+                ${Log::info("xmit: the packet needs to be fragmented and/or checksummed but this not implemented yet!")};
+                goto err_offload;
+            }
+
+            /* 2. Map the data */
+
+            /* XXX: ${local.skb.map_to(local.dev)}; */
+            if (rtx_socket_skbuff_map(&skb, dev, DMA_TO_DEVICE))
+            {
+                ${Log::info("xmit: can't DMA map a SKbuff")};
+                goto err_skb_map_to;
+            }
+
+            /* 3. Update the TX Ring and signal the hardware */
+
+            /* XXX: ${local.tx_ring.put(skb)}; */
+            rtx_e1000_tx_ring_put(tx_ring, &skb);
+
+            /* XXX: ${local.tx_ring.start_xmit(hw_ctx)}; */
+            rtx_e1000_tx_ring_start_xmit(tx_ring, hw_ctx);
+
+            return NETDEV_TX_OK;
+
+        err_offload:
+        err_skb_map_to:
+            /* XXX: ${local.skb.unmap_to_and_free(local.dev)}; */
+            rtx_socket_skbuff_unmap_and_free(&skb, dev, DMA_TO_DEVICE);
+            return NETDEV_TX_OK;
+        }
+    }
 }
diff --git a/rathaxes/samples/e1000/e1000.rti b/rathaxes/samples/e1000/e1000.rti
--- a/rathaxes/samples/e1000/e1000.rti
+++ b/rathaxes/samples/e1000/e1000.rti
@@ -31,8 +31,15 @@
     provided type   TxRing
     {
         chunk       LKM::includes();
+        chunk       LKM::prototypes();
+        chunk       LKM::code();
         method      decl();
         method      init();
+
+        method      descriptors_remaining();
+        method      tso_cksum_offload(Socket::SKBuff);
+        method      put(Socket::SKBuff);
+        method      start_xmit(e1000::Context);
     }
 
     /*
@@ -51,6 +58,12 @@
         method      decl();
     }
 
+    provided type   TxDescriptorFlags
+    {
+        chunk       LKM::includes();
+        chunk       ::decl();
+    }
+
     provided sequence   create_device()
     {
         provided chunk  Ethernet::create_device(PCI::Device, Ethernet::Device);
@@ -109,6 +122,16 @@
         provided chunk  ::CALL();
     }
 
+    provided sequence   _xmit_tso_cksum_offload(Ethernet::Device, Socket::SKBuff)
+    {
+        provided chunk  ::CALL();
+    }
+
+    provided sequence   xmit(Ethernet::Device, Socket::KernelSKBuff)
+    {
+        provided chunk  ::CALL();
+    }
+
     provided sequence   register_read32(e1000::Context, e1000::Register)
     {
         provided chunk  LKM::prototypes();
diff --git a/rathaxes/samples/e1000/ethernet.blt b/rathaxes/samples/e1000/ethernet.blt
--- a/rathaxes/samples/e1000/ethernet.blt
+++ b/rathaxes/samples/e1000/ethernet.blt
@@ -1,6 +1,56 @@
 with Ethernet, PCI, LKM, Log
 {
-    template type Ethernet::Net()
+    template type   Ethernet::ProtocolId()
+    {
+        chunk LKM::prototypes()
+        {
+            static const char   *rtx_ethernet_protocol_id_to_str(unsigned short);
+        }
+
+        chunk LKM::data()
+        {
+            static const struct
+            {
+                const unsigned short    id;
+                const char              *name;
+            } rtx_ethernet_proto_table[] =
+            {
+                { ETH_P_IP,     "IPv4"  },
+                { ETH_P_IPV6,   "IPv6"  },
+                { ETH_P_ARP,    "ARP"   },
+            };
+        }
+
+        chunk LKM::code()
+        {
+            static const char   *rtx_ethernet_protocol_id_to_str(unsigned short proto_id)
+            {
+                for (int i = 0; /* entry count = whole table size / size of one entry */
+                     i != sizeof(rtx_ethernet_proto_table) / sizeof(rtx_ethernet_proto_table[0]);
+                     i++)
+                    if (proto_id == rtx_ethernet_proto_table[i].id)
+                        return rtx_ethernet_proto_table[i].name;
+
+                return "Unknown"; /* no table entry matched proto_id */
+            }
+        }
+
+        chunk decl()
+        {
+            typedef unsigned short  rtx_ether_protocol_id;
+        }
+
+        chunk to_str()
+        {
+            rtx_ethernet_protocol_id_to_str(${self});
+        }
+
+        map
+        {
+        }
+    }
+
+    template type   Ethernet::AbstractDevice()
     {
         chunk LKM::includes()
         {
@@ -17,11 +67,6 @@
         }
     }
 
-    /*
-     * Unlike PCI::Device, Ethernet::Device doesn't match the struct net_device
-     * from Linux. Ethernet::Device is the type that we use in the private
-     * field of the struct net_device.
-     */
     template type   Ethernet::Device()
     {
         chunk LKM::includes()
@@ -47,15 +92,15 @@
                 struct pci_dev          *pci_dev;
                 struct net_device       *net_dev;
 
-                /* while waiting on issue #8 */
-                //${e1000::Context}       hw_ctx;
-                // In the long-term, this may disappear for a new concept allowing
-                // to embbed a descriptor defined and manipulated by the front-end
+                /*
+                 * In the long term, this may be replaced by a new concept allowing
+                 * us to embed a descriptor defined and manipulated by the front-end.
+                 */
                 ${pointcut Ethernet::SubContext()};
             } *rtx_ethernet_dev_p;
         }
 
-        chunk ::init(Ethernet::Net net_dev, PCI::Device pci_dev)
+        chunk ::init(Ethernet::AbstractDevice net_dev, PCI::Device pci_dev)
         {
             ${self} = netdev_priv(${net_dev});
             /*
@@ -82,9 +127,8 @@
         {
             static int  rtx_ethernet_open(struct net_device *dev)
             {
-                struct rtx_ethernet_dev* rtx_ether_dev = netdev_priv(dev);
+                ${Ethernet::Device} rtx_ether_dev = netdev_priv(dev);
 
-                ${cast local.rtx_ether_dev as Ethernet::Device};
                 ${pointcut ::IMPLEMENTATION(local.rtx_ether_dev)};
 
                 return 0;
@@ -92,7 +136,7 @@
         }
     }
 
-    template sequence   Ethernet::send(Ethernet::Device dev, Socket::SKBuff skb)
+    template sequence   Ethernet::send(Ethernet::Device dev, Socket::KernelSKBuff skb)
     {
         chunk LKM::prototypes()
         {
@@ -101,13 +145,11 @@
 
         chunk LKM::code()
         {
-            static int  rtx_ethernet_xmit(struct sk_buff* skb, struct net_device *dev)
+            static int  rtx_ethernet_xmit(struct sk_buff* kernel_skb, struct net_device *net_dev)
             {
-                ${cast local.dev as Ethernet::Device};
-                ${cast local.skb as Socket::SKBuff};
-                ${pointcut ::IMPLEMENTATION(local.dev, local.skb)};
-
-                return 0;
+                ${Ethernet::Device} rtx_ethernet_dev = netdev_priv(net_dev); /* driver-private area of net_dev */
+                ${cast local.kernel_skb as Socket::KernelSKBuff};
+                ${pointcut ::IMPLEMENTATION(local.rtx_ethernet_dev, local.kernel_skb)}; /* NOTE(review): no return statement follows anymore; presumably the woven implementation returns the status -- confirm */
             }
         }
     }
@@ -123,9 +165,8 @@
         {
             static int  rtx_ethernet_close(struct net_device *dev)
             {
-                struct rtx_ethernet_dev* rtx_ether_dev = netdev_priv(dev);
+                ${Ethernet::Device} rtx_ether_dev = netdev_priv(dev);
 
-                ${cast local.rtx_ether_dev as Ethernet::Device};
                 ${pointcut ::IMPLEMENTATION(local.rtx_ether_dev)};
 
                 return 0;
@@ -148,11 +189,8 @@
         {
             static enum irqreturn   rtx_ethernet_interrupt_handler(int irq, void *dev_id)
             {
-                struct rtx_ethernet_dev* rtx_ether_dev;
-                struct rtx_e1000_ctx* ctx;
+                ${Ethernet::Device} rtx_ether_dev = dev_id;
 
-                rtx_ether_dev = dev_id;
-                ${cast local.rtx_ether_dev as Ethernet::Device};
                 ${pointcut ::IMPLEMENTATION(local.rtx_ether_dev)};
 
                 return IRQ_NONE;
diff --git a/rathaxes/samples/e1000/ethernet.rti b/rathaxes/samples/e1000/ethernet.rti
--- a/rathaxes/samples/e1000/ethernet.rti
+++ b/rathaxes/samples/e1000/ethernet.rti
@@ -1,16 +1,30 @@
 interface Ethernet : Socket, PCI, LKM
 {
-    provided type   Net
+    provided type   ProtocolId  /* Ethernet protocol id; to_str() yields its printable name */
+    {
+        chunk       LKM::prototypes();
+        chunk       LKM::data();
+        chunk       LKM::code();
+        method      decl();
+        method      to_str();
+    }
+
+    provided type   AbstractDevice  /* formerly "Net"; the value Ethernet::Device::init passes to netdev_priv() */
     {
         chunk       LKM::includes();
-        method       decl();
+        method      decl();
     }
 
+    /*
+     * Unlike PCI::Device, Ethernet::Device doesn't match the struct net_device
+     * from Linux. Ethernet::Device is the type stored in the private area of
+     * the struct net_device (i.e. what netdev_priv() returns).
+     */
     provided type   Device
     {
         chunk       LKM::includes();
         method      decl();
-        method      init(Ethernet::Net, PCI::Device);
+        method      init(Ethernet::AbstractDevice, PCI::Device);
         pointcut    Ethernet::SubContext();
     }
 
@@ -22,7 +36,7 @@
         provided chunk  LKM::code();
     }
 
-    required sequence   send(Ethernet::Device dev, Socket::SKBuff skb)
+    required sequence   send(Ethernet::Device, Socket::KernelSKBuff)
     {
         provided chunk  LKM::prototypes();
         provided chunk  LKM::code();
diff --git a/rathaxes/samples/e1000/lkm.rtx b/rathaxes/samples/e1000/lkm.rtx
--- a/rathaxes/samples/e1000/lkm.rtx
+++ b/rathaxes/samples/e1000/lkm.rtx
@@ -1,4 +1,4 @@
-device LKM use LKM, PCI, Ethernet, Log
+device LKM use LKM, PCI, Ethernet, Log, Socket
 {
     Ethernet::open(Ethernet::Device dev)
     {
@@ -43,9 +43,10 @@
         e1000::handle_interrupt(dev);
     }
 
-    Ethernet::send(Ethernet::Device dev, Socket::SKBuff skb)
+    Ethernet::send(Ethernet::Device dev, Socket::KernelSKBuff skb)
     {
         Log::info("we have one packet to transmit!");
+        e1000::xmit(dev, skb); /* hand the kernel sk_buff over to the e1000 transmit sequence */
     }
 
     LKM::init()
@@ -79,4 +80,10 @@
      * 4096, 8192 and 16384 bytes:
      */
     e1000::rx_buffer_len = 2048;
+    /*
+     * Linux and FreeBSD use at most 4096 bytes per transmit descriptor, Minix
+     * and HelenOS use 2048; I can't find out why. If I understand the Intel
+     * manual correctly, the maximum should be 16288 (see section 3.3.3).
+     */
+    e1000::tx_max_data_per_desc = 4096;
 }
diff --git a/rathaxes/samples/e1000/socket.blt b/rathaxes/samples/e1000/socket.blt
--- a/rathaxes/samples/e1000/socket.blt
+++ b/rathaxes/samples/e1000/socket.blt
@@ -1,20 +1,153 @@
-with Socket, LKM
+with Socket, LKM, Device, Ethernet
 {
+    template type Socket::KernelSKBuff()    /* alias for a pointer to the kernel's struct sk_buff */
+    {
+        chunk LKM::includes()
+        {
+            #include <linux/skbuff.h>
+
+            static const ${Socket::KernelSKBuff} force_rtx_socket_kernel_skbuff_decl; /* NOTE(review): by its name, only there to force decl() to be emitted -- confirm */
+        }
+
+        chunk ::decl()
+        {
+            typedef struct sk_buff  *rtx_socket_kernel_skbuff_p;
+        }
+
+        map
+        {
+        }
+    }
+
     template type Socket::SKBuff()
     {
         chunk LKM::includes()
         {
-            #include <linux/skbuff.h>
-            static const ${Socket::SKBuff} force_rtx_lnux_skbuf_decl;
+            static const ${Socket::SKBuff} force_rtx_socket_skbuff_decl; /* <linux/skbuff.h> is now included by Socket::KernelSKBuff */
         }
 
         chunk ::decl()
         {
-            struct sk_buff;
+            struct  rtx_socket_skbuff
+            {
+                struct sk_buff  *skbuff;        /* wrapped kernel buffer; set to 0 once freed */
+                dma_addr_t      dma_handle;     /* handle from dma_map_single(); 0 while nothing is mapped */
+            };
         }
 
-        chunk ::init()
+        chunk   LKM::prototypes()
         {
+            static void rtx_socket_skbuff_dump_infos(struct rtx_socket_skbuff *);
+            static int  rtx_socket_skbuff_map(struct rtx_socket_skbuff *, struct device *, enum dma_data_direction); /* 0 on success, non-zero on mapping error */
+            static void rtx_socket_skbuff_unmap_and_free(struct rtx_socket_skbuff *, struct device *, enum dma_data_direction);
+        }
+
+        chunk   LKM::code()
+        {
+            /*
+             * Log the protocol, lengths, fragment count, GSO and checksum state of
+             * the wrapped sk_buff; pr_info is used until Rathaxes logs can be variadic.
+             */
+            static void rtx_socket_skbuff_dump_infos(struct rtx_socket_skbuff *self)
+            {
+                /* Nothing to dump (and nothing safe to dereference) without an sk_buff. */
+                if (WARN_ON(!self->skbuff))
+                    return;
+
+                ${Ethernet::ProtocolId} ethernet_proto = be16_to_cpu(self->skbuff->protocol);
+                static const char * const ip_summed_values[] = {
+                    "none", "unnecessary", "complete", "partial"
+                };
+                struct skb_shared_info *shinfo = skb_shinfo(self->skbuff);
+
+                pr_info(
+                        "\t protocol = %#-5x (%s)\n"
+                        "\t      len = %-5u data_len = %-5u head_len = %-5u\n"
+                        "\t nr_frags = %u\n"
+                        "\t gso_size = %-5u gso_segs = %-5u gso_type = %-5u\n"
+                        "\tip_summed = %d (%s)\n",
+                        ethernet_proto, rtx_ethernet_protocol_id_to_str(ethernet_proto) /* XXX: ${local.ethernet_proto.to_str()} */,
+                        self->skbuff->len, self->skbuff->data_len, skb_headlen(self->skbuff),
+                        shinfo->nr_frags, shinfo->gso_size, shinfo->gso_segs, shinfo->gso_type,
+                        self->skbuff->ip_summed, ip_summed_values[self->skbuff->ip_summed]);
+            }
+
+            /*
+             * DMA-map the linear part of the sk_buff (skb_headlen() bytes starting
+             * at ->data) and record the handle in self. Returns 0 on success, else
+             * the non-zero dma_mapping_error() result with dma_handle reset to 0.
+             */
+            static int rtx_socket_skbuff_map(struct rtx_socket_skbuff *self,
+                                             struct device *dev,
+                                             enum dma_data_direction direction)
+            {
+                WARN_ON(!self->skbuff);
+                WARN_ON(!self->skbuff->data);
+                WARN_ON(self->dma_handle); /* non-zero means a mapping is already recorded */
+
+                self->dma_handle = dma_map_single(dev, self->skbuff->data,
+                                                  skb_headlen(self->skbuff), direction);
+
+                const int status = dma_mapping_error(dev, self->dma_handle);
+                if (status)
+                    self->dma_handle = 0;
+                return status;
+            }
+
+            /* Undo rtx_socket_skbuff_map() if a mapping is recorded, then free the sk_buff. */
+            static void rtx_socket_skbuff_unmap_and_free(struct rtx_socket_skbuff *self,
+                                                         struct device *dev,
+                                                         enum dma_data_direction direction)
+            {
+                WARN_ON(!self->skbuff);
+                WARN_ON(!self->skbuff->data);
+
+                /* dma_handle is 0 whenever nothing is mapped (init and mapping failure set it so). */
+                if (self->dma_handle != 0)
+                {
+                    dma_unmap_single(dev, self->dma_handle,
+                                     skb_headlen(self->skbuff), direction);
+                    self->dma_handle = 0;
+                }
+                dev_kfree_skb_any(self->skbuff);
+                self->skbuff = NULL;
+            }
+        }
+
+        /*
+         * XXX: the Rathaxes argument kernel_skb is not actually bound to the
+         * C variable from Ethernet::send(); the body below only works because
+         * it uses that C variable's name (kernel_skb) directly.
+         */
+        chunk ::init(Socket::KernelSKBuff kernel_skb)
+        {
+            ${self}.skbuff = kernel_skb;
+            ${self}.dma_handle = 0;
+        }
+
+        chunk dump_infos()
+        {
+            rtx_socket_skbuff_dump_infos(${self}); /* NOTE(review): helper takes a pointer while init() uses ${self}.field -- confirm ${self} expands to an address here */
+        }
+
+        chunk   map_to(Device::Device dev)
+        {
+            rtx_socket_skbuff_map(${self}, ${dev}, DMA_TO_DEVICE); /* NOTE(review): the int result (mapping error) is discarded here */
+        }
+
+        chunk   map_from(Device::Device dev)
+        {
+            rtx_socket_skbuff_map(${self}, ${dev}, DMA_FROM_DEVICE); /* NOTE(review): the int result (mapping error) is discarded here */
+        }
+
+        chunk   unmap_to_and_free(Device::Device dev)
+        {
+            rtx_socket_skbuff_unmap_and_free(${self}, ${dev}, DMA_TO_DEVICE); /* counterpart of map_to(); also frees the sk_buff */
+        }
+
+        chunk   unmap_from_and_free(Device::Device dev)
+        {
+            rtx_socket_skbuff_unmap_and_free(${self}, ${dev}, DMA_FROM_DEVICE); /* counterpart of map_from(); also frees the sk_buff */
         }
 
         map
diff --git a/rathaxes/samples/e1000/socket.rti b/rathaxes/samples/e1000/socket.rti
--- a/rathaxes/samples/e1000/socket.rti
+++ b/rathaxes/samples/e1000/socket.rti
@@ -1,8 +1,22 @@
-interface Socket : LKM
+interface Socket : LKM, Device
 {
-    provided type Socket::SKBuff {
-        chunk   LKM::includes();
-        method  decl();
-        method  init();
+    provided type   KernelSKBuff    /* pointer to the kernel's struct sk_buff, as received by Ethernet::send */
+    {
+        chunk       LKM::includes();
+        method      decl();
+    }
+
+    provided type   SKBuff  /* sk_buff wrapper that also records a DMA handle */
+    {
+        chunk       LKM::includes();
+        chunk       LKM::prototypes();
+        chunk       LKM::code();
+        method      decl();
+        method      init(Socket::KernelSKBuff);             /* wrap a kernel sk_buff; no DMA mapping yet */
+        method      dump_infos();                           /* pr_info() a summary of the sk_buff */
+        method      map_to(Device::Device);                 /* DMA-map with DMA_TO_DEVICE */
+        method      map_from(Device::Device);               /* DMA-map with DMA_FROM_DEVICE */
+        method      unmap_to_and_free(Device::Device);      /* unmap (DMA_TO_DEVICE) and free the sk_buff */
+        method      unmap_from_and_free(Device::Device);    /* unmap (DMA_FROM_DEVICE) and free the sk_buff */
     }
 }