@@ -55,6 +55,7 @@
#define SO_ADD_HW_FILTERING_RULE 113
#define SO_DEL_HW_FILTERING_RULE 114
#define SO_SET_PACKET_CONSUMER_MODE 115
+#define SO_DEACTIVATE_RING 116
/* Get */
#define SO_GET_RING_VERSION 120
@@ -350,9 +351,14 @@
typedef struct flowSlotInfo {
u_int16_t version, sample_rate;
u_int32_t min_num_slots, slot_len, data_len, tot_mem;
- u_int64_t tot_pkts, tot_lost, tot_insert, tot_read;
+ u_int64_t tot_pkts, tot_lost, tot_insert;
+ u_int32_t insert_off /* managed by kernel */;
u_int64_t tot_fwd_ok, tot_fwd_notok;
- u_int32_t insert_off /* managed by kernel */, remove_off /* managed by userland */;
+ /* <-- 64 bytes up to here when u_int64_t is 4-byte aligned (72 with 8-byte alignment); should be enough to avoid some L1 VIVT coherence issues (32 ~ 64 byte lines) */
+ char padding[128];
+ /* <-- 128 bytes of padding above; should be enough to avoid false sharing in most L2 caches (64 ~ 128 byte lines) */
+ u_int64_t tot_read;
+ u_int32_t remove_off /* managed by userland */;
} FlowSlotInfo;
/* *********************************** */
|
@@ -80,7 +80,6 @@
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
-#include <asm/cacheflush.h>
#include <linux/proc_fs.h>
#include <linux/if_arp.h>
#include <net/xfrm.h>
@@ -94,7 +93,7 @@
#include <linux/pf_ring.h>
-#define RING_DEBUG
+/* #define RING_DEBUG */
#ifndef SVN_REV
#define SVN_REV ""
@@ -375,7 +374,12 @@
inline u_int get_num_ring_free_slots(struct ring_opt * pfr)
{
- return(pfr->slots_info->min_num_slots - num_queued_pkts(pfr));
+ u_int32_t nqpkts = num_queued_pkts(pfr);
+
+ if(nqpkts < (pfr->slots_info->min_num_slots))
+ return(pfr->slots_info->min_num_slots - nqpkts);
+ else
+ return(0);
}
/* ********************************** */
@@ -737,7 +741,7 @@
rlen += sprintf(buf + rlen, "Slot version : %d\n", RING_FLOWSLOT_VERSION);
rlen += sprintf(buf + rlen, "Capture TX : %s\n", enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
rlen += sprintf(buf + rlen, "IP Defragment : %s\n", enable_ip_defrag ? "Yes" : "No");
- rlen += sprintf(buf + rlen, "Transparent mode : %s\n", (transparent_mode != driver2pf_ring_non_transparent) ? "Yes" : "No");
+ rlen += sprintf(buf + rlen, "Transparent mode : %s\n", (transparent_mode == standard_linux_path ? "Yes (mode 0)" : (transparent_mode == driver2pf_ring_transparent ? "Yes (mode 1)" : "No (mode 2)")));
rlen += sprintf(buf + rlen, "Total rings : %d\n", ring_table_size);
rlen += sprintf(buf + rlen, "Total plugins : %d\n", plugin_registration_size);
} else {
@@ -878,7 +882,7 @@
*/
static int ring_alloc_mem(struct sock *sk)
{
- u_int the_slot_len;
+ u_int the_slot_len, num_pages;
u_int32_t tot_mem;
struct ring_opt *pfr = ring_sk(sk);
@@ -911,7 +915,12 @@
the_slot_len = pfr->slot_header_len + pfr->bucket_len;
tot_mem = PAGE_ALIGN(sizeof(FlowSlotInfo) + min_num_slots * the_slot_len);
- pfr->ring_memory = vmalloc_32(tot_mem);
+
+  /* Alignment necessary on ARM platforms: round the byte size up to a SHMLBA multiple. */
+  /* SHMLBA is a byte count and a multiple of PAGE_SIZE, so the result is whole pages. */
+  num_pages = ((tot_mem + (SHMLBA-1)) / SHMLBA) * (SHMLBA / PAGE_SIZE);
+
+ pfr->ring_memory = vmalloc_user(num_pages*PAGE_SIZE);
if(pfr->ring_memory != NULL) {
#if defined(RING_DEBUG)
@@ -1579,21 +1588,6 @@
/* ********************************** */
-void flush_packet_memory(u8 *start_addr, u_int len) {
- struct page *p_start, *p_end;
- u8 *end_addr = start_addr + len;
-
- p_start = vmalloc_to_page(start_addr);
- p_end = vmalloc_to_page(end_addr);
-
- while(p_start <= p_end) {
- flush_dcache_page(p_start);
- p_start++;
- }
-}
-
-/* ********************************** */
-
/*
Generic function for copying either a skb or a raw
memory block to the ring buffer
@@ -1604,7 +1598,7 @@
int displ, int offset, void *plugin_mem,
void *raw_data, uint raw_data_len) {
char *ring_bucket;
- u_int32_t off, taken, bytes_to_flush = pfr->slot_header_len;
+ u_int32_t off, taken;
if(pfr->ring_slots == NULL) return;
@@ -1632,7 +1626,6 @@
if((plugin_mem != NULL) && (offset > 0)) {
memcpy(&ring_bucket[pfr->slot_header_len], plugin_mem, offset);
- bytes_to_flush += offset;
}
if(hdr->caplen > 0) {
@@ -1642,8 +1635,6 @@
pfr->slot_header_len);
#endif
skb_copy_bits(skb, -displ, &ring_bucket[pfr->slot_header_len + offset], hdr->caplen);
-
- bytes_to_flush += hdr->caplen;
} else {
if(hdr->extended_hdr.parsed_header_len >= pfr->bucket_len) {
static u_char print_once = 0;
@@ -1659,7 +1650,6 @@
/* Raw data copy mode */
raw_data_len = min(raw_data_len, pfr->bucket_len); /* Avoid overruns */
memcpy(&ring_bucket[pfr->slot_header_len], raw_data, raw_data_len); /* Copy raw data if present */
- bytes_to_flush += raw_data_len;
hdr->len = hdr->caplen = raw_data_len, hdr->extended_hdr.if_index = FAKE_PACKET;
/* printk("[PF_RING] Copied raw data at slot with offset %d [len=%d]\n", off, raw_data_len); */
}
@@ -1672,14 +1662,13 @@
printk("[PF_RING] ==> insert_off=%d\n", pfr->slots_info->insert_off);
#endif
+ /*
+ NOTE: smp_* barriers are _compiler_ barriers on UP, mandatory barriers on SMP
+ a consumer _must_ see the new value of tot_insert only after the buffer update completes
+ */
+ smp_wmb();
pfr->slots_info->tot_insert++;
- /* Flush data to mmap-ed memory area */
- // smp_wmb();
- //flush_packet_memory(ring_bucket, bytes_to_flush);
- //flush_packet_memory((u8*)pfr->slots_info, sizeof(FlowSlotInfo));
- // smp_mb();
-
write_unlock_bh(&pfr->ring_index_lock);
if(waitqueue_active(&pfr->ring_slots_waitqueue))
@@ -3137,14 +3126,18 @@
start = vma->vm_start;
+#if defined(RING_DEBUG)
printk("[PF_RING] do_memory_mmap(mode=%d, size=%lu, ptr=%p)\n", mode, size, ptr);
+#endif
while(size > 0) {
int rc;
if(mode == 0) {
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11))
- rc = vm_insert_page(vma, start, vmalloc_to_page(ptr));
+ // rc = vm_insert_page(vma, start, vmalloc_to_page(ptr));
+ rc = remap_vmalloc_range(vma, ptr, 0);
+ break;
#else
rc = remap_pfn_range(vma, start, kvirt_to_pa((unsigned long)ptr), PAGE_SIZE, PAGE_SHARED);
#endif
@@ -3820,7 +3813,7 @@
case SO_ATTACH_FILTER:
ret = -EINVAL;
-#if defined(RING_DEBUG)
+#if defined(RING_DEBUG)
printk("[PF_RING] BPF filter (%d)\n", 0);
#endif
@@ -3831,7 +3824,7 @@
ret = -EFAULT;
-#if defined(RING_DEBUG)
+#if defined(RING_DEBUG)
printk("[PF_RING] BPF filter (%d)\n", 1);
#endif
/*
@@ -3846,9 +3839,8 @@
break;
/* Fix below courtesy of Noam Dev <noamdev@gmail.com> */
- fsize = sizeof(struct sock_filter) * fprog.len;
- filter =
- kmalloc(fsize + sizeof(struct sk_filter), GFP_KERNEL);
+ fsize = sizeof(struct sock_filter) * fprog.len;
+ filter = kmalloc(fsize + sizeof(struct sk_filter), GFP_KERNEL);
if(filter == NULL) {
ret = -ENOMEM;
@@ -3873,8 +3865,8 @@
write_unlock(&pfr->ring_rules_lock);
ret = 0;
-#if defined(RING_DEBUG)
- printk("[PF_RING] BPF filter attached succesfully [len=%d]\n",
+#if defined(RING_DEBUG)
+ printk("[PF_RING] BPF filter attached successfully [len=%d]\n",
filter->len);
#endif
}
@@ -4317,6 +4309,12 @@
found = 1, pfr->ring_active = 1;
break;
+ case SO_DEACTIVATE_RING:
+ if(debug)
+ printk("[PF_RING] * SO_DEACTIVATE_RING *\n");
+ found = 1, pfr->ring_active = 0;
+ break;
+
case SO_RING_BUCKET_LEN:
if(optlen != sizeof(u_int32_t))
return -EINVAL;
|