Discussion: Adding a bpf peers_present function and "peers added/gone" callback
Martin Husemann
2021-05-25 17:03:02 UTC
Hey folks,

some drivers (e.g. IEEE 802.11 ones) offer bpf tap points that require
quite a bit of work just for the bpf tap, and sit in a hot network path.

To optimize these at runtime (that is: only do the additional tap work if
there are any active peers) I would like to borrow an idea from FreeBSD,
though with a quite different implementation:

First part is very simple:

- add a new function bpf_peers_present() that checks, for a given
struct bpf_if *, whether there are any active peers

If this function returns false, the device driver can skip the tap (and all
corresponding copying/setup work).

I would guess this part is non-controversial (and it could even be used
internally to shortcut some of the mtap functions).
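
To make the intended use concrete, a driver's hot path could then look
roughly like this (sc, sc_drvbpf and the radiotap members are made-up
names here, and the final mtap call is only indicated):

	/* hypothetical 802.11 driver transmit path */
	if (bpf_peers_present(sc->sc_drvbpf)) {
		/*
		 * Only now pay for the expensive part: fill in the
		 * radiotap header and copy the frame to the tap.
		 */
		sc->sc_txtap.wt_flags = 0;
		sc->sc_txtap.wt_rate = rate;
		/* ... fill in the remaining fields, then bpf_mtap2() the mbuf */
	}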

Second part is a bit more tricky:

- in the new IEEE 802.11 stack (where virtual access points are separated
from the radio hardware, and the mode of a VAP partially controls the DLTs
used for some internal taps) it would be great to be able to provide an
accumulated bpf_peers_present() result at the radio level (which is where
the driver would use it) - this avoids iterating over all VAPs and
collecting the data for every incoming packet.

For this to work, we need a callback that gets invoked whenever a peer
pops up or goes away on the bpf tap.

FreeBSD has a generic EVENTHANDLER interface that is very similar to our
pmf(9). Much like we "abuse" pmf(9) events for various global machine
state events (lid close, "wifi" button pressed, ...), they also have several
unrelated event types mixed into the list (including "process dumps core",
"kernel module loaded", "mount", "unmount", ...) - and especially the one
I am after: bpf_track.
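
For reference, FreeBSD consumers hook into that event roughly like this
(sketched from memory, names and details may differ):

	/* callback with the signature of FreeBSD's bpf_track event */
	static void
	wlan_bpf_track(void *arg, struct ifnet *ifp, int dlt, int attached)
	{
		/* update per-VAP / per-radio listener state here */
	}

	/* registration through the generic EVENTHANDLER mechanism;
	   tag is an eventhandler_tag, kept for EVENTHANDLER_DEREGISTER() */
	tag = EVENTHANDLER_REGISTER(bpf_track, wlan_bpf_track, NULL,
	    EVENTHANDLER_PRI_ANY);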

So I could mimic that design and emit a pmf(9) event from all bpf_attachd()
and bpf_detachd() calls - but that feels quite strange, and I would
prefer to add a bpf-specific callback here.

Something like:

#define BPF_TRACK_EVENT_ATTACH	1
#define BPF_TRACK_EVENT_DETACH	2

typedef void (*bpf_track_fn)(struct bpf_if *, struct ifnet *,
    int dlt, int event);

void bpf_register_track_event(struct bpf_if *, bpf_track_fn);
void bpf_deregister_track_event(struct bpf_if *, bpf_track_fn);
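
A driver could then keep an aggregated peer count at the radio level; a
minimal sketch (the softc layout and the way to get from the callback's
ifnet to the radio softc are made up here):

	struct hypo_softc {
		unsigned int	sc_ntaps;	/* active listeners over all VAPs */
		/* ... */
	};

	static void
	hypo_bpf_track(struct bpf_if *bp, struct ifnet *ifp, int dlt, int event)
	{
		struct hypo_softc *sc = ifp->if_softc;	/* driver specific lookup */

		if (event == BPF_TRACK_EVENT_ATTACH)
			atomic_inc_uint(&sc->sc_ntaps);
		else if (event == BPF_TRACK_EVENT_DETACH)
			atomic_dec_uint(&sc->sc_ntaps);
	}

The hot path then only needs to test sc->sc_ntaps != 0 instead of walking
all VAPs.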


Implementation is straightforward; any objections or comments on the
interface?

Martin

Martin Husemann
2021-06-01 13:33:17 UTC
Post by Martin Husemann
Hey folks,
some drivers (e.g. IEEE 802.11 ones) offer bpf tap points that require
quite a bit of work just for the bpf tap, and sit in a hot network path.
As was probably to be expected, this design did not survive the first
implementation attempt :-)

I learned that we already have a quite effective test for "any peer
present on this tap point" - our code makes sure the driver-visible
struct bpf_if * is NULL whenever there is no listener.

Still, I prefer the more explicit spelling and would like to provide an
inline function to test for that.

Since the driver-visible bpf_if * is NULL most of the time, the interface to
the new event register/deregister functions needed a tiny change - they now
take the address of the pointer (in bpf terms: the driver pointer, i.e. the
location where the struct bpf_if pointer is stored once a listener attaches).
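
With that change a driver registers against the location bpf fills in at
attach time, roughly like this (hypothetical names again; sc_drvbpf is
assumed to be the struct bpf_if * location the driver handed to bpf when
attaching its tap):

	/* in the driver attach path, after attaching the tap */
	error = bpf_register_track_event(&sc->sc_drvbpf, hypo_bpf_track);
	if (error != 0)
		aprint_error_dev(sc->sc_dev,
		    "cannot register bpf track event: %d\n", error);

	/* and symmetrically in the detach path, before the tap goes away */
	(void)bpf_deregister_track_event(&sc->sc_drvbpf, hypo_bpf_track);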

The patch below works for me. (Of course the debug changes/printfs
will go away before commit.)

Any comments?

Martin


Index: bpfdesc.h
===================================================================
RCS file: /cvsroot/src/sys/net/bpfdesc.h,v
retrieving revision 1.47
diff -u -p -r1.47 bpfdesc.h
--- bpfdesc.h 11 Jun 2020 13:36:20 -0000 1.47
+++ bpfdesc.h 1 Jun 2021 13:22:59 -0000
@@ -162,6 +162,14 @@ struct bpf_d_ext {


/*
+ * Record for each event tracker watching a tap point
+ */
+struct bpf_event_tracker {
+ SLIST_ENTRY(bpf_event_tracker) bet_entries;
+ void (*bet_notify)(struct bpf_if *, struct ifnet *, int, int);
+};
+
+/*
* Descriptor associated with each attached hardware interface.
*/
struct bpf_if {
@@ -179,6 +187,7 @@ struct bpf_if {
struct pslist_entry bif_iflist_entry;
struct pslist_head bif_dlist_head;
struct psref_target bif_psref;
+ SLIST_HEAD(, bpf_event_tracker) bif_trackers;
#endif
};

Index: bpf.h
===================================================================
RCS file: /cvsroot/src/sys/net/bpf.h,v
retrieving revision 1.75
diff -u -p -r1.75 bpf.h
--- bpf.h 11 Jun 2020 13:36:20 -0000 1.75
+++ bpf.h 1 Jun 2021 13:22:59 -0000
@@ -450,6 +450,11 @@ struct bpf_ops {

void (*bpf_mtap_softint_init)(struct ifnet *);
void (*bpf_mtap_softint)(struct ifnet *, struct mbuf *);
+
+ int (*bpf_register_track_event)(struct bpf_if **,
+ void (*)(struct bpf_if *, struct ifnet *, int, int));
+ int (*bpf_deregister_track_event)(struct bpf_if **,
+ void (*)(struct bpf_if *, struct ifnet *, int, int));
};

extern struct bpf_ops *bpf_ops;
@@ -501,6 +506,16 @@ bpf_change_type(struct ifnet *_ifp, u_in
bpf_ops->bpf_change_type(_ifp, _dlt, _hdrlen);
}

+static __inline bool
+bpf_peers_present(struct bpf_if *dp)
+{
+ /*
+ * Our code makes sure the driver visible point is NULL
+ * whenever there is no listener on this tap.
+ */
+ return dp != NULL;
+}
+
static __inline void
bpf_detach(struct ifnet *_ifp)
{
@@ -535,6 +550,24 @@ bpf_mtap_softint(struct ifnet *_ifp, str
bpf_ops->bpf_mtap_softint(_ifp, _m);
}

+static __inline int
+bpf_register_track_event(struct bpf_if **_dp,
+ void (*_fun)(struct bpf_if *, struct ifnet *, int, int))
+{
+ if (bpf_ops->bpf_register_track_event == NULL)
+ return ENXIO;
+ return bpf_ops->bpf_register_track_event(_dp, _fun);
+}
+
+static __inline int
+bpf_deregister_track_event(struct bpf_if **_dp,
+ void (*_fun)(struct bpf_if *, struct ifnet *, int, int))
+{
+ if (bpf_ops->bpf_deregister_track_event == NULL)
+ return ENXIO;
+ return bpf_ops->bpf_deregister_track_event(_dp, _fun);
+}
+
void bpf_setops(void);

void bpf_ops_handover_enter(struct bpf_ops *);
@@ -560,6 +593,12 @@ u_int bpf_filter(const struct bpf_insn *

u_int bpf_filter_with_aux_data(const struct bpf_insn *, const u_char *, u_int, u_int, const struct bpf_aux_data *);

+/*
+ * events to be tracked by bpf_register_track_event callbacks
+ */
+#define BPF_TRACK_EVENT_ATTACH 1
+#define BPF_TRACK_EVENT_DETACH 2
+

__END_DECLS

Index: bpf.c
===================================================================
RCS file: /cvsroot/src/sys/net/bpf.c,v
retrieving revision 1.239
diff -u -p -r1.239 bpf.c
--- bpf.c 18 Dec 2020 01:31:49 -0000 1.239
+++ bpf.c 1 Jun 2021 13:22:59 -0000
@@ -461,6 +461,7 @@ bad:
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
+ struct bpf_event_tracker *t;

KASSERT(mutex_owned(&bpf_mtx));
KASSERT(mutex_owned(d->bd_mtx));
@@ -473,6 +474,11 @@ bpf_attachd(struct bpf_d *d, struct bpf_
BPFIF_DLIST_WRITER_INSERT_HEAD(bp, d);

*bp->bif_driverp = bp;
+
+ SLIST_FOREACH(t, &bp->bif_trackers, bet_entries) {
+ t->bet_notify(bp, bp->bif_ifp, bp->bif_dlt,
+ BPF_TRACK_EVENT_ATTACH);
+ }
}

/*
@@ -482,6 +488,7 @@ static void
bpf_detachd(struct bpf_d *d)
{
struct bpf_if *bp;
+ struct bpf_event_tracker *t;

KASSERT(mutex_owned(&bpf_mtx));
KASSERT(mutex_owned(d->bd_mtx));
@@ -522,7 +529,13 @@ bpf_detachd(struct bpf_d *d)
*/
*d->bd_bif->bif_driverp = NULL;
}
+
d->bd_bif = NULL;
+
+ SLIST_FOREACH(t, &bp->bif_trackers, bet_entries) {
+ t->bet_notify(bp, bp->bif_ifp, bp->bif_dlt,
+ BPF_TRACK_EVENT_DETACH);
+ }
}

static void
@@ -2125,6 +2138,7 @@ _bpfattach(struct ifnet *ifp, u_int dlt,
BPF_IFLIST_ENTRY_INIT(bp);
PSLIST_INIT(&bp->bif_dlist_head);
psref_target_init(&bp->bif_psref, bpf_psref_class);
+ SLIST_INIT(&bp->bif_trackers);

BPF_IFLIST_WRITER_INSERT_HEAD(bp);

@@ -2132,8 +2146,8 @@ _bpfattach(struct ifnet *ifp, u_int dlt,

bp->bif_hdrlen = hdrlen;
mutex_exit(&bpf_mtx);
-#if 0
- printf("bpf: %s attached\n", ifp->if_xname);
+#if 1
+ printf("bpf: %s attached with dlt %x\n", ifp->if_xname, dlt);
#endif
}

@@ -2171,6 +2185,10 @@ _bpfdetach(struct ifnet *ifp)
int s;

mutex_enter(&bpf_mtx);
+#if 1
+ printf("bpf: %s detached\n", ifp->if_xname);
+#endif
+
/* Nuke the vnodes for any open instances */
again_d:
BPF_DLIST_WRITER_FOREACH(d) {
@@ -2196,6 +2214,14 @@ _bpfdetach(struct ifnet *ifp)
pserialize_perform(bpf_psz);
psref_target_destroy(&bp->bif_psref, bpf_psref_class);

+ while (!SLIST_EMPTY(&bp->bif_trackers)) {
+ struct bpf_event_tracker *t =
+ SLIST_FIRST(&bp->bif_trackers);
+ SLIST_REMOVE_HEAD(&bp->bif_trackers,
+ bet_entries);
+ kmem_free(t, sizeof(*t));
+ }
+
BPF_IFLIST_ENTRY_DESTROY(bp);
if (bp->bif_si != NULL) {
/* XXX NOMPSAFE: assumed running on one CPU */
@@ -2523,10 +2549,69 @@ SYSCTL_SETUP(sysctl_net_bpf_setup, "bpf

}

+static int
+_bpf_register_track_event(struct bpf_if **driverp,
+ void (*_fun)(struct bpf_if *, struct ifnet *, int, int))
+{
+ struct bpf_if *bp;
+ struct bpf_event_tracker *t;
+ int ret = ENOENT;
+
+ t = kmem_zalloc(sizeof(*t), KM_SLEEP);
+ if (!t)
+ return ENOMEM;
+ t->bet_notify = _fun;
+
+ mutex_enter(&bpf_mtx);
+ BPF_IFLIST_WRITER_FOREACH(bp) {
+ if (bp->bif_driverp != driverp)
+ continue;
+ SLIST_INSERT_HEAD(&bp->bif_trackers, t, bet_entries);
+ ret = 0;
+ break;
+ }
+ mutex_exit(&bpf_mtx);
+
+ return ret;
+}
+
+static int
+_bpf_deregister_track_event(struct bpf_if **driverp,
+ void (*_fun)(struct bpf_if *, struct ifnet *, int, int))
+{
+ struct bpf_if *bp;
+ struct bpf_event_tracker *t = NULL;
+ int ret = ENOENT;
+
+ mutex_enter(&bpf_mtx);
+ BPF_IFLIST_WRITER_FOREACH(bp) {
+ if (bp->bif_driverp != driverp)
+ continue;
+ SLIST_FOREACH(t, &bp->bif_trackers, bet_entries) {
+ if (t->bet_notify == _fun) {
+ ret = 0;
+ break;
+ }
+ }
+ if (ret == 0)
+ break;
+ }
+ if (ret == 0 && t && t->bet_notify == _fun) {
+ SLIST_REMOVE(&bp->bif_trackers, t, bpf_event_tracker,
+ bet_entries);
+ }
+ mutex_exit(&bpf_mtx);
+ if (ret == 0)
+ kmem_free(t, sizeof(*t));
+ return ret;
+}
+
struct bpf_ops bpf_ops_kernel = {
.bpf_attach = _bpfattach,
.bpf_detach = _bpfdetach,
.bpf_change_type = _bpf_change_type,
+ .bpf_register_track_event = _bpf_register_track_event,
+ .bpf_deregister_track_event = _bpf_deregister_track_event,

.bpf_mtap = _bpf_mtap,
.bpf_mtap2 = _bpf_mtap2,

