There is a reference-counting problem in the error paths where many pf states are generated. If, for some reason, a state generattion/insertion fails, the current PF code doesn't decrease the state counts for the rule that generated the state to compensate for the failure. This can result in errors with adaptive timeouts and max-state limits over time, as well as stopping rules from being freed from memory. This patch adds macros that perform symmetric increase/decreases of the reference counters for rules/anchors/etc. It could just as easily be done by adding inline code to do the same in every error path, or making a real function for the decrements like pf_src_tree_remove_state, as this isn't a fast path. This is to be applied after my previous reference counting fix applied to if_pfsync.c. Chris Pascoe 2004/07/20 Index: pf.c =================================================================== RCS file: /cvs/src/sys/net/pf.c,v retrieving revision 1.457 diff -u -r1.457 pf.c --- pf.c 11 Jul 2004 15:54:21 -0000 1.457 +++ pf.c 20 Jul 2004 01:35:03 -0000 @@ -250,6 +250,24 @@ ((r)->rule_flag & PFRULE_GRBOUND) ? (k)->pfik_parent : \ (k)->pfik_parent->pfik_parent) +#define STATE_INC_COUNTERS(s) \ + do { \ + s->rule.ptr->states++; \ + if (s->anchor.ptr != NULL) \ + s->anchor.ptr->states++; \ + if (s->nat_rule.ptr != NULL) \ + s->nat_rule.ptr->states++; \ + } while (0) + +#define STATE_DEC_COUNTERS(s) \ + do { \ + if (s->nat_rule.ptr != NULL) \ + s->nat_rule.ptr->states--; \ + if (s->anchor.ptr != NULL) \ + s->anchor.ptr->states--; \ + s->rule.ptr->states--; \ + } while (0) + static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); static __inline int pf_state_compare_lan_ext(struct pf_state *, struct pf_state *); @@ -2713,14 +2731,10 @@ return (PF_DROP); } bzero(s, sizeof(*s)); - r->states++; - if (a != NULL) - a->states++; s->rule.ptr = r; s->nat_rule.ptr = nr; - if (s->nat_rule.ptr != NULL) - s->nat_rule.ptr->states++; s->anchor.ptr = a; + STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; s->log = r->log & 2; s->proto = IPPROTO_TCP; @@ -2799,6 +2813,7 @@ off, pd, th, &s->src, &s->dst)) { REASON_SET(&reason, PFRES_MEMORY); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } @@ -2810,6 +2825,7 @@ ("pf_normalize_tcp_stateful failed on first pkt")); pf_normalize_tcp_cleanup(s); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } @@ -2817,6 +2833,7 @@ pf_normalize_tcp_cleanup(s); REASON_SET(&reason, PFRES_MEMORY); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else @@ -3054,14 +3071,10 @@ return (PF_DROP); } bzero(s, sizeof(*s)); - r->states++; - if (a != NULL) - a->states++; s->rule.ptr = r; s->nat_rule.ptr = nr; - if (s->nat_rule.ptr != NULL) - s->nat_rule.ptr->states++; s->anchor.ptr = a; + STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; s->log = r->log & 2; s->proto = IPPROTO_UDP; @@ -3110,6 +3123,7 @@ if (pf_insert_state(BOUND_IFACE(r, kif), s)) { REASON_SET(&reason, PFRES_MEMORY); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else @@ -3340,14 +3354,10 @@ return (PF_DROP); } bzero(s, sizeof(*s)); - r->states++; - if (a != NULL) - a->states++; s->rule.ptr = r; s->nat_rule.ptr = nr; - if (s->nat_rule.ptr != NULL) - s->nat_rule.ptr->states++; s->anchor.ptr = a; + STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; s->log = r->log & 2; s->proto = pd->proto; @@ -3390,6 +3400,7 @@ if (pf_insert_state(BOUND_IFACE(r, kif), s)) { REASON_SET(&reason, PFRES_MEMORY); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else @@ -3606,14 +3617,10 @@ return (PF_DROP); } bzero(s, sizeof(*s)); - r->states++; - if (a != NULL) - a->states++; s->rule.ptr = r; s->nat_rule.ptr = nr; - if (s->nat_rule.ptr != NULL) - s->nat_rule.ptr->states++; s->anchor.ptr = a; + STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; s->log = r->log & 2; s->proto = pd->proto; @@ -3652,6 +3659,7 @@ if (pf_insert_state(BOUND_IFACE(r, kif), s)) { REASON_SET(&reason, PFRES_MEMORY); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else Index: if_pfsync.c =================================================================== RCS file: /cvs/src/sys/net/if_pfsync.c,v --- if_pfsync.c Tue Jul 20 16:49:57 2004 +++ if_pfsync.c Tue Jul 20 16:51:13 2004 @@ -198,6 +198,7 @@ st->rule.ptr = r; /* XXX get pointers to nat_rule and anchor */ + /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ r->states++; /* fill in the rest of the state entry */ @@ -226,6 +227,8 @@ if (pf_insert_state(kif, st)) { pfi_maybe_destroy(kif); + /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ + r->states--; pool_put(&pf_state_pl, st); return (EINVAL); }