Hi Christof! Thanks for - splitting the original merge proposal into smaller chunks - providing hard numbers to show that we really want theses changes I don't have a test setup, so I couldn't test this feature. Code comments only. > === modified file 'firewall/conntrack.c' > --- firewall/conntrack.c 2011-03-22 16:37:54 +0000 > +++ firewall/conntrack.c 2011-03-22 16:37:54 +0000 > @@ -340,23 +346,154 @@ > } > > /** > - * Insert an address into a list of addresses. If same address exists already, > - * the update_id is replaced with the new value. > - * > - * @param addr_list the address list > - * @param addr the address to be added > - * @param upd_id update id > - * > - * @return the address list > - */ > -static struct slist *update_esp_address(struct slist *addr_list, > - const struct in6_addr *addr, > - const uint32_t *upd_id) > -{ > - struct esp_address *esp_addr = get_esp_address(addr_list, addr); > + * Set up or remove iptables rules to bypass userspace processing of the > + * SPI/destination pairs as specified by @a esp_tuple and @a dest. > + * This can greatly improve firewall throughput. > + * > + * @param esp_tuple Determines the SPI. > + * @param dest The corresponding destination address to bypass. May be > + * a IPv6-mapped IPv4 address. > + * @param insert Insert new rule if true, remove existing if false. > + * @return 0 if rules were modified, non-zero otherwise. > + * > + * @note This feature may be turned off completely by the -u command line option. > + * It is also automatically deactivated for connections that demand > + * more advanced connection tracking. > + * In these cases, -1 is returned even though there was not even an > + * attempt to modify rules. > + * > + * @note This interferes, in one way or another, with userspace_ipsec, > + * Relay, LSI, midauth, lightweight-update and esp_prot. Care was > + * taken to not break these features though. > + * > + * @see update_esp_address() > + * @see free_esp_tuple() > + * @see ::esp_speedup > + */ > +static int hip_fw_manage_esp_rule(const struct esp_tuple *const esp_tuple, > + const struct in6_addr *const dest, bool insert) [M] 'const bool insert' for const correctness > +/** > + * Insert a destination address into an esp_tuple. If same address exists already, > + * the update_id is replaced with the new value instead. > + * > + * @param esp_tuple the esp tuple > + * @param addr the address to be added > + * @param upd_id update id > + */ > +static void update_esp_address(struct esp_tuple *esp_tuple, > + const struct in6_addr *addr, > + const uint32_t *upd_id) [M] const correctness > @@ -367,7 +504,7 @@ > *esp_addr->update_id = *upd_id; > } > HIP_DEBUG("update_esp_address: found and updated\n"); > - return addr_list; > + return; > } > esp_addr = malloc(sizeof(struct esp_address)); > memcpy(&esp_addr->dst_addr, addr, sizeof(struct in6_addr)); [H] not your code, but the return value of 'malloc()' must be checked against 'NULL'. [L] not your code, but the 'memcpy()' could be replaced with a plain assignment. > @@ -448,7 +588,7 @@ > * @param data the connection-related data to be inserted [M] the 'ctx' parameter is not documented > * @see remove_connection > */ > -static void insert_new_connection(const struct hip_data *data) > +static void insert_new_connection(const struct hip_data *data, struct hip_fw_context *ctx) [M] 'const struct hip_data *const data' and 'const struct hip_fw_context *const ctx' for const correctness > { > struct connection *connection = NULL; > > @@ -457,6 +597,7 @@ > connection = calloc(1, sizeof(struct connection)); [H] not your code but the return value of 'calloc()' needs to be checked against 'NULL' > @@ -1983,6 +2108,173 @@ > } > > /** > + * Parse one line of iptables -nvL formatted output, and extract > + * packet count, SPI and destination IP if successful. [L] What exactly does 'successful' mean? Can I feed a correctly formatted line to this function and it could still fail (e.g. due to memory allocation problems or so)? Or does 'successful' essentially mean 'correctly formatted input'? > + * This takes into account all kinds of rules that can are created by > + * hip_fw_manage_esp_rule(). [L] It would be helpful to document which input formats the current implementation supports. 'iptables -nvL' is not sufficient because versions and translations might vary. > + * > + * @param input The line to be parsed. > + * @param packet_count Out: receives the packet count (first column). > + * @param spi Out: receives the SPI. > + * @param spi Out: receives the destination IP. [M] '@param dest' I presume? > + * @return true if rule was valid and output was written, > + * false otherwise [L] Again, when is a rule 'valid'? > + * > + * @see detect_esp_rule_activity() > + */ > +static bool parse_iptables_esp_rule(const char *const input, > + unsigned int *const packet_count, > + uint32_t *const spi, > + struct in6_addr *const dest) > +{ > + static const char u32_prefix[] = "u32 0x4&0x1fff=0x0&&0x0>>0x16&0x3c@0x8=0x"; > + > + /* > + * In iptables output, one column is optional. So we try the long > + * format first and fall back to the shorter one (see sscanf call > + * below). > + * The %45s format is used here because 45 is the maximum IPv6 address > + * length, considering all variations (i.e. INET6_ADDRSTRLEN - 1). > + */ > + static const char *formats[] = { "%u %*u %*s %*s %*2[!f-] %*s %*s %*s %45s", > + "%u %*u %*s %*s %*s %*s %*s %45s" }; > + > + char ip[INET6_ADDRSTRLEN]; > + const char *str_spi; > + > + // theres's two ways of specifying SPIs in a rule > + // (see hip_fw_manage_esp_rule) > + > + if ((str_spi = strstr(input, "spi:"))) { > + // non-UDP > + if (sscanf(str_spi, "spi:%u", spi) < 1) { > + HIP_ERROR("Unexpected iptables output: '%s'\n", input); [L] nit: input is not necessarily iptables output, it's just input to this function. Furthermore, it might be already very helpful for debugging to have an error message that is more explicit about what exactly is 'unexpected' here. > + return false; > + } > + } else if ((str_spi = strstr(input, u32_prefix))) { > + // UDP > + // spi follows u32_prefix string as a hex number > + // (always host byte order) > + if (sscanf(&str_spi[sizeof(u32_prefix) - 1], "%x", spi) < 1) { > + HIP_ERROR("Unexpected iptables output: '%s'\n", input); [L] nit: input is not necessarily iptables output, it's just input to this function. Furthermore, it might be already very helpful for debugging to have an error message that is more explicit about what exactly is 'unexpected' here. > + return false; > + } > + } else { > + // no SPI specified, so it's no ESP rule > + return false; > + } > + > + // grab packet count and destination IP. > + if (sscanf(input, formats[0], packet_count, ip) < 2) { > + // retry with alternative format before we give up > + if (sscanf(input, formats[1], packet_count, ip) < 2) { > + HIP_ERROR("Unexpected iptables output: '%s'\n", input); [L] nit: input is not necessarily iptables output, it's just input to this function. Furthermore, it might be already very helpful for debugging to have an error message that is more explicit about what exactly is 'unexpected' here. > + return false; > + } > + } > + > + // IP not needed, unless there was activity [M] This case distinction should definitely be documented > + if (*packet_count > 0) { > + char *slash; > + > + // IP may be in /128 format, strip the suffix > + if ((slash = strchr(ip, '/'))) { > + *slash = '\0'; > + } > + > + // parse destination IP; try IPv6 first, then IPv4 > + if (!inet_pton(AF_INET6, ip, dest)) { > + struct in_addr addr4; > + if (!inet_pton(AF_INET, ip, &addr4)) { > + HIP_ERROR("Unexpected iptables output: '%s'\n", input); [L] nit: as above > + HIP_ERROR("Can't parse destination IP: %s\n", ip); > + return false; > + } > + > + IPV4_TO_IPV6_MAP(&addr4, dest); > + } > + } > + > + return true; > +} > + > +/** > + * Update timestamps of all ESP tuples where corresponding iptables rules' > + * packet counters are non-zero. > + * Currently, this works by parsing the output given by @a cmd, which is > + * expected to have `iptables --nvL' format. [L] 'iptables -nvL'? > + * > + * @param cmd Command line to capture output from. [M] The parsing code is very closely tied to a very small number of command lines that make sense with it. Would it make sense to hard code the command line right away? > + * @param now We consider this the current time. > + * @return Number of rules that were identified with an esp tuple. > + * Not necessarily the number of tuples updated. > + * > + * @note This function doesn't clear the packet counters itself. [M] Reading and zeroing the packet counters should be done atomically with a single command line invocation. Otherwise, a packet might arrive in between and the ESP rule would be deleted even though it was still active. > + * > + * @todo De-uglify this. You may be tempted to statically link in libiptc, > + * and I'd generally approve of it because while it was never meant > + * to be used publicly, quite some projects have relied on it without > + * burning their fingers too badly for a long time now. But on the > + * other hand, there's the impending nftables release that will render > + * all your hard work obsolete anyway. I'd rather suggest waiting for > + * a post-alpha release of libnl_nft before wasting your time... > + * --cmroz, oct 2010 [M] I find this confusing. First, the @todo says that this functionality should not be implemented as is but via libiptc instead. Then it says the implementation should stay the way it is. So is this really a @todo? Or is the @todo the update to libnl_nft? > + */ > +static unsigned int detect_esp_rule_activity(const char *const cmd, > + const time_t now) > +{ > + > + unsigned int ret = 0; > + bool chain_ok = false; > + char bfr[256]; > + FILE *p; > + > + if (!(p = popen(cmd, "r"))) { > + HIP_ERROR("popen(\"%s\"): %s\n", cmd, strerror(errno)); > + return 0; > + } > + > + while (fgets(bfr, sizeof(bfr), p)) { > + if (strncmp(bfr, "Chain", 5) == 0) { > + chain_ok = (strncmp(bfr, "Chain HIPFW-INPUT", 17) == 0) || > + (strncmp(bfr, "Chain HIPFW-OUTPUT", 18) == 0) || > + (strncmp(bfr, "Chain HIPFW-FORWARD", 19) == 0); [L] Would it make sense to let the 'iptables' command list only these three chains? That would help with the atomic zeroing of the packet counts, too. > === modified file 'firewall/firewall_defines.h' > --- firewall/firewall_defines.h 2011-03-22 16:37:54 +0000 > +++ firewall/firewall_defines.h 2011-03-22 16:37:54 +0000 > @@ -129,6 +131,7 @@ > int direction; > struct connection *connection; > int state; > + int hook; [L] Even though this is not done for the other fields, it'd be great to get documentation on this new field. What it's purpose is, where it is used, etc. > @@ -141,6 +144,8 @@ > int verify_responder; > int state; > time_t timestamp; > + /* members needed for iptables setup */ > + bool udp_encap; /**< uses udp-nat? */ [L] To me as a developer who is not the author of this code, this piece of documentation is not very helpful. Please elaborate or remove it. > === modified file 'firewall/helpers.c' > --- firewall/helpers.c 2011-01-11 13:59:46 +0000 > +++ firewall/helpers.c 2011-03-22 16:37:54 +0000 > @@ -84,10 +86,43 @@ > * > * @param command The system command. The caller of this function must take > * care that command does not contain malicious code. > - */ > -void system_print(const char *command) > -{ > - if (system(command) == -1) { > - HIP_ERROR("Could not execute system command %s", command); > - } > + * @return Exit code on success, -1 on failure. > + */ > +int system_print(const char *command) [M] const correctness > +{ > + int ret; > + > + if ((ret = system(command)) == -1) { > + HIP_ERROR("Could not execute system command `%s'", command); [M] misleading. The command (not 'system command') is executed alright, it just returns an error. > + return -1; > + } > + > + HIP_DEBUG("$ %s -> %d\n", command, WEXITSTATUS(ret)); > + > + return WEXITSTATUS(ret); > +} > + > +/** > + * printf()-like wrapper arount system_print. > + * > + * @param command The system command. This is a printf format string. > + * The caller of this function must take care that command > + * does not contain malicious code. > + * @return Exit code on success, -1 on failure. > + */ > +int system_printf(const char *command, ...) [M] const correctness > +{ > + char bfr[196]; [M] why 196? > + > + va_list vargs; > + va_start(vargs, command); > + > + if (vsnprintf(bfr, sizeof(bfr), command, vargs) <= 0) { [M] The return value of vsnprintf() must be checked to make sure that 'command' was not truncated due to insufficient buffer space in 'bfr'. Blindly executing a potentially truncated command sounds like an invitation to a pretty interesting bug party, especially since the firewall is running as root, I think. > === modified file 'firewall/helpers.h' > --- firewall/helpers.h 2010-10-15 15:29:14 +0000 > +++ firewall/helpers.h 2011-03-22 16:37:54 +0000 > @@ -30,6 +30,7 @@ > > const char *addr_to_numeric(const struct in6_addr *addrp); > struct in6_addr *numeric_to_addr(const char *num); > -void system_print(const char *command); > +int system_print(const char *command); > +int system_printf(const char *command, ...); > > #endif /* HIP_FIREWALL_HELPERS_H */ > [L] all these 'helper' changes could/should be committed separately as they are not specific to this branch. > === modified file 'test/firewall/conntrack.c' > --- test/firewall/conntrack.c 2011-03-22 16:37:54 +0000 > +++ test/firewall/conntrack.c 2011-03-22 16:37:54 +0000 > @@ -33,18 +33,22 @@ > #include > #include > #include > +#include > > #include "firewall/conntrack.h" > #include "firewall/conntrack.c" > #include "test_suites.h" > > -static struct connection *setup_connection(void) { > - struct hip_data data = { }; > + > +static struct connection *setup_connection(void) > +{ > + struct hip_fw_context ctx = {}; // only ctx.udp_encap_hdr is examined > + struct hip_data data = {}; [L] Are these empty initializers needed? Could 'ctx' be declared as const? If some of this doesn't make any sense, blame it on my flu-infested brain ;-) Stefan