diff options
-rw-r--r-- | extras/filters/filter-rspamd/json.c | 1011 | ||||
-rw-r--r-- | extras/filters/filter-rspamd/json.h | 283 | ||||
-rw-r--r-- | extras/filters/filter-rspamd/rspamd.c | 435 | ||||
-rw-r--r-- | extras/filters/filter-rspamd/rspamd.h | 77 |
4 files changed, 1806 insertions, 0 deletions
diff --git a/extras/filters/filter-rspamd/json.c b/extras/filters/filter-rspamd/json.c new file mode 100644 index 0000000..ea975e8 --- /dev/null +++ b/extras/filters/filter-rspamd/json.c @@ -0,0 +1,1011 @@ +/* vim: set et ts=3 sw=3 sts=3 ft=c: + * + * Copyright (C) 2012, 2013, 2014 James McLaughlin et al. All rights reserved. + * https://github.com/udp/json-parser + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "json.h" + +#ifdef _MSC_VER + #ifndef _CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #endif +#endif + +const struct _json_value json_value_none; + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <math.h> + +typedef unsigned int json_uchar; + +static unsigned char hex_value (json_char c) +{ + if (isdigit(c)) + return c - '0'; + + switch (c) { + case 'a': case 'A': return 0x0A; + case 'b': case 'B': return 0x0B; + case 'c': case 'C': return 0x0C; + case 'd': case 'D': return 0x0D; + case 'e': case 'E': return 0x0E; + case 'f': case 'F': return 0x0F; + default: return 0xFF; + } +} + +typedef struct +{ + unsigned long used_memory; + + unsigned int uint_max; + unsigned long ulong_max; + + json_settings settings; + int first_pass; + + const json_char * ptr; + unsigned int cur_line, cur_col; + +} json_state; + +static void * default_alloc (size_t size, int zero, void * user_data) +{ + return zero ? calloc (1, size) : malloc (size); +} + +static void default_free (void * ptr, void * user_data) +{ + free (ptr); +} + +static void * json_alloc (json_state * state, unsigned long size, int zero) +{ + if ((state->ulong_max - state->used_memory) < size) + return 0; + + if (state->settings.max_memory + && (state->used_memory += size) > state->settings.max_memory) + { + return 0; + } + + return state->settings.mem_alloc (size, zero, state->settings.user_data); +} + +static int new_value (json_state * state, + json_value ** top, json_value ** root, json_value ** alloc, + json_type type) +{ + json_value * value; + int values_size; + + if (!state->first_pass) + { + value = *top = *alloc; + *alloc = (*alloc)->_reserved.next_alloc; + + if (!*root) + *root = value; + + switch (value->type) + { + case json_array: + + if (value->u.array.length == 0) + break; + + if (! 
(value->u.array.values = (json_value **) json_alloc + (state, value->u.array.length * sizeof (json_value *), 0)) ) + { + return 0; + } + + value->u.array.length = 0; + break; + + case json_object: + + if (value->u.object.length == 0) + break; + + values_size = sizeof (*value->u.object.values) * value->u.object.length; + + if (! (value->u.object.values = (json_object_entry *) json_alloc + (state, values_size + ((unsigned long) value->u.object.values), 0)) ) + { + return 0; + } + + value->_reserved.object_mem = (*(char **) &value->u.object.values) + values_size; + + value->u.object.length = 0; + break; + + case json_string: + + if (! (value->u.string.ptr = (json_char *) json_alloc + (state, (value->u.string.length + 1) * sizeof (json_char), 0)) ) + { + return 0; + } + + value->u.string.length = 0; + break; + + default: + break; + }; + + return 1; + } + + if (! (value = (json_value *) json_alloc + (state, sizeof (json_value) + state->settings.value_extra, 1))) + { + return 0; + } + + if (!*root) + *root = value; + + value->type = type; + value->parent = *top; + + #ifdef JSON_TRACK_SOURCE + value->line = state->cur_line; + value->col = state->cur_col; + #endif + + if (*alloc) + (*alloc)->_reserved.next_alloc = value; + + *alloc = *top = value; + + return 1; +} + +#define whitespace \ + case '\n': ++ state.cur_line; state.cur_col = 0; \ + case ' ': case '\t': case '\r' + +#define string_add(b) \ + do { if (!state.first_pass) string [string_length] = b; ++ string_length; } while (0); + +#define line_and_col \ + state.cur_line, state.cur_col + +static const long + flag_next = 1 << 0, + flag_reproc = 1 << 1, + flag_need_comma = 1 << 2, + flag_seek_value = 1 << 3, + flag_escaped = 1 << 4, + flag_string = 1 << 5, + flag_need_colon = 1 << 6, + flag_done = 1 << 7, + flag_num_negative = 1 << 8, + flag_num_zero = 1 << 9, + flag_num_e = 1 << 10, + flag_num_e_got_sign = 1 << 11, + flag_num_e_negative = 1 << 12, + flag_line_comment = 1 << 13, + flag_block_comment = 1 << 14; + 
+json_value * json_parse_ex (json_settings * settings, + const json_char * json, + size_t length, + char * error_buf) +{ + json_char error [json_error_max]; + const json_char * end; + json_value * top, * root, * alloc = 0; + json_state state = { 0 }; + long flags; + long num_digits = 0, num_e = 0; + json_int_t num_fraction = 0; + + /* Skip UTF-8 BOM + */ + if (length >= 3 && ((unsigned char) json [0]) == 0xEF + && ((unsigned char) json [1]) == 0xBB + && ((unsigned char) json [2]) == 0xBF) + { + json += 3; + length -= 3; + } + + error[0] = '\0'; + end = (json + length); + + memcpy (&state.settings, settings, sizeof (json_settings)); + + if (!state.settings.mem_alloc) + state.settings.mem_alloc = default_alloc; + + if (!state.settings.mem_free) + state.settings.mem_free = default_free; + + memset (&state.uint_max, 0xFF, sizeof (state.uint_max)); + memset (&state.ulong_max, 0xFF, sizeof (state.ulong_max)); + + state.uint_max -= 8; /* limit of how much can be added before next check */ + state.ulong_max -= 8; + + for (state.first_pass = 1; state.first_pass >= 0; -- state.first_pass) + { + json_uchar uchar; + unsigned char uc_b1, uc_b2, uc_b3, uc_b4; + json_char * string = 0; + unsigned int string_length = 0; + + top = root = 0; + flags = flag_seek_value; + + state.cur_line = 1; + + for (state.ptr = json ;; ++ state.ptr) + { + json_char b = (state.ptr == end ? 
0 : *state.ptr); + + if (flags & flag_string) + { + if (!b) + { sprintf (error, "Unexpected EOF in string (at %d:%d)", line_and_col); + goto e_failed; + } + + if (string_length > state.uint_max) + goto e_overflow; + + if (flags & flag_escaped) + { + flags &= ~ flag_escaped; + + switch (b) + { + case 'b': string_add ('\b'); break; + case 'f': string_add ('\f'); break; + case 'n': string_add ('\n'); break; + case 'r': string_add ('\r'); break; + case 't': string_add ('\t'); break; + case 'u': + + if (end - state.ptr < 4 || + (uc_b1 = hex_value (*++ state.ptr)) == 0xFF || + (uc_b2 = hex_value (*++ state.ptr)) == 0xFF || + (uc_b3 = hex_value (*++ state.ptr)) == 0xFF || + (uc_b4 = hex_value (*++ state.ptr)) == 0xFF) + { + sprintf (error, "Invalid character value `%c` (at %d:%d)", b, line_and_col); + goto e_failed; + } + + uc_b1 = (uc_b1 << 4) | uc_b2; + uc_b2 = (uc_b3 << 4) | uc_b4; + uchar = (uc_b1 << 8) | uc_b2; + + if ((uchar & 0xF800) == 0xD800) { + json_uchar uchar2; + + if (end - state.ptr < 6 || (*++ state.ptr) != '\\' || (*++ state.ptr) != 'u' || + (uc_b1 = hex_value (*++ state.ptr)) == 0xFF || + (uc_b2 = hex_value (*++ state.ptr)) == 0xFF || + (uc_b3 = hex_value (*++ state.ptr)) == 0xFF || + (uc_b4 = hex_value (*++ state.ptr)) == 0xFF) + { + sprintf (error, "Invalid character value `%c` (at %d:%d)", b, line_and_col); + goto e_failed; + } + + uc_b1 = (uc_b1 << 4) | uc_b2; + uc_b2 = (uc_b3 << 4) | uc_b4; + uchar2 = (uc_b1 << 8) | uc_b2; + + uchar = 0x010000 | ((uchar & 0x3FF) << 10) | (uchar2 & 0x3FF); + } + + if (sizeof (json_char) >= sizeof (json_uchar) || (uchar <= 0x7F)) + { + string_add ((json_char) uchar); + break; + } + + if (uchar <= 0x7FF) + { + if (state.first_pass) + string_length += 2; + else + { string [string_length ++] = 0xC0 | (uchar >> 6); + string [string_length ++] = 0x80 | (uchar & 0x3F); + } + + break; + } + + if (uchar <= 0xFFFF) { + if (state.first_pass) + string_length += 3; + else + { string [string_length ++] = 0xE0 | (uchar >> 12); + 
string [string_length ++] = 0x80 | ((uchar >> 6) & 0x3F); + string [string_length ++] = 0x80 | (uchar & 0x3F); + } + + break; + } + + if (state.first_pass) + string_length += 4; + else + { string [string_length ++] = 0xF0 | (uchar >> 18); + string [string_length ++] = 0x80 | ((uchar >> 12) & 0x3F); + string [string_length ++] = 0x80 | ((uchar >> 6) & 0x3F); + string [string_length ++] = 0x80 | (uchar & 0x3F); + } + + break; + + default: + string_add (b); + }; + + continue; + } + + if (b == '\\') + { + flags |= flag_escaped; + continue; + } + + if (b == '"') + { + if (!state.first_pass) + string [string_length] = 0; + + flags &= ~ flag_string; + string = 0; + + switch (top->type) + { + case json_string: + + top->u.string.length = string_length; + flags |= flag_next; + + break; + + case json_object: + + if (state.first_pass) + (*(json_char **) &top->u.object.values) += string_length + 1; + else + { + top->u.object.values [top->u.object.length].name + = (json_char *) top->_reserved.object_mem; + + top->u.object.values [top->u.object.length].name_length + = string_length; + + (*(json_char **) &top->_reserved.object_mem) += string_length + 1; + } + + flags |= flag_seek_value | flag_need_colon; + continue; + + default: + break; + }; + } + else + { + string_add (b); + continue; + } + } + + if (state.settings.settings & json_enable_comments) + { + if (flags & (flag_line_comment | flag_block_comment)) + { + if (flags & flag_line_comment) + { + if (b == '\r' || b == '\n' || !b) + { + flags &= ~ flag_line_comment; + -- state.ptr; /* so null can be reproc'd */ + } + + continue; + } + + if (flags & flag_block_comment) + { + if (!b) + { sprintf (error, "%d:%d: Unexpected EOF in block comment", line_and_col); + goto e_failed; + } + + if (b == '*' && state.ptr < (end - 1) && state.ptr [1] == '/') + { + flags &= ~ flag_block_comment; + ++ state.ptr; /* skip closing sequence */ + } + + continue; + } + } + else if (b == '/') + { + if (! 
(flags & (flag_seek_value | flag_done)) && top->type != json_object) + { sprintf (error, "%d:%d: Comment not allowed here", line_and_col); + goto e_failed; + } + + if (++ state.ptr == end) + { sprintf (error, "%d:%d: EOF unexpected", line_and_col); + goto e_failed; + } + + switch (b = *state.ptr) + { + case '/': + flags |= flag_line_comment; + continue; + + case '*': + flags |= flag_block_comment; + continue; + + default: + sprintf (error, "%d:%d: Unexpected `%c` in comment opening sequence", line_and_col, b); + goto e_failed; + }; + } + } + + if (flags & flag_done) + { + if (!b) + break; + + switch (b) + { + whitespace: + continue; + + default: + + sprintf (error, "%d:%d: Trailing garbage: `%c`", + state.cur_line, state.cur_col, b); + + goto e_failed; + }; + } + + if (flags & flag_seek_value) + { + switch (b) + { + whitespace: + continue; + + case ']': + + if (top && top->type == json_array) + flags = (flags & ~ (flag_need_comma | flag_seek_value)) | flag_next; + else + { sprintf (error, "%d:%d: Unexpected ]", line_and_col); + goto e_failed; + } + + break; + + default: + + if (flags & flag_need_comma) + { + if (b == ',') + { flags &= ~ flag_need_comma; + continue; + } + else + { + sprintf (error, "%d:%d: Expected , before %c", + state.cur_line, state.cur_col, b); + + goto e_failed; + } + } + + if (flags & flag_need_colon) + { + if (b == ':') + { flags &= ~ flag_need_colon; + continue; + } + else + { + sprintf (error, "%d:%d: Expected : before %c", + state.cur_line, state.cur_col, b); + + goto e_failed; + } + } + + flags &= ~ flag_seek_value; + + switch (b) + { + case '{': + + if (!new_value (&state, &top, &root, &alloc, json_object)) + goto e_alloc_failure; + + continue; + + case '[': + + if (!new_value (&state, &top, &root, &alloc, json_array)) + goto e_alloc_failure; + + flags |= flag_seek_value; + continue; + + case '"': + + if (!new_value (&state, &top, &root, &alloc, json_string)) + goto e_alloc_failure; + + flags |= flag_string; + + string = 
top->u.string.ptr; + string_length = 0; + + continue; + + case 't': + + if ((end - state.ptr) < 3 || *(++ state.ptr) != 'r' || + *(++ state.ptr) != 'u' || *(++ state.ptr) != 'e') + { + goto e_unknown_value; + } + + if (!new_value (&state, &top, &root, &alloc, json_boolean)) + goto e_alloc_failure; + + top->u.boolean = 1; + + flags |= flag_next; + break; + + case 'f': + + if ((end - state.ptr) < 4 || *(++ state.ptr) != 'a' || + *(++ state.ptr) != 'l' || *(++ state.ptr) != 's' || + *(++ state.ptr) != 'e') + { + goto e_unknown_value; + } + + if (!new_value (&state, &top, &root, &alloc, json_boolean)) + goto e_alloc_failure; + + flags |= flag_next; + break; + + case 'n': + + if ((end - state.ptr) < 3 || *(++ state.ptr) != 'u' || + *(++ state.ptr) != 'l' || *(++ state.ptr) != 'l') + { + goto e_unknown_value; + } + + if (!new_value (&state, &top, &root, &alloc, json_null)) + goto e_alloc_failure; + + flags |= flag_next; + break; + + default: + + if (isdigit (b) || b == '-') + { + if (!new_value (&state, &top, &root, &alloc, json_integer)) + goto e_alloc_failure; + + if (!state.first_pass) + { + while (isdigit (b) || b == '+' || b == '-' + || b == 'e' || b == 'E' || b == '.') + { + if ( (++ state.ptr) == end) + { + b = 0; + break; + } + + b = *state.ptr; + } + + flags |= flag_next | flag_reproc; + break; + } + + flags &= ~ (flag_num_negative | flag_num_e | + flag_num_e_got_sign | flag_num_e_negative | + flag_num_zero); + + num_digits = 0; + num_fraction = 0; + num_e = 0; + + if (b != '-') + { + flags |= flag_reproc; + break; + } + + flags |= flag_num_negative; + continue; + } + else + { sprintf (error, "%d:%d: Unexpected %c when seeking value", line_and_col, b); + goto e_failed; + } + }; + }; + } + else + { + switch (top->type) + { + case json_object: + + switch (b) + { + whitespace: + continue; + + case '"': + + if (flags & flag_need_comma) + { sprintf (error, "%d:%d: Expected , before \"", line_and_col); + goto e_failed; + } + + flags |= flag_string; + + string = 
(json_char *) top->_reserved.object_mem; + string_length = 0; + + break; + + case '}': + + flags = (flags & ~ flag_need_comma) | flag_next; + break; + + case ',': + + if (flags & flag_need_comma) + { + flags &= ~ flag_need_comma; + break; + } + + default: + sprintf (error, "%d:%d: Unexpected `%c` in object", line_and_col, b); + goto e_failed; + }; + + break; + + case json_integer: + case json_double: + + if (isdigit (b)) + { + ++ num_digits; + + if (top->type == json_integer || flags & flag_num_e) + { + if (! (flags & flag_num_e)) + { + if (flags & flag_num_zero) + { sprintf (error, "%d:%d: Unexpected `0` before `%c`", line_and_col, b); + goto e_failed; + } + + if (num_digits == 1 && b == '0') + flags |= flag_num_zero; + } + else + { + flags |= flag_num_e_got_sign; + num_e = (num_e * 10) + (b - '0'); + continue; + } + + top->u.integer = (top->u.integer * 10) + (b - '0'); + continue; + } + + num_fraction = (num_fraction * 10) + (b - '0'); + continue; + } + + if (b == '+' || b == '-') + { + if ( (flags & flag_num_e) && !(flags & flag_num_e_got_sign)) + { + flags |= flag_num_e_got_sign; + + if (b == '-') + flags |= flag_num_e_negative; + + continue; + } + } + else if (b == '.' && top->type == json_integer) + { + if (!num_digits) + { sprintf (error, "%d:%d: Expected digit before `.`", line_and_col); + goto e_failed; + } + + top->type = json_double; + top->u.dbl = (double) top->u.integer; + + num_digits = 0; + continue; + } + + if (! 
(flags & flag_num_e)) + { + if (top->type == json_double) + { + if (!num_digits) + { sprintf (error, "%d:%d: Expected digit after `.`", line_and_col); + goto e_failed; + } + + top->u.dbl += ((double) num_fraction) / (pow (10.0, (double) num_digits)); + } + + if (b == 'e' || b == 'E') + { + flags |= flag_num_e; + + if (top->type == json_integer) + { + top->type = json_double; + top->u.dbl = (double) top->u.integer; + } + + num_digits = 0; + flags &= ~ flag_num_zero; + + continue; + } + } + else + { + if (!num_digits) + { sprintf (error, "%d:%d: Expected digit after `e`", line_and_col); + goto e_failed; + } + + top->u.dbl *= pow (10.0, (double) + (flags & flag_num_e_negative ? - num_e : num_e)); + } + + if (flags & flag_num_negative) + { + if (top->type == json_integer) + top->u.integer = - top->u.integer; + else + top->u.dbl = - top->u.dbl; + } + + flags |= flag_next | flag_reproc; + break; + + default: + break; + }; + } + + if (flags & flag_reproc) + { + flags &= ~ flag_reproc; + -- state.ptr; + } + + if (flags & flag_next) + { + flags = (flags & ~ flag_next) | flag_need_comma; + + if (!top->parent) + { + /* root value done */ + + flags |= flag_done; + continue; + } + + if (top->parent->type == json_array) + flags |= flag_seek_value; + + if (!state.first_pass) + { + json_value * parent = top->parent; + + switch (parent->type) + { + case json_object: + + parent->u.object.values + [parent->u.object.length].value = top; + + break; + + case json_array: + + parent->u.array.values + [parent->u.array.length] = top; + + break; + + default: + break; + }; + } + + if ( (++ top->parent->u.array.length) > state.uint_max) + goto e_overflow; + + top = top->parent; + + continue; + } + } + + alloc = root; + } + + return root; + +e_unknown_value: + + sprintf (error, "%d:%d: Unknown value", line_and_col); + goto e_failed; + +e_alloc_failure: + + strlcpy (error, "Memory allocation failure", sizeof error); + goto e_failed; + +e_overflow: + + sprintf (error, "%d:%d: Too long (caught 
overflow)", line_and_col); + goto e_failed; + +e_failed: + + if (error_buf) + { + if (*error) + strcpy (error_buf, error); + else + strcpy (error_buf, "Unknown error"); + } + + if (state.first_pass) + alloc = root; + + while (alloc) + { + top = alloc->_reserved.next_alloc; + state.settings.mem_free (alloc, state.settings.user_data); + alloc = top; + } + + if (!state.first_pass) + json_value_free_ex (&state.settings, root); + + return 0; +} + +json_value * json_parse (const json_char * json, size_t length) +{ + json_settings settings = { 0 }; + return json_parse_ex (&settings, json, length, 0); +} + +void json_value_free_ex (json_settings * settings, json_value * value) +{ + json_value * cur_value; + + if (!value) + return; + + value->parent = 0; + + while (value) + { + switch (value->type) + { + case json_array: + + if (!value->u.array.length) + { + settings->mem_free (value->u.array.values, settings->user_data); + break; + } + + value = value->u.array.values [-- value->u.array.length]; + continue; + + case json_object: + + if (!value->u.object.length) + { + settings->mem_free (value->u.object.values, settings->user_data); + break; + } + + value = value->u.object.values [-- value->u.object.length].value; + continue; + + case json_string: + + settings->mem_free (value->u.string.ptr, settings->user_data); + break; + + default: + break; + }; + + cur_value = value; + value = value->parent; + settings->mem_free (cur_value, settings->user_data); + } +} + +void json_value_free (json_value * value) +{ + json_settings settings = { 0 }; + settings.mem_free = default_free; + json_value_free_ex (&settings, value); +} + diff --git a/extras/filters/filter-rspamd/json.h b/extras/filters/filter-rspamd/json.h new file mode 100644 index 0000000..f6549ec --- /dev/null +++ b/extras/filters/filter-rspamd/json.h @@ -0,0 +1,283 @@ + +/* vim: set et ts=3 sw=3 sts=3 ft=c: + * + * Copyright (C) 2012, 2013, 2014 James McLaughlin et al. All rights reserved. 
+ * https://github.com/udp/json-parser + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _JSON_H +#define _JSON_H + +#ifndef json_char + #define json_char char +#endif + +#ifndef json_int_t + #ifndef _MSC_VER + #include <inttypes.h> + #define json_int_t int64_t + #else + #define json_int_t __int64 + #endif +#endif + +#include <stdlib.h> + +#ifdef __cplusplus + + #include <string.h> + + extern "C" + { + +#endif + /* Options for json_parse_ex. max_memory, when non-zero, caps the bytes the parser may allocate; settings is a bit mask (see json_enable_comments). */ +typedef struct +{ + unsigned long max_memory; + int settings; + + /* Custom allocator support (leave null to use malloc/free) + */ + + void * (* mem_alloc) (size_t, int zero, void * user_data); + void (* mem_free) (void *, void * user_data); + + void * user_data; /* will be passed to mem_alloc and mem_free */ + + size_t value_extra; /* how much extra space to allocate for values? */ + +} json_settings; + /* Bit for json_settings.settings: accept line and block comments in the input. */ +#define json_enable_comments 0x01 + /* Discriminator stored in json_value.type; selects the valid member of json_value.u. */ +typedef enum +{ + json_none, + json_object, + json_array, + json_integer, + json_double, + json_string, + json_boolean, + json_null + +} json_type; + +extern const struct _json_value json_value_none; + /* One member of an object: its name (name_length characters, followed by a terminating 0 written by the parser) and its value. */ +typedef struct _json_object_entry +{ + json_char * name; + unsigned int name_length; + + struct _json_value * value; + +} json_object_entry; + /* A node of the parsed tree. parent is null for the root; _reserved is bookkeeping used by the parser itself. */ +typedef struct _json_value +{ + struct _json_value * parent; + + json_type type; + + union + { + int boolean; + json_int_t integer; + double dbl; + + struct + { + unsigned int length; + json_char * ptr; /* null terminated */ + + } string; + + struct + { + unsigned int length; + + json_object_entry * values; + + #if defined(__cplusplus) && __cplusplus >= 201103L + decltype(values) begin () const + { return values; + } + decltype(values) end () const + { return values + length; + } + #endif + + } object; + + struct + { + unsigned int length; + struct _json_value ** values; + + #if defined(__cplusplus) && __cplusplus >= 201103L + decltype(values) begin () const + { return values; + } + decltype(values) end () const + { return values + length; + } + #endif + + } array; + + } u; + + union + { + struct _json_value * next_alloc; + void * object_mem; + + } 
_reserved; + + #ifdef JSON_TRACK_SOURCE + + /* Location of the value in the source JSON + */ + unsigned int line, col; + + #endif + + + /* Some C++ operator sugar */ + + #ifdef __cplusplus + + public: + + inline _json_value () + { memset (this, 0, sizeof (_json_value)); + } + + inline const struct _json_value &operator [] (int index) const + { + if (type != json_array || index < 0 + || ((unsigned int) index) >= u.array.length) + { + return json_value_none; + } + + return *u.array.values [index]; + } + + inline const struct _json_value &operator [] (const char * index) const + { + if (type != json_object) + return json_value_none; + + for (unsigned int i = 0; i < u.object.length; ++ i) + if (!strcmp (u.object.values [i].name, index)) + return *u.object.values [i].value; + + return json_value_none; + } + + inline operator const char * () const + { + switch (type) + { + case json_string: + return u.string.ptr; + + default: + return ""; + }; + } + + inline operator json_int_t () const + { + switch (type) + { + case json_integer: + return u.integer; + + case json_double: + return (json_int_t) u.dbl; + + default: + return 0; + }; + } + + inline operator bool () const + { + if (type != json_boolean) + return false; + + return u.boolean != 0; + } + + inline operator double () const + { + switch (type) + { + case json_integer: + return (double) u.integer; + + case json_double: + return u.dbl; + + default: + return 0; + }; + } + + #endif + +} json_value; + /* Parse length bytes of JSON with default settings; returns the root value, or null on error. */ +json_value * json_parse (const json_char * json, + size_t length); + /* Size of the parser's internal error message buffer; a non-null error argument to json_parse_ex is filled with strcpy from it, so it must be at least this large. */ +#define json_error_max 128 +json_value * json_parse_ex (json_settings * settings, + const json_char * json, + size_t length, + char * error); + +void json_value_free (json_value *); + + +/* Not usually necessary, unless you used a custom mem_alloc and now want to + * use a custom mem_free. 
+ */ +void json_value_free_ex (json_settings * settings, + json_value *); + + +#ifdef __cplusplus + } /* extern "C" */ +#endif + +#endif + + diff --git a/extras/filters/filter-rspamd/rspamd.c b/extras/filters/filter-rspamd/rspamd.c new file mode 100644 index 0000000..8f5c039 --- /dev/null +++ b/extras/filters/filter-rspamd/rspamd.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2016 Gilles Chehade <gilles@poolp.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "includes.h" + +#include <sys/types.h> + +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <errno.h> +#include <ctype.h> +#include <limits.h> +#include <stdlib.h> +#include <unistd.h> + +#include "smtpd-defines.h" +#include "smtpd-api.h" +#include "log.h" +#include "iobuf.h" +#include "ioev.h" + +#include "rfc2822.h" +#include "rspamd.h" +#include "json.h" + +struct sockaddr_storage ss; + +/* XXX + * this needs to be handled differently, but lets focus on the filter for now + */ +void +rspamd_resolve(const char *h, const char *p) +{ + struct addrinfo hints, *addresses, *ai; + int fd, r; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = PF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + if ((r = getaddrinfo(h, p, &hints, &addresses))) + fatalx("resolve: getaddrinfo %s", gai_strerror(r)); + for (ai = addresses; ai; ai = ai->ai_next) { + if ((fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) == -1) + continue; + if (connect(fd, ai->ai_addr, ai->ai_addrlen) == -1) { + close(fd); + continue; + } + close(fd); + memmove(&ss, ai->ai_addr, ai->ai_addrlen); + break; + } + freeaddrinfo(addresses); + if (!ai) + fatalx("resolve: failed"); +} + +static void +headers_callback(const struct rfc2822_header *hdr, void *arg) +{ + struct session *rs = arg; + struct rfc2822_line *l; + char buffer[4096]; + int i = 0; + + log_debug("#######1"); + TAILQ_FOREACH(l, &hdr->lines, next) { + if (i++ == 0) { + snprintf(buffer, sizeof buffer, "%s:%s", hdr->name, l->buffer); + filter_api_writeln(rs->id, buffer); + continue; + } + filter_api_writeln(rs->id, l->buffer); + } +} + +static void +dataline_callback(const char *line, void *arg) +{ + struct session *rs = arg; + + log_debug("debug: STREAM BACK: [%s]", rs->tx.line); + filter_api_writeln(rs->id, rs->tx.line); +} + + +struct session * +session_init(uint64_t id) +{ + struct session *rs; + + rs = xcalloc(1, 
sizeof *rs, "on_connect"); + rs->id = id; + return rs; +} + +void +session_reset(struct session *rs) +{ + iobuf_clear(&rs->iobuf); + io_clear(&rs->io); + + filter_api_datahold_close(rs->id); + + free(rs->tx.from); + free(rs->tx.rcpt); + + rs->tx.eom = 0; + rs->tx.error = 0; + rs->tx.from = NULL; + rs->tx.rcpt = NULL; +} + +void +session_free(struct session *rs) +{ + session_reset(rs); + + free(rs->ip); + free(rs->hostname); + free(rs->helo); + free(rs); +} + +int +rspamd_connect(struct session *rs) +{ + iobuf_xinit(&rs->iobuf, LINE_MAX, LINE_MAX, "on_eom"); + io_init(&rs->io, -1, rs, rspamd_io, &rs->iobuf); + if (io_connect(&rs->io, (struct sockaddr *)&ss, NULL) == -1) + return 0; + return 1; +} + +void +rspamd_disconnect(struct session *rs) +{ + iobuf_clear(&rs->iobuf); + io_clear(&rs->io); +} + +void +rspamd_connected(struct session *rs) +{ + filter_api_accept(rs->id); +} + +void +rspamd_error(struct session *rs) +{ + filter_api_reject_code(rs->id, FILTER_FAIL, 421, "temporary failure"); + session_reset(rs); +} + +void +rspamd_send_query(struct session *rs) +{ + iobuf_xfqueue(&rs->iobuf, "io", + "POST /check HTTP/1.0\r\n" + "Transfer-Encoding: chunked\r\n" + "Pass: all\r\n" + "IP: %s\r\n" + "Helo: %s\r\n" + "Hostname: %s\r\n" + "From: %s\r\n" + "Rcpt: %s\r\n" + "\r\n", + rs->ip, + rs->helo, + rs->hostname, + rs->tx.from, + rs->tx.rcpt); + io_reload(&rs->io); +} + +void +rspamd_send_chunk(struct session *rs, const char *line) +{ + if (line) + iobuf_xfqueue(&rs->iobuf, "io", "%x\r\n%s\r\n\r\n", + strlen(line)+2, line); + else { + iobuf_xfqueue(&rs->iobuf, "io", "0\r\n\r\n"); + rs->tx.eom = 1; + } + + io_reload(&rs->io); +} + +void +rspamd_read_response(struct session *rs) +{ + char *line; + + while ((line = iobuf_getline(&rs->iobuf, NULL))) + if (strlen(line) == 0) + rs->rspamd.eoh = 1; + + if (rs->rspamd.eoh) { + if (iobuf_len(&rs->iobuf) != 0) { + rs->rspamd.body = xmemdup(iobuf_data(&rs->iobuf), + iobuf_len(&rs->iobuf) + 1, "rspamd_read_response"); + 
rs->rspamd.body[iobuf_len(&rs->iobuf)] = 0; + } + } + iobuf_normalize(&rs->iobuf); +} + +int +rspamd_parse_response(struct session *rs) +{ + json_value *jv; + json_value *def = NULL; + char *name; + json_value *val; + size_t i; + + jv = json_parse(rs->rspamd.body, strlen(rs->rspamd.body)); + if (jv == NULL || jv->type != json_object) + goto fail; + + for (i = 0; i < jv->u.object.length; ++i) + if (strcmp(jv->u.object.values[i].name, "default") == 0) { + def = jv->u.object.values[i].value; + break; + } + if (def == NULL) + goto fail; + + for (i = 0; i < def->u.object.length; ++i) { + name = def->u.object.values[i].name; + + if (strcmp(name, "is_spam") == 0) { + val = def->u.object.values[i].value; + if (val->type != json_boolean) + goto fail; + rs->rspamd.is_spam = val->u.boolean; + } + else if (strcmp(name, "is_skipped") == 0) { + val = def->u.object.values[i].value; + if (val->type != json_boolean) + goto fail; + rs->rspamd.is_skipped = val->u.boolean; + } + else if (strcmp(name, "score") == 0) { + val = def->u.object.values[i].value; + if (val->type != json_double) + goto fail; + rs->rspamd.score = val->u.dbl; + } + else if (strcmp(name, "required_score") == 0) { + val = def->u.object.values[i].value; + if (val->type != json_double) + goto fail; + rs->rspamd.required_score = val->u.dbl; + } + else if (strcmp(name, "action") == 0) { + val = def->u.object.values[i].value; + if (val->type != json_string) + goto fail; + log_debug("[%.*s]", val->u.string.length, val->u.string.ptr); + if (strncmp(val->u.string.ptr, "no action", + val->u.string.length) == 0) + rs->rspamd.action = NO_ACTION; + else if (strncmp(val->u.string.ptr, "greylist", + val->u.string.length) == 0) + rs->rspamd.action = GREYLIST; + else if (strncmp(val->u.string.ptr, "add header", + val->u.string.length) == 0) + rs->rspamd.action = ADD_HEADER; + else if (strncmp(val->u.string.ptr, "rewrite subject", + val->u.string.length) == 0) + rs->rspamd.action = REWRITE_SUBJECT; + else if 
(strncmp(val->u.string.ptr, "soft reject", + val->u.string.length) == 0) + rs->rspamd.action = SOFT_REJECT; + else if (strncmp(val->u.string.ptr, "reject", + val->u.string.length) == 0) + rs->rspamd.action = REJECT; + } + else if (strcmp(name, "subject") == 0) { + val = def->u.object.values[i].value; + if (val->type != json_string) + goto fail; + rs->rspamd.subject = xmemdup(val->u.string.ptr, + val->u.string.length, "rspamd_parse_result"); + } + } + + json_value_free(jv); + return 1; + +fail: + json_value_free(jv); + return -1; +} + +void +rspamd_spam_header(const char *header, void *arg) +{ + struct session *rs = arg; + char buffer[4096]; + + snprintf(buffer, sizeof buffer, "X-Spam-Flag: %s", + rs->rspamd.is_spam ? "Yes" : "No"); + filter_api_writeln(rs->id, buffer); + + snprintf(buffer, sizeof buffer, "X-Spam-Score: %.2f", + rs->rspamd.score); + filter_api_writeln(rs->id, buffer); + + snprintf(buffer, sizeof buffer, "X-Spam-Status: %s, score=%.2f, required=%.2f", + rs->rspamd.is_spam ? 
"Yes" : "No", + rs->rspamd.score, + rs->rspamd.required_score); + filter_api_writeln(rs->id, buffer); +} + +int +rspamd_proceed(struct session *rs) +{ + rfc2822_parser_init(&rs->tx.rfc2822_parser); + rfc2822_parser_reset(&rs->tx.rfc2822_parser); + rfc2822_header_default_callback(&rs->tx.rfc2822_parser, + headers_callback, rs); + rfc2822_body_callback(&rs->tx.rfc2822_parser, + dataline_callback, rs); + + switch (rs->rspamd.action) { + case NO_ACTION: + return 1; + + case SOFT_REJECT: + filter_api_reject_code(rs->id, FILTER_FAIL, 421, + "message content rejected"); + return 0; + + case GREYLIST: + filter_api_reject_code(rs->id, FILTER_FAIL, 421, + "greylisted"); + return 0; + + case REJECT: + filter_api_reject_code(rs->id, FILTER_FAIL, 550, + "message content rejected"); + return 0; + + case ADD_HEADER: + /* insert header */ + log_debug("ADDING X-SPAM"); + rfc2822_missing_header_callback(&rs->tx.rfc2822_parser, + "x-spam", rspamd_spam_header, rs); + return 1; + + case REWRITE_SUBJECT: + /* rewrite subject */ + return 1; + } + + + +} + + +void +rspamd_io(struct io *io, int evt) +{ + struct session *rs = io->arg; + + switch (evt) { + case IO_CONNECTED: + rspamd_connected(rs); + rspamd_send_query(rs); + io_set_write(io); + break; + + case IO_LOWAT: + /* we've hit EOM and no more data, toggle to read */ + if (rs->tx.eom) + io_set_read(io); + break; + + case IO_DATAIN: + /* accumulate reply */ + rspamd_read_response(rs); + break; + + case IO_DISCONNECTED: + rspamd_disconnect(rs); + + /* we're done with rspamd, if there was a local error + * during transaction, reject now, else move forward. + */ + if (rs->tx.error) { + rspamd_error(rs); + break; + } + /* process rspamd reply and start processing datahold */ + if (! rspamd_parse_response(rs)) { + rspamd_error(rs); + break; + } + + if (! 
rspamd_proceed(rs)) + break; + + filter_api_datahold_start(rs->id); + break; + + case IO_TIMEOUT: + case IO_ERROR: + default: + //rspamd_error(rs); + break; + } + return; +} + diff --git a/extras/filters/filter-rspamd/rspamd.h b/extras/filters/filter-rspamd/rspamd.h new file mode 100644 index 0000000..d7446d2 --- /dev/null +++ b/extras/filters/filter-rspamd/rspamd.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016 Gilles Chehade <gilles@poolp.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "includes.h" + +#define RSPAMD_HOST "127.0.0.1" +#define RSPAMD_PORT "11333" + +struct session { + uint64_t id; + + struct iobuf iobuf; + struct io io; + + char *ip; + char *hostname; + char *helo; + + struct tx { + FILE *fp; + char *line; + int error; + int eom; + + char *from; + char *rcpt; + struct rfc2822_parser rfc2822_parser; + } tx; + + struct rspamd_response { + int eoh; + char *body; + + int is_spam; + int is_skipped; + double score; + double required_score; + enum { + NO_ACTION, + GREYLIST, + ADD_HEADER, + REWRITE_SUBJECT, + SOFT_REJECT, + REJECT + } action; + char *subject; + } rspamd; + +}; + +struct session *session_init(uint64_t); +void session_reset(struct session *); +void session_free(struct session *); + +int rspamd_connect(struct session *); +void rspamd_connected(struct session *); +void rspamd_error(struct session *); +void rspamd_io(struct io *, int); +void rspamd_send_query(struct session *); +void rspamd_send_chunk(struct session *, const char *); +void rspamd_read_response(struct session *); +int rspamd_parse_response(struct session *); + + |