/*****
*
* Copyright (C) 2002 Yoann Vandoorselaere <yoann@prelude-ids.org>
* All Rights Reserved
*
* This file is part of the Prelude program.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by 
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING.  If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Written by Yoann Vandoorselaere <yoann@prelude-ids.org>
*
*****/


/*
 * This code is inspired by the Snort spp_unidecode preprocessor.
 * It provides the same functionality, but the code was entirely rewritten.
 */


#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <inttypes.h>
#include <assert.h>

#include "protocol.h"
#include "unicode-to-ascii.h"


/*
 * Location of the decoded request URI inside the packet payload.
 * Filled in by decode_http_packet() and read back by match_uricontent().
 */
typedef struct {
        unsigned char *uri;     /* first byte of the decoded URI (points into the packet data) */
        int length;             /* number of bytes in the URI; 0 while the end was not found */
} http_uri_t;


/*
 * Lookup table indexed by byte value: non-zero for the bytes treated
 * as whitespace (space, LF, CR, TAB). Populated in plugin_init().
 */
static uint8_t is_whitespace[256];


/*
 * plugin stuff
 */
static int is_enabled = 0;              /* toggled by set_http_state() */
static port_list_t *port_list;          /* destination ports inspected by http_decode() */
static int http_plugin_id = 0;          /* id obtained from plugin_request_new_id() */
static plugin_protocol_t plugin;

/* alert object shared by every log_*() helper of this file */
static nids_alert_t alert;

/*
 * run-time options.
 */
static int do_detect;                   /* set to 1 per packet, cleared once an alert was emitted */
static int max_whitespace = 10;         /* whitespace count at which an alert is raised */
static int end_on_url_param = 0;        /* when set, '?' terminates the URI */
static int check_double_encode = 0;     /* when set, alert on an escaped '%' character */
static int iis_flip_backslash = 0;      /* when set, '\\' is rewritten to '/' */


#ifdef DEBUG

/*
 * Debug helper: print the length of a buffer followed by its content
 * on stdout, replacing every non printable byte with '?'.
 */
static void dump_str(unsigned char *str, uint16_t len) 
{
        unsigned int pos;

        printf("length=%d fstring=\"", len);

        for ( pos = 0; pos < len; pos++ ) {
                int ch = str[pos];

                putchar(isprint(ch) ? ch : '?');
        }

        printf("\"\n");
}


/*
 * Debug helper: dump the decoded URI described by @ptr using dump_str().
 */
static void dump_uri(http_uri_t *ptr) 
{
        dump_str(ptr->uri, ptr->length);
}

#endif




/*
 * Signature callback for the "uricontent" key: run the content match
 * against the URI decoded by this plugin instead of the raw payload.
 *
 * Returns -1 when the packet carries no application data or when no
 * URI was attached to it by this plugin; otherwise returns whatever
 * signature_match_content() returns.
 */
static int match_uricontent(packet_container_t *pc, void *data) 
{
        http_uri_t *decoded;
        int no_app_data = pc->application_layer_depth < 0;

        /*
         * no data available in the given packet.
         */
        if ( no_app_data )
                return -1;

        /*
         * the attached protocol data have to be ours.
         */
        if ( pc->protocol_plugin_id != http_plugin_id || ! pc->protocol_plugin_data )
                return -1;

        decoded = pc->protocol_plugin_data;

        assert(decoded->length < pc->packet[pc->application_layer_depth].len);

        return signature_match_content(decoded->uri, decoded->length, data);
}




/*
 * Emit an "Unknown Unicode Mapping" alert for a code point that could
 * not be translated to ASCII.
 *
 * @packet:  packet the code point was found in.
 * @unicode: the offending code point. The parameter is 32 bits wide so
 *           that multi-byte code points handed over by the callers are
 *           reported as is instead of being truncated to their low byte.
 *
 * Nothing is done when do_detect is not set; do_detect is cleared once
 * the alert was emitted.
 */
static void log_unknown(packet_container_t *packet, uint32_t unicode)
{
        static idmef_impact_t impact;
        
        if ( ! do_detect )
                return;
        
        alert.impact = &impact;
        impact.type = other;
        impact.severity = impact_low;
        impact.completion = 0;
        impact.description.string = NULL;
        idmef_string_set_constant(&alert.classification.name,
                                  "Unknown Unicode Mapping");
        
        /*
         * explicit cast so the argument matches the %u conversion
         * whatever the underlying type of uint32_t is.
         */
        nids_alert((plugin_generic_t *) &plugin, packet, &alert,
                   "Unknown Unicode Mapping: %u", (unsigned int) unicode);
        do_detect = 0;
}




/*
 * Emit an "Invalid Unicode String detected" alert.
 *
 * @packet: packet the invalid sequence was found in.
 * @buf:    the two hexadecimal characters of the offending escape, or
 *          NULL when the caller has no such string to report (a NULL
 *          pointer must never reach a %s conversion).
 *
 * Nothing is done when do_detect is not set; do_detect is cleared once
 * the alert was emitted.
 */
static void log_invalid_utf8(packet_container_t *packet, const char *buf)
{
        static idmef_impact_t impact;

        if ( ! do_detect )
                return;

        alert.impact = &impact;
        impact.type = other;
        impact.severity = impact_low;
        impact.completion = 0;
        impact.description.string = NULL;
        idmef_string_set_constant(&alert.classification.name,
                                  "Invalid Unicode String detected");

        if ( buf )
                nids_alert((plugin_generic_t *) &plugin, packet, &alert,
                           "Found invalid UTF-8 sequence : %%%s", buf);
        else
                nids_alert((plugin_generic_t *) &plugin, packet, &alert,
                           "Found invalid UTF-8 sequence");

        do_detect = 0;
}




/*
 * log_other() has to be called with string literals: the lengths handed
 * to do_log_other() are computed with sizeof, and so include the
 * terminating NUL byte.
 */
#define log_other(packet, class, impact) do_log_other(packet, class, sizeof(class), impact, sizeof(impact))

/*
 * Emit a high severity alert using the given classification name and
 * impact description, then clear do_detect.
 *
 * The impact object currently referenced by alert.impact is the one
 * being filled.
 */
static void do_log_other(packet_container_t *packet,
                      const char *class, size_t clen,
                      const char *impact, size_t ilen) 
{
        /*
         * classification.
         */
        alert.classification.name.string = class;
        alert.classification.name.len = clen;

        /*
         * impact.
         */
        alert.impact->description.string = impact;
        alert.impact->description.len = ilen;
        alert.impact->completion = 0;
        alert.impact->severity = impact_high;
        alert.impact->type = other;

        nids_alert((plugin_generic_t *) &plugin, packet, &alert, NULL);
        do_detect = 0;
}





/*
 * Description of the lead byte of each multi-byte UTF-8 sequence.
 *
 * A byte starts the described sequence when (byte & smask) == sequence.
 * The bits of the lead byte that are not covered by smask carry data.
 * The table is terminated by an entry whose smask is 0.
 *
 *   110xxxxx -> 2 bytes      111110xx -> 5 bytes
 *   1110xxxx -> 3 bytes      1111110x -> 6 bytes
 *   11110xxx -> 4 bytes
 */
static struct utf8 {
        int smask;              /* bits of the lead byte making the header */
        int sequence;           /* expected value of the header bits       */
        int sequence_len;       /* total number of bytes in the sequence   */
} utf8_tbl[] = {
        { 0xe0, 0xc0, 2 },
        { 0xf0, 0xe0, 3 },
        { 0xf8, 0xf0, 4 },
        { 0xfc, 0xf8, 5 },
        /*
         * The header of a 6 bytes sequence is 7 bits wide (1111110x):
         * a mask of 0xff would reject the 0xfd lead byte and throw
         * away the data bit.
         */
        { 0xfe, 0xfc, 6 },
        { 0, 0, 0       },
};


        

/*
 * Strip the "10" header from a UTF-8 continuation byte, in place.
 *
 * Every byte following the lead byte of a multi-byte sequence has to be
 * in the range 0x80 to 0xBF. Returns -1, leaving @in untouched, when
 * this is not the case, 0 otherwise.
 *
 * *overlong is set to 1 when the byte is exactly 0x80 (no data bit
 * set); it is never reset here.
 */
static inline int utf8_data_remove_header(uint8_t *in, int *overlong) 
{
        const uint8_t byte = *in;

        if ( byte > 0xbf || byte < 0x80 )
                return -1;

        if ( byte == 0x80 )
                *overlong = 1;

        /*
         * keep the six low order bits only.
         */
        *in = byte & 0x3f;

        return 0;
}




/*
 * Clear, in place, the header bits of a UTF-8 lead byte so that only
 * the data bits remain. The header bits are the ones set in the smask
 * of @sequence.
 */
static inline void utf8_sequence_remove_header(struct utf8 *sequence, uint32_t *in) 
{
        const uint32_t data_bits = ~sequence->smask;

        *in = *in & data_bits;
}



/*
 * Return the number of data bits carried by the bytes following the
 * lead byte of @sequence: each of them holds 6 bits of data. The
 * result is used by the caller as the shift to apply to the data of
 * the lead byte.
 */
static inline int utf8_get_len(struct utf8 *sequence) 
{
        const int following = sequence->sequence_len - 1;

        return following * 6;
}




/*
 * Merge the data bits of one UTF-8 byte into the code point being
 * built: @data is OR'ed into *unicode at bit position *offset, then
 * *offset is moved down by 6, the amount of data bits held by each
 * following byte.
 */
static inline void utf8_to_unicode(wchar_t *unicode, int *offset, uint8_t data) 
{
        *unicode = *unicode | (data << *offset);
        *offset = *offset - 6;
}





static struct utf8 *utf8_get_sequence(uint8_t byte)
{
        int i;
        
        for ( i = 0; utf8_tbl[i].smask != 0; i++ )
                if ( (byte & utf8_tbl[i].smask) == utf8_tbl[i].sequence ) 
                        return &utf8_tbl[i];
        return NULL;
}



/*
 * Decode an "%uXXXX" escape.
 *
 * @packet: packet being decoded, used for alerting.
 * @in:     points to the four hexadecimal digits, past the "%u" prefix.
 * @inlen:  number of bytes available at @in.
 * @out:    where the resulting ASCII character is stored.
 *
 * The four digits form one 16 bits code point (XXXX), which is then
 * translated with unicode_to_ascii().
 *
 * Returns 6, the length of the whole "%uXXXX" escape, on success.
 * Returns -1 if the escape is truncated, if it does not consist of
 * four hexadecimal digits, or if the code point has no usable mapping
 * (an alert is emitted through log_unknown() in the latter case).
 */
static int iss_sequence_to_unicode(packet_container_t *packet,
                                   const unsigned char *in, size_t inlen, unsigned char *out)
{
        int i, ret;
        char c, buf[5];
        wchar_t unichar;
        
        if ( inlen < 4 ) {
                /*
                 * @in is raw packet data, it is not NUL terminated and
                 * can't be printed using a plain %s.
                 */
                log(LOG_ERR, "overflow, inlen=%lu\n", (unsigned long) inlen);
                return -1;
        }

        for ( i = 0; i < 4; i++ ) {
                /*
                 * strtoul() silently stops at the first invalid digit;
                 * refuse anything that is not made of 4 hex digits.
                 */
                if ( ! isxdigit(in[i]) )
                        return -1;
                
                buf[i] = in[i];
        }
        buf[4] = 0;

        /*
         * The first two digits are the high order byte of the code
         * point, the last two the low order one.
         */
        unichar = (wchar_t) strtoul(buf, NULL, 16);

        ret = unicode_to_ascii(&c, unichar);
        if ( ret < 0 || c == 0 ) {
                log_unknown(packet, unichar);
                return -1;
        }

        *out = c;
        
        /*
         * %uXXXX
         */
        return 6; 
}




/*
 * Decode the "%XX" escaped continuation bytes of a UTF-8 sequence and
 * merge their data bits into *out.
 *
 * @packet: packet being decoded, used for alerting.
 * @in:     first '%' of the escaped continuation bytes.
 * @seqlen: number of input bytes to process (3 per escaped byte).
 * @out:    code point being built.
 * @offset: bit position where the data of the first byte has to go.
 *
 * Returns 0 on success, -1 (after reporting an invalid UTF-8 sequence)
 * when a byte is not escaped or is not a valid continuation byte.
 */
static int http_sequence_to_unicode(packet_container_t *packet, const unsigned char *in,
                                    size_t seqlen, wchar_t *out, int offset) 
{
        char hexstr[3];
        uint8_t byte;
        int pos = 0, overlong = 0;

        while ( pos < seqlen ) {

                if ( in[pos] != '%' ) {
                        log_invalid_utf8(packet, NULL);
                        return -1;
                }

                /*
                 * the two characters following the '%'.
                 */
                hexstr[0] = in[pos + 1];
                hexstr[1] = in[pos + 2];
                hexstr[2] = 0;

                byte = strtoul(hexstr, NULL, 16);

                if ( utf8_data_remove_header(&byte, &overlong) < 0 ) {
                        log_invalid_utf8(packet, hexstr);
                        return -1;
                }

                utf8_to_unicode(out, &offset, byte);

                /*
                 * '%' + two hex characters.
                 */
                pos += 3;
        }

        if ( overlong )
                log_other(packet, "Overlong UTF-8 sequence received",
                          "If the HTTP server is not carefull, this might allow to hide "
                          "character like %2f (/) by using forbidden UTF-8 sequence like "
                          "%fc%80%80%80%80%af which could be converted to %2f without being detected");

        return 0;
}




/*
 * Decode a UTF-8 sequence whose bytes are all "%XX" escaped.
 *
 * @hex:   value of the already decoded lead byte.
 * @p:     packet being decoded, used for alerting.
 * @in:    points to the '%' of the lead byte escape.
 * @inlen: number of bytes available at @in.
 * @out:   where the resulting ASCII character is stored.
 *
 * Returns the number of input bytes making the sequence on success.
 * Returns -1 if @hex does not start a known sequence, if the input is
 * too short to hold the whole sequence, if a continuation byte is
 * invalid, or if the resulting code point has no usable mapping.
 */
static int read_http_encapsulated_utf8_sequence(int hex, packet_container_t *p,
                                                const unsigned char *in, size_t inlen, unsigned char *out) 
{
        unsigned char c;
        int offset, ret;
        uint32_t lead;
        struct utf8 *utf8;
        wchar_t unichar = 0;
        
        /*
         * handle UTF-8 here.
         */
        utf8 = utf8_get_sequence(hex);
        if ( ! utf8 ) 
                return -1;

        /*
         * Each byte in the sequence is 2 character + the '%' escape character.
         */
        if ( inlen < (size_t) (utf8->sequence_len * 3) ) {
                /*
                 * FIXME: issue an alert here
                 */
                return -1;
        }
        
        offset = utf8_get_len(utf8);

        /*
         * utf8_sequence_remove_header() works on an uint32_t: go through
         * a variable of the right type rather than handing it the
         * address of an int.
         */
        lead = (uint32_t) hex;
        utf8_sequence_remove_header(utf8, &lead);
        utf8_to_unicode(&unichar, &offset, lead);
        
        /*
         * the continuation bytes start after the lead byte escape (3 characters).
         */
        ret = http_sequence_to_unicode(p, in + 3, (utf8->sequence_len - 1) * 3, &unichar, offset);
        if ( ret < 0 )
                return -1;

        ret = unicode_to_ascii(&c, unichar);
        if ( ret < 0 || c == 0 ) {
                log_unknown(p, unichar);
                return -1;
        }
        
        *out = c;

        return utf8->sequence_len * 3; 
}




/*
 * from http://www.cl.cam.ac.uk/~mgk25/unicode.html:
 *
 * The first byte of a multibyte sequence that represents a non-ASCII character
 * is always in the range 0xC0 to 0xFD and it indicates how many bytes follow for
 * this character.
 *
 * Returns 0 if @hex is such a first byte, -1 otherwise.
 */
static inline int is_utf8_sequence(int hex) 
{
        const int in_range = (hex >= 0xc0) && (hex <= 0xfd);

        return in_range ? 0 : -1;
}




/*
 * Decode the escape sequence starting at @in (in[0] is the '%').
 *
 * @p:     packet being decoded, used for alerting.
 * @in:    points to the '%' character.
 * @dsize: number of bytes available at @in.
 * @out:   where the decoded character is stored.
 *
 * Handles "%uXXXX", plain "%XX" ASCII escapes and "%XX" escaped UTF-8
 * sequences. Returns the number of input bytes consumed, or -1 if the
 * data is too short, is not a well formed escape, or can't be decoded.
 */
static int process_escaped_data(packet_container_t *p, const unsigned char *in, uint16_t dsize, unsigned char *out) 
{
        int hex;
        char buf[3];

        if ( dsize < 3 )
                return -1;

        if ( in[1] == 'u' || in[1] == 'U' ) 
                return iss_sequence_to_unicode(p, in + 2, dsize - 2, out);

        /*
         * strtoul() stops at the first invalid character without
         * reporting it: "%zz" would decode to 0 and "%8z" to 8.
         * Only a '%' followed by two hex digits is an escape.
         */
        if ( ! isxdigit(in[1]) || ! isxdigit(in[2]) )
                return -1;
        
        buf[0] = in[1];
        buf[1] = in[2];
        buf[2] = 0;
        
        /*
         * convert string to real hex.
         */
        hex = strtoul(buf, NULL, 16);
        if ( hex < 128 ) {
            
                /*
                 * this is a valid ASCII character.
                 */
                if ( check_double_encode && hex == '%' ) {
                        log_other(p, "HTTP escape sequence hide another sequence",
                                  "If the HTTP server is not carefull, this might allow to hide "
                                  "character like %2f (/) by using escape sequence like %25%32%66 which "
                                  "could be converted to %2f without being detected");
                } 
                
                *out = hex;
                
                return 3;
        }

        if ( is_utf8_sequence(hex) == 0 )
                return read_http_encapsulated_utf8_sequence(hex, p, in, dsize, out);
        
        return -1;
}






/*
 * Decode, in place, the escape sequences found in the first @dsize
 * bytes of @data and locate the request URI.
 *
 * 'i' is the read position and 'len' the write position: as a decoded
 * escape is shorter than its encoded form, 'len' never gets ahead of 'i'.
 *
 * Once the URI was seen, packet->protocol_plugin_data points to a
 * static http_uri_t describing it, which is what match_uricontent()
 * reads back. Always returns 0.
 *
 * NOTE(review): if the data ends before the whitespace (or '?') ending
 * the URI is met, http_uri.length remains 0 although
 * protocol_plugin_data is set -- confirm this is what callers expect.
 */
static int decode_http_packet(packet_container_t *packet, unsigned char *data, uint16_t dsize) 
{
        unsigned char new;
        int len = 0, ret, i = 0;
        static http_uri_t http_uri;
        int uri_method_found = 0, space_before_method = 0, space_after_method = 0;

        /*
         * allow alerts again for this packet, and forget the previous URI.
         */
        do_detect = 1;
        http_uri.uri = NULL;
        http_uri.length = 0;
        packet->protocol_plugin_data = NULL;
        
        while ( i < dsize && ! http_uri.length ) {
                
                ret = new = 0;

                /*
                 * the first non whitespace byte is considered to be
                 * the start of the request method.
                 */
                if ( ! is_whitespace[data[i]] )
                        uri_method_found = 1;

                if ( iis_flip_backslash && data[i] == '\\' )
                        data[i] = '/';
                
                /*
                 * on success, ret is the number of input bytes making
                 * the escape and new the character it decodes to.
                 */
                if ( data[i] == '%' )
                        ret = process_escaped_data(packet, &data[i], dsize - i, &new);
                
                else if ( is_whitespace[data[i]] ) {
                        /*
                         * the alert is raised once, when the counter
                         * reaches max_whitespace exactly.
                         */
                        if ( uri_method_found && ++space_after_method == max_whitespace ) 
                                log_other(packet, "High number of space found in after URI method",
                                          "This could be issued in order to evade the IDS.");

                        else if ( ! uri_method_found && ++space_before_method == max_whitespace )  
                                log_other(packet, "High number of space found before URI method",
                                          "This could be issued in order to evade the IDS.");
                }
                
                /*
                 * The URI begins at the first non whitespace byte met
                 * after the whitespace following the method. It is
                 * recorded at the write position, where the decoded
                 * byte is about to be stored.
                 */
                if ( ! http_uri.uri && space_after_method && ! is_whitespace[data[i]] ) {                                                
                        http_uri.uri = data + len;
                        packet->protocol_plugin_id = http_plugin_id;
                        packet->protocol_plugin_data = &http_uri;
                }
                
                else if ( http_uri.uri && (is_whitespace[data[i]] || (end_on_url_param && data[i] == '?')) ) {
                        /*
                         * length is address of data we are at - address where URI begin.
                         */                        
                        http_uri.length = (data + len) - http_uri.uri;
                        break;
                }
                
                /*
                 * Store the decoded character if there is one, the
                 * original byte otherwise, and skip the whole escape
                 * when one was decoded.
                 *
                 * NOTE(review): an escape decoding to 0 (such as "%00")
                 * leaves new == 0 with ret > 0: the '%' is then kept and
                 * the two following characters are dropped -- confirm
                 * this is intended.
                 */
                data[len++] = new ? new : data[i];
                i += (ret > 0) ? ret : 1;
        }

#if 0
        /*
         * commented this out, cause we might be getting a portion
         * of the data sent to port 80 (which might be *after* the URL,
         * like client setting).
         *
         * This tend to generate a lot of false positive. The known fix
         * is to enable tcp stream reassembly, but still, it don't mean
         * you won't get the alert.
         */
        if ( ! http_uri.uri ) 
                log_other(packet, "HTTP request without URL", NULL);
#endif
        
#ifdef DEBUG
        dump_uri(&http_uri);
#endif
        
        /*
         * move what follows the URI down, right after the decoded data.
         *
         * NOTE(review): the new length (len) is not reported to the
         * caller -- verify nothing relies on the bytes past it.
         */
        while ( i < dsize ) 
                data[len++] = data[i++];
        
        return 0;
}





/*
 * Plugin entry point, registered through plugin_set_running_func().
 *
 * Only TCP packets whose destination port is accepted by
 * protocol_plugin_is_port_ok() for port_list are decoded. Returns -1
 * for any other packet, the result of decode_http_packet() otherwise.
 */
static int http_decode(packet_container_t *pc, unsigned char *data, int psize) 
{
        uint16_t dport;
        int depth = pc->transport_layer_depth;

        /*
         * no transport layer.
         */
        if ( depth < 0 )
                return -1;

        if ( pc->packet[depth].proto != p_tcp )
                return -1;

        dport = extract_uint16(&pc->packet[depth].p.tcp->th_dport);

        if ( protocol_plugin_is_port_ok(port_list, dport) < 0 )
                return -1;

        return decode_http_packet(pc, data, psize);
}



/*
 * Parser for the "uricontent" signature key: the argument is parsed
 * by signature_parse_content(), then the match callback of the
 * resulting rule is replaced with match_uricontent().
 *
 * Returns 0 on success, -1 if the argument could not be parsed.
 */
static int parse_uricontent(char *str, rules_t **rules) 
{
        if ( signature_parse_content(str, rules) < 0 )
                return -1;

        (*rules)->rule->leaf_match->leaf_match = &match_uricontent;

        return 0;
}



/*
 * Add the ports inspected by default (80 and 8080) to port_list.
 */
static void setup_default_port_list(void) 
{
        static const int default_ports[] = { 80, 8080 };
        size_t idx;

        for ( idx = 0; idx < sizeof(default_ports) / sizeof(default_ports[0]); idx++ )
                protocol_plugin_add_port_to_list(port_list, default_ports[idx]);
}



/*
 * "port-list" option callback: hand the option argument over to
 * protocol_plugin_add_string_port_to_list(). The result of that call
 * is not checked, success is always reported.
 */
static int set_port_list(prelude_option_t *opt, const char *optarg) 
{
        protocol_plugin_add_string_port_to_list(port_list, optarg);
        return prelude_option_success;
}




/*
 * "double-encode" option callback: turn on the alert raised when an
 * escape sequence decodes to the '%' character.
 */
static int set_double_encode(prelude_option_t *opt, const char *arg) 
{
        check_double_encode = 1;
        return prelude_option_success;
}



/*
 * "flip-backslash" option callback: make the decoder rewrite '\\'
 * to '/'.
 */
static int set_iis_flip_backslash(prelude_option_t *opt, const char *arg) 
{
        iis_flip_backslash = 1;
        return prelude_option_success;
}




/*
 * "max-whitespace" option callback.
 *
 * @arg has to be a decimal number between 0 and 65535 (the decoder
 * never handles more than 65535 bytes at once, its size argument
 * being an uint16_t). Trailing whitespace is tolerated.
 *
 * Returns prelude_option_error, leaving the current setting untouched,
 * if @arg is missing, is not a number, or is out of range.
 */
static int set_max_whitespace(prelude_option_t *opt, const char *arg) 
{
        long value;
        char *end;

        if ( ! arg )
                return prelude_option_error;

        /*
         * unlike atoi(), strtol() tells us where the conversion stopped
         * and clamps the result (to LONG_MIN / LONG_MAX) on overflow,
         * which the range check below rejects.
         */
        value = strtol(arg, &end, 10);
        if ( end == arg )
                return prelude_option_error;

        while ( isspace((unsigned char) *end) )
                end++;

        if ( *end != '\0' || value < 0 || value > 65535 )
                return prelude_option_error;

        max_whitespace = (int) value;

        return prelude_option_success;
}



/*
 * "end-on-param" option callback: make the decoder consider that the
 * URI ends at the first '?' character.
 */
static int set_end_on_url_param(prelude_option_t *opt, const char *arg) 
{
        end_on_url_param = 1;
        return prelude_option_success;
}



/*
 * "httpmod" option callback: toggle the plugin state.
 *
 * If the plugin is enabled, it is unsubscribed. Otherwise it is
 * subscribed and the Unicode table is loaded.
 *
 * Returns prelude_option_success, or prelude_option_error if any of
 * the steps failed, in which case the plugin state is unchanged.
 */
static int set_http_state(prelude_option_t *opt, const char *optarg) 
{
        int ret;
        
        if ( is_enabled == 1 ) {
                ret = plugin_unsubscribe((plugin_generic_t *) &plugin);
                if ( ret < 0 )
                        return prelude_option_error;
                
                is_enabled = 0;
                
                return prelude_option_success;
        }

        ret = plugin_subscribe((plugin_generic_t *) &plugin);
        if ( ret < 0 )
                return prelude_option_error;

        ret = unicode_load_table();
        if ( ret < 0 ) {
                /*
                 * Don't leave the plugin subscribed while is_enabled
                 * says it is not: the next call would subscribe it a
                 * second time.
                 */
                (void) plugin_unsubscribe((plugin_generic_t *) &plugin);
                return prelude_option_error;
        }

        is_enabled = 1;
        
        return prelude_option_success;
}




/*
 * "httpmod" option getter: write "enabled" or "disabled" to @buf
 * (at most @size bytes), depending on the current plugin state.
 */
static int get_http_state(char *buf, size_t size) 
{
        const char *state = "disabled";

        if ( is_enabled == 1 )
                state = "enabled";

        snprintf(buf, size, "%s", state);

        return prelude_option_success;
}




/*
 * Plugin entry point: register the "httpmod" option and its
 * sub-options, describe the plugin, set up the default port list, the
 * shared alert, the whitespace lookup table and the "uricontent"
 * signature key.
 *
 * Returns the plugin object.
 */
plugin_generic_t *plugin_init(int argc, char **argv)
{
        int i;
        prelude_option_t *opt;
        static idmef_impact_t impact;
        
        opt = prelude_option_add(NULL, CLI_HOOK|WIDE_HOOK|CFG_HOOK, 0, "httpmod",
                                 "Set HttpMod plugin options", no_argument,
                                 set_http_state, get_http_state);
        
        prelude_option_add(opt, CLI_HOOK|WIDE_HOOK|CFG_HOOK, 'p', "port-list",
                           "List of port to look at", required_argument,
                           set_port_list, NULL);
                           
        prelude_option_add(opt, CLI_HOOK|WIDE_HOOK|CFG_HOOK, 'f', "codepage-file", 
                           "File containing Unicode rewrite tables", required_argument, 
                           unicode_set_table_file, NULL);
                           
        prelude_option_add(opt, CLI_HOOK|WIDE_HOOK|CFG_HOOK, 'n', "codepage-number",
                           "Codepage number to use", required_argument,
                           unicode_set_codepage, NULL);

        prelude_option_add(opt, CLI_HOOK|WIDE_HOOK|CFG_HOOK, 'd', "double-encode",
                           "Check for encoded '%' character", no_argument,
                           set_double_encode, NULL);

        prelude_option_add(opt, CLI_HOOK|WIDE_HOOK|CFG_HOOK, 'e', "end-on-param",
                           "Stop parsing the URL when we meet a parameter", no_argument,
                           set_end_on_url_param, NULL);

        prelude_option_add(opt, CLI_HOOK|WIDE_HOOK|CFG_HOOK, 'm', "max-whitespace",
                           "Maximum number of whitespace allowed before URL begin",
                           required_argument, set_max_whitespace, NULL);

        /*
         * 'f' is already taken by "codepage-file": use a short name of
         * its own so that both options can be told apart.
         */
        prelude_option_add(opt, CLI_HOOK|WIDE_HOOK|CFG_HOOK, 'b', "flip-backslash",
                           "Change '\\' to '/' when parsing URL", no_argument,
                           set_iis_flip_backslash, NULL);
        
        http_plugin_id = plugin_request_new_id();
        
        plugin_set_name(&plugin, "HttpMod");        
        plugin_set_protocol(&plugin, "http");
        plugin_set_author(&plugin, "Yoann Vandoorselaere");
        plugin_set_contact(&plugin, "yoann@prelude-ids.org");
        plugin_set_desc(&plugin, "Snort based http decode plugin.");
        plugin_set_running_func(&plugin, http_decode);

        port_list = protocol_plugin_port_list_new();
        setup_default_port_list();

        /*
         * do_log_other() writes through alert.impact: make sure it
         * always points to a valid object.
         */
        nids_alert_init(&alert);
        alert.impact = &impact;

        for ( i = 0; i < 256; i++ ) 
                is_whitespace[i] = 0;

        is_whitespace[0x20] = 1; /* ' '  */
        is_whitespace[0x0a] = 1; /* '\n' */
        is_whitespace[0x0d] = 1; /* '\r' */
        is_whitespace[0x09] = 1; /* '\t' */
        
        signature_parser_add_one_arg_key("uricontent", &parse_uricontent);
        
        return (plugin_generic_t *) &plugin;
}




