/*
 *
 * j-chkmail - filtre de messagerie pour sendmail - MILTER
 *
 * Copyright (c) 2001, 2002 Ecole des Mines de Paris
 *
 *  Auteur     : Jose Marcio Martins da Cruz
 *               martins@paris.ensmp.fr
 *
 *  Historique :
 *  Creation     : janvier 2002
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

#include <j-sys.h>

#include "j-chkmail.h"

/*
 * Buffer geometry used by scan_block():
 *   SZ_P    - maximum number of bytes of new data consumed per pass
 *   SZ_FREE - upper bound accepted for the first line of each pass
 *   SZ_WORK - size of the work buffer (one pass of data, the longest
 *             accepted line, and the terminating NUL)
 */
#define  SZ_P               65536
#define  SZ_FREE            8192
#define  SZ_WORK            (SZ_P + SZ_FREE + 1)

/* Fallback boolean constants, defined only when no header provided them. */
#ifndef  TRUE
#define  TRUE  1
#define  FALSE 0
#endif


/* ****************************************************************************
 *                                                                            * 
 *                                                                            *
 **************************************************************************** */

/*
 * Maximum of two, three or four values.
 * These are plain macros: an argument may be expanded (and therefore
 * evaluated) more than once, so do not pass expressions with side effects.
 */
#define   MAX(a,b)          ((a) > (b) ? (a) : (b))
#define   MAX2(a,b)         MAX((a),(b))
#define   MAX3(a,b,c)       MAX(a, MAX((b),(c)))
#define   MAX4(a,b,c,d)     MAX(MAX((a),(b)), MAX((c),(d)))

/*
 * Minimum of two, three or four values.
 * Same multiple-evaluation caveat as the MAX family above.
 */
#define   MIN(a,b)          ((a) < (b) ? (a) : (b))
#define   MIN2(a,b)         MIN((a),(b))
#define   MIN3(a,b,c)       MIN(a, MIN((b),(c)))
#define   MIN4(a,b,c,d)     MIN(MIN((a),(b)), MIN((c),(d)))

/* ****************************************************************************
 *                                                                            * 
 *                                                                            *
 **************************************************************************** */
/*
 * Return the smallest of four long values.
 *
 * The definition carries a prototype so that narrower integer arguments
 * (e.g. int) are converted to long at the call site; an old-style definition
 * gives the caller no parameter types and passing an int would be undefined.
 */
long
min4 (long a, long b, long c, long d)
{
  long            r = a;

  if (b < r)
    r = b;
  if (c < r)
    r = c;
  if (d < r)
    r = d;
  return r;
}

/* ****************************************************************************
 *                                                                            * 
 *                                                                            *
 **************************************************************************** */
/*
 * Return the smallest of three long values.
 *
 * The definition carries a prototype so that narrower integer arguments
 * (e.g. int) are converted to long at the call site; an old-style definition
 * gives the caller no parameter types and passing an int would be undefined.
 */
long
min3 (long a, long b, long c)
{
  long            r = a;

  if (b < r)
    r = b;
  if (c < r)
    r = c;
  return r;
}


/* ****************************************************************************
 *                                                                            * 
 *                                                                            *
 **************************************************************************** */
/*
 * Remove every double quote, carriage return, line feed and tab from the
 * given string, compacting the remaining characters in place.
 * A NULL pointer is silently ignored.
 */
void
clean_tag_value (fname)
     char           *fname;
{
  char           *src, *dst;

  if (fname == NULL)
    return;

  dst = fname;
  for (src = fname; *src != '\0'; src++) {
    char            c = *src;

    /* dropped characters are simply not copied forward */
    if (c == '"' || c == '\r' || c == '\n' || c == '\t')
      continue;
    *dst++ = c;
  }
  *dst = '\0';
}

/* ****************************************************************************
 *                                                                            * 
 *                                                                            *
 **************************************************************************** */


/* Size of the fixed scratch buffers used while parsing one header field. */
#define   MAX_LINE  4096

/* Every pattern below is compiled case-insensitive, newline-aware, extended. */
#define   REGCOMP_FLAGS  (REG_ICASE | REG_NEWLINE | REG_EXTENDED)

/* "Content-type:" and "Content-disposition:" field names. */
#define   RE_CT "Content-type[ \t]*:"
#define   RE_CD "Content-disposition[ \t]*:"
/*
 * uuencode header line: "begin" (optionally "begin-base64"), blanks, a
 * three-digit octal mode with an optional leading 0, blanks, then a name
 * made of non-blank characters.
 */
#define   RE_UU "begin(-base64){0,1}[ \t]{1,}[0]{0,1}[0-7]{3,3}[ \t]{1,}[^ \t\r\n]{1,}"

/* Opening (_I) and closing (_F) HTML, SCRIPT and IFRAME tags. */
#define   RE_HTML_I  "<html>"
#define   RE_HTML_F    "</html>"

#define   RE_SCRIPT_I "<script>"
#define   RE_SCRIPT_F   "</script>"

#define   RE_IFRAME_I "<iframe>"
#define   RE_IFRAME_F   "</iframe>"

/*
 * Set of compiled regular expressions shared by the whole process.
 * "ok" records whether compilation succeeded; "mutex" is taken by
 * init_regex() while it compiles the expressions.
 */
typedef struct REGEX_T {
  bool            ok;
  pthread_mutex_t mutex;

  regex_t         re_ct;
  regex_t         re_cd;
  regex_t         re_uu;
  regex_t         re_html_i;
  regex_t         re_html_f;
  regex_t         re_script_i;
  regex_t         re_script_f;
  regex_t         re_iframe_i;
  regex_t         re_iframe_f;
} REGEX_T;

/* The single instance: not yet compiled, mutex statically initialised. */
static REGEX_T  RE = { FALSE, PTHREAD_MUTEX_INITIALIZER };

/* ****************************************************************************
 *                                                                            * 
 *                                                                            *
 **************************************************************************** */
static          bool
init_regex ()
{
  pthread_mutex_lock (&RE.mutex);

  if (!RE.ok) {
    int             r = 0;
    bool            ok = TRUE;

    if ((r = regcomp (&RE.re_ct, RE_CT, REGCOMP_FLAGS)) != 0) {
      char            sout[256];

      regerror (r, &RE.re_ct, sout, sizeof (sout));
      syslog (LOG_ERR, "%s regcomp error : %s", J_FUNCTION, sout);
      ok = FALSE;
    }

    if ((r = regcomp (&RE.re_cd, RE_CD, REGCOMP_FLAGS)) != 0) {
      char            sout[256];

      regerror (r, &RE.re_cd, sout, sizeof (sout));
      syslog (LOG_ERR, "%s regcomp error : %s", J_FUNCTION, sout);
      ok = FALSE;
    }

    if ((r = regcomp (&RE.re_uu, RE_UU, REGCOMP_FLAGS)) != 0) {
      char            sout[256];

      regerror (r, &RE.re_uu, sout, sizeof (sout));
      syslog (LOG_ERR, "%s regcomp error : %s", J_FUNCTION, sout);
      ok = FALSE;
    }

    if ((r = regcomp (&RE.re_html_i, RE_HTML_I, REGCOMP_FLAGS)) != 0) {
      char            sout[256];

      regerror (r, &RE.re_html_i, sout, sizeof (sout));
      syslog (LOG_ERR, "%s regcomp error : %s", J_FUNCTION, sout);
      ok = FALSE;
    }

    if ((r = regcomp (&RE.re_html_f, RE_HTML_F, REGCOMP_FLAGS)) != 0) {
      char            sout[256];

      regerror (r, &RE.re_html_f, sout, sizeof (sout));
      syslog (LOG_ERR, "%s regcomp error : %s", J_FUNCTION, sout);
      ok = FALSE;
    }

    if ((r = regcomp (&RE.re_script_i, RE_SCRIPT_I, REGCOMP_FLAGS)) != 0) {
      char            sout[256];

      regerror (r, &RE.re_script_i, sout, sizeof (sout));
      syslog (LOG_ERR, "%s regcomp error : %s", J_FUNCTION, sout);
      ok = FALSE;
    }

    if ((r = regcomp (&RE.re_html_f, RE_HTML_F, REGCOMP_FLAGS)) != 0) {
      char            sout[256];

      regerror (r, &RE.re_html_f, sout, sizeof (sout));
      syslog (LOG_ERR, "%s regcomp error : %s", J_FUNCTION, sout);
      ok = FALSE;
    }

    if ((r = regcomp (&RE.re_iframe_i, RE_IFRAME_I, REGCOMP_FLAGS)) != 0) {
      char            sout[256];

      regerror (r, &RE.re_iframe_i, sout, sizeof (sout));
      syslog (LOG_ERR, "%s regcomp error : %s", J_FUNCTION, sout);
      ok = FALSE;
    }

    if ((r = regcomp (&RE.re_iframe_f, RE_IFRAME_F, REGCOMP_FLAGS)) != 0) {
      char            sout[256];

      regerror (r, &RE.re_iframe_f, sout, sizeof (sout));
      syslog (LOG_ERR, "%s regcomp error : %s", J_FUNCTION, sout);
      ok = FALSE;
    }

    if (!ok) {
      regfree (&RE.re_ct);
      regfree (&RE.re_cd);
      regfree (&RE.re_uu);
      regfree (&RE.re_html_i);
      regfree (&RE.re_html_f);
      regfree (&RE.re_script_i);
      regfree (&RE.re_script_f);
      regfree (&RE.re_iframe_i);
      regfree (&RE.re_iframe_f);
    }

    RE.ok = ok;
  }
  pthread_mutex_unlock (&RE.mutex);

  return RE.ok;
}


/* ****************************************************************************
 *                                                                            * 
 *                                                                            *
 **************************************************************************** */
#define MALLOC_WORK 1


int
scan_block (conn_id, chunk, sz_chunk, new, sz_new, state, content, list)
     unsigned long   conn_id;
     char           *chunk;
     long            sz_chunk;
     char           *new;
     long            sz_new;
     int            *state;
     content_field  *content;
     content_field **list;
{
  int             result = 0;

#if MALLOC_WORK
  char           *work = NULL;
#else
  char            work[SZ_WORK];
#endif
  char           *p;
  int             szc, nok;
  char            old[SZ_CHUNK];
  int             i;

  if (!RE.ok) {
    if (log_level >= 10)
      syslog (LOG_INFO, "%s : Initialising REGEX structure", J_FUNCTION);
    if (!init_regex ()) {
      syslog (LOG_ERR, "%s, Unable to initialise REGEX structure", J_FUNCTION);
      return 9;
    }
  }

  nok = 0;

  p = new;
  for (i = sz_new; i > 0; i--, p++) {
    if (*p == '\0')
      *p = ' ';
  }

  memset (old, 0, sizeof (old));
  strncpy (old, chunk, sizeof (old) - 1);

#if MALLOC_WORK
  if ((work = malloc (SZ_WORK)) == NULL) {
    syslog (LOG_ERR, "%08lX : %s : malloc work error : %s",
            conn_id, J_FUNCTION, strerror (errno));
    return 15;
  }
#endif

  memset (work, 0, SZ_WORK);
  while (nok < sz_new) {
    char           *last_rc;

    if (strlen (old) > SZ_WORK) {
      syslog (LOG_WARNING, "%08lX scan_block : strlen(old) = %d", conn_id,
              strlen (old));
      *old = '\0';
      result = 3;
      break;
    }
    strcpy (work, old);
    p = work + strlen (work);
    memset (old, 0, sizeof (old));
    *old = '\0';

    /* a revoir ... */
    szc = (sz_new - nok) >= SZ_P ? SZ_P : sz_new - nok;

    if (strlen (work) + szc > SZ_WORK) {
      /* Feb 24 00:30:08 paris j-chkmail[19028]: [ID 447404 local5.warning] scan_block : 
         strlen(work) + szc = 130975
         Feb 24 00:30:08 paris sendmail[17502]: [ID 801593 mail.error] g1NNU6WZ017502:
         milter_read(j-chkmail): cmd read returned 0, expecting 5
       */
      syslog (LOG_WARNING, "%08lX scan_block : strlen(work) + szc = %d, %d",
              conn_id, strlen (work) + szc, szc);
      last_rc = NULL;
      result = 4;
      break;
    }
    memcpy (p, new + nok, szc);
    nok += szc;
    p[szc] = '\0';

    if (strcspn (p, "\r\n") > SZ_FREE) {
      syslog (LOG_WARNING,
              "%08lX scan_block : trying a buffer overflow ??? "
              "linelenght : %d; strlen : %d", conn_id, strcspn (p, "\r\n"), strlen (p));

      last_rc = NULL;
      result = 5;
      break;
    }

    /* trouver le dernier NL */
    last_rc = strrchr (work, '\n');
    /* s'il n'y a pas de NL, on cherche le dernier RC */
    if (last_rc == NULL)
      last_rc = strrchr (work, '\r');

    /* separer le buffer en deux : avant et apres le RC */
    if (last_rc != NULL)
      *last_rc = '\0';

    if (last_rc == NULL && strlen (work) > SZ_P) {
      syslog (LOG_ERR, "ERROR : scanmail strlen(work) = %d result = 6", strlen (work));
      result = 6;
      break;
    }

    /* traiter la premiere partie */
    p = work;
    while (result == 0) {
      long            d1, d2, d3, d4, d;
      char            sout[MAX_LINE];

      if (!p || !*p)
        break;

      /* 
         ** ST_INIT
       */
      if (*state == ST_INIT) {
        regmatch_t      pm_cd, pm_ct, pm_uu;
        int             ok_cd, ok_ct, ok_uu;

        if (log_level >= 20)
          syslog (LOG_DEBUG, "STATE ---> ST_INIT");

        if (content->field_type != CT_NONE) {
          save_content_field (content, list);
        }

        ok_cd = !regexec (&RE.re_cd, p, 1, &pm_cd, REG_NOTBOL | REG_NOTEOL);
        ok_ct = !regexec (&RE.re_ct, p, 1, &pm_ct, REG_NOTBOL | REG_NOTEOL);
        ok_uu = !regexec (&RE.re_uu, p, 1, &pm_uu, REG_NOTBOL | REG_NOTEOL);

        if (ok_uu) {
          int             duu, dcd, dct;

          duu = pm_uu.rm_so;
          dcd = LONG_MAX;
          dct = LONG_MAX;
          if (ok_cd)
            dcd = pm_cd.rm_so;
          if (ok_ct)
            dct = pm_ct.rm_so;
          if (min3 (duu, dcd, dct) == duu) {
            char           *t = sout;
            char            fname[MAX_LINE];

            strncpy (sout, &p[pm_uu.rm_so], pm_uu.rm_eo - pm_uu.rm_so);
            t += strcspn (t, " \t");
            t += strspn (t, " \t");
            t += strcspn (t, " \t");
            t += strspn (t, " \t");
            memset (fname, 0, sizeof (fname));
            strncpy (fname, t, strcspn (t, " \t\r\n"));

            if (is_rfc2047_encoded (fname)) {
              char            tmp[MAX_LINE];

              decode_rfc2047 (tmp, fname);
              strcpy (fname, tmp);
            }
            if (is_rfc2231_encoded (fname)) {
              char            tmp[MAX_LINE];

              decode_rfc2231 (tmp, fname);
              strcpy (fname, tmp);
            }
            content->field_type = CT_UUFILE;
            content->value = strdup (fname);

            if (content->value == NULL) {
              syslog (LOG_WARNING, "Error strdup CT_UUFILE %s : %s", fname,
                      strerror (errno));
            }
            p += pm_uu.rm_eo;
            continue;
          }
        }

        if (ok_cd || ok_ct) {
          int             pi = 0;

          *state = ST_VALUE;
          if (ok_cd) {
            pi = pm_cd.rm_eo;
            content->field_type = CT_DISP;
          }
          if (ok_ct) {
            pi = pm_ct.rm_eo;
            content->field_type = CT_TYPE;
          }
          if (ok_ct && ok_cd) {
            if (pm_cd.rm_eo < pm_ct.rm_eo) {
              pi = pm_cd.rm_eo;
              content->field_type = CT_DISP;
            } else {
              pi = pm_ct.rm_eo;
              content->field_type = CT_TYPE;
            }
          }
          p += pi;
          continue;
        } else {
          p += strlen (p);
          break;
        }
      }

      /* 
         ** ST_VALUE
       */
      if (*state == ST_VALUE) {
        if (log_level >= 20)
          syslog (LOG_DEBUG, "STATE ---> ST_VALUE");
        p += strspn (p, " \t\r\n");
        if (*p == '\0')
          break;

        d = strcspn (p, " \t\r\n;");
        if (d >= sizeof (sout))
          syslog (LOG_WARNING, "%08lX scan_block : d >= sizeof(sout) = %ld",
                  conn_id, d);
        strncpy (sout, p, d);
        sout[d] = '\0';
        p += d;

        while (*state == ST_VALUE) {
          /* end of buffer */
          if (*p == '\0') {
            *state = ST_CHECK;
            break;
          }
          /* end of line */
          if ((d = strspn (p, "\r\n")) > 0) {
            p += d;
            *state = ST_CHECK;
            break;
          }
          /* another attribute */
          if ((d = strcspn (p, ";")) == 0) {
            p++;
            *state = ST_TOKEN;
            break;
          }
          if ((d = strspn (p, " \t;")) > 0) {
            d1 = strspn (p, " \t");
            d2 = strspn (p, " \t;");
            d3 = strspn (p, " \t\r\n");
            d4 = strlen (p);

            /* XXX */
            d = min4 (d1, d2, d3, d4);
            if (d == d4) {
              *state = ST_CHECK;
              break;
            }
            if (d == d3) {
              p += d;
              *state = ST_CHECK;
              break;
            }
            if (d == d2) {
              p += d;
              *state = ST_TOKEN;
              break;
            }
            d = strlen (sout);
            /* XXX a voir - JOE 31/01/02 */
            if (d + d1 < sizeof (sout)) {
              strncat (sout, p, d1);
              sout[d + d1] = '\0';
            } else
              syslog (LOG_WARNING,
                      "%08lX scan_block : d + d1 >= sizeof(sout) = %ld",
                      conn_id, d + d1);

            content->value = strdup (sout);
            p += d;
          }
          *state = ST_INIT;
        }

        if (log_level >= 20)
          syslog (LOG_DEBUG, " ***  TAG    : %s", sout);

        if (content->value != NULL)
          free (content->value);
        content->value = strdup (sout);
        continue;
      }

      /* 
         ** ST_CHECK
       */
      if (*state == ST_CHECK) {
        if (log_level >= 20)
          syslog (LOG_DEBUG, "STATE ---> ST_CHECK");
        d = strspn (p, " \t\r\n");
        p += d;
        if (*p == '\0') {
          *state = ST_CHECK;
          continue;
        }
        if (*p == ';') {
          p++;
          *state = ST_TOKEN;
          continue;
        }
        *state = ST_INIT;
        continue;
      }

      /* 
         ** ST_TOKEN
       */
      if (*state == ST_TOKEN) {
        char            name[MAX_LINE];
        char            value[MAX_LINE];

        if (log_level >= 20)
          syslog (LOG_DEBUG, "STATE ---> ST_TOKEN");
        p += strspn (p, " \t\r\n");
        if (*p == '\0')
          continue;
        d1 = strascii (p, TSPECIALS, "");
        d2 = strcspn (p, "=");
        d3 = strascii (p, TSPECIALS, "");
        d4 = strcspn (p, " \t");
        d = min4 (d1, d2, d3, d4);
        /* why ??? */
        if (d == strlen (p)) {
          p += d;
          continue;
        }
        /* deux champs */
        if (d == d2) {
          int             rfc2231_code = 0;

          if (d >= sizeof (name))
            syslog (LOG_WARNING, "%08lX scan_block : d >= sizeof(name) = %ld",
                    conn_id, d);
          /* JOE */
          strncpy (name, p, d);
          name[d] = '\0';
          if (name[d - 1] == '*') {
            rfc2231_code = 1;
            name[d - 1] = '\0';
          }
          p += d;
          if (log_level >= 20)
            syslog (LOG_DEBUG, "      NAME   : %s", name);

          p++;
          /* decoder le deuxieme champs */
          if (*p == '"') {
            p++;
            d = strcspn (p, "\"\r\n");
          } else {
            int             dx = strcspn (p, "; \t\r\n");

            d = strascii (p, TSPECIALS, "");
            if (dx > d)
              d = dx;
            /* bug de Klez */
            if (1) {
              int             da, db;

              da = strcspn (p, "\r\n");
              db = strcspn (p, ";\r\n");
              if (da == db && da > 0 && db > 0) {
                char            tmpstr[MAX_LINE];

                strncpy (tmpstr, p, da);
                tmpstr[da] = '\0';
                while (da > 0 && tmpstr[da - 1] == ' ')
                  da--;
                if (da > d)
                  d = da;
              }
            }
          }
          if (d >= sizeof (value))
            syslog (LOG_WARNING,
                    "%08lX scan_block : d >= sizeof(value) = %ld", conn_id, d);
          strncpy (value, p, d);
          value[d] = '\0';
          p += d;
          if (*p == '"')
            p++;

          if (is_rfc2047_encoded (value)) {
            char            sout[1024];

            decode_rfc2047 (sout, value);
            strcpy (value, sout);
          }
          if (is_rfc2231_encoded (value)) {
            char            sout[1024];

            decode_rfc2231 (sout, value);
            strcpy (value, sout);
          }
          clean_tag_value (value);
          if (log_level >= 20)
            syslog (LOG_DEBUG, "      VALEUR : %s", value);
          add_content_field_attr (content, name, value);
          *state = ST_CHECK;
          continue;
        }

        /* un seul champs */
        if (d == d4) {
          if (d >= sizeof (sout))
            syslog (LOG_WARNING,
                    "%08lX scan_block : d >= sizeof(sout) = %ld (2)", conn_id, d);
          strncpy (sout, p, d);
          sout[d] = '\0';
          if (log_level >= 20)
            syslog (LOG_DEBUG, "      NAME  : %s ", sout);
          add_content_field_attr (content, sout, "");
          p += d;
          *state = ST_CHECK;
          continue;
        }
        *state = ST_INIT;
        continue;
      }

      break;
    }

    /* retourner le reste dans old */
    if (last_rc != NULL) {
      if (strlen ((char *) (last_rc + 1)) >= sizeof (old)) {
        strcpy (old, "");
        syslog (LOG_WARNING, "scan_block : sizeof(old) = %d > SZ_CHUNK", strlen (old));
        result = 7;
        break;
      }
      snprintf (old, sizeof (old), "%s", (char *) (last_rc + 1));
      *last_rc = '\n';
    }
  }

  memset (chunk, 0, sz_chunk);
  if (strlen (old) < sz_chunk)
    strcpy (chunk, old);
  else
    syslog (LOG_WARNING, "scan_block : sizeof(old) = %d > SZ_CHUNK", strlen (old));

#if MALLOC_WORK
  if (work != NULL)
    free (work);
#endif

  return result;
}


/* ****************************************************************************
 *                                                                            * 
 *                                                                            *
 **************************************************************************** */
