/* 
logpp (Log PreProcessor) 0.16 - process.c
Copyright (C) 2006-2008 Risto Vaarandi

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
*/

#include "common.h"
#include "logpp.h"

/* templ_append() appends 'n' bytes from 'src' to 'line' at offset '*count'.
   'size' is the total size of 'line' and must be at least 1; the last byte
   is always left free for the terminating 0. If all 'n' bytes fit, '*count'
   is advanced by 'n' and 0 is returned. Otherwise as many bytes as fit are
   copied, '*count' is set to 'size' - 1 and 1 is returned to tell the
   caller that 'line' is full */

static int
templ_append(char *line, size_t size, size_t *count, const void *src, size_t n)
{
  if (*count + n < size) {
    memcpy(line + *count, src, n);
    *count += n;
    return 0;
  }

  /* truncate: fill 'line' up to, but not including, its last byte */
  memcpy(line + *count, src, size - *count - 1);
  *count = size - 1;
  return 1;
}

/* match_data() compares the input buffer of the 'file' with the condition
   'cond', and if there is a match and the condition contains a template
   definition, 'line' is filled according to the definition, including
   the terminating 0. The size of 'line' is given with the 'size' parameter
   (it must be at least 1, since the terminating 0 is always written on
   a match). If the expanded template does not fit into 'line', the output
   is truncated to 'size' - 1 bytes.
   If there was no match, -1 is returned, otherwise the number of bytes
   written to the 'line' is returned (the number does not include the
   terminating 0). If 'cond' has no template definition (or the template
   expands to nothing), 0 is returned. On every match the match counter
   of 'cond' is increased by one */

ssize_t
match_data(struct src *file, struct cond *cond, char *line, size_t size)
{
  size_t i, len, len2, count;
  ssize_t num, start, end;
  char *matchbuf;
  char regmatch;
#ifdef HAVE_LIBPCRE
  int ovector[MVARBUFSIZEx3];
  int ovecpairs = 0; /* avoid gcc warning */
#else
  regmatch_t match[MVARBUFSIZE];
#endif

  /* if N>1 lines take part in the match operation, take last N lines from
     the input buffer, concatenate them (separated by NEWLINE), save the
     result to the match buffer and point 'matchbuf' to the match buffer;
     if one line takes part in the match operation, point 'matchbuf' to the
     last line in the input buffer; set 'len' to the number of bytes in
     'matchbuf' (without terminating 0) */

  if (cond->num > 1) {
    /* NOTE(review): assumes MATCHBUF can hold cond->num lines plus the
       separators and that cond->num <= IBUFSIZE -- confirm at the place
       where conditions are parsed */
    len = 0;
    /* size_t is unsigned, avoid creating negative values */
    if (file->lbuf_pos >= cond->num) i = file->lbuf_pos - cond->num;
      else i = (file->lbuf_pos + IBUFSIZE) - cond->num;

    /* 'i' starts one slot before the oldest line of interest; walk the
       circular buffer until the most recent line has been copied */
    do {
      i = (i + 1) % IBUFSIZE;
      memcpy(MATCHBUF + len, file->lbuf[i].bytes, file->lbuf[i].len);
      len += file->lbuf[i].len;
      MATCHBUF[len++] = NEWLINE;
    } while (i != file->lbuf_pos);

    /* replace the NEWLINE that follows the last line with terminating 0 */
    --len;
    MATCHBUF[len] = 0;
    matchbuf = MATCHBUF;
  } else {
    matchbuf = file->lbuf[file->lbuf_pos].bytes;
    len = file->lbuf[file->lbuf_pos].len;
  }

  /* if the pattern is a regular expression (cond->num is not 0), set
     the 'regmatch' variable to the result of the match and return if the
     'negative' flag is set accordingly; if the pattern is a truth value,
     return if the 'negative' flag is 1, otherwise set 'regmatch' to 0 */

  if (cond->num) {
    /* compare the regular expression with input line(s) */
#ifdef HAVE_LIBPCRE
    /* pcre_exec() returns the number of the largest bracketing construct
       that returned a value + 1, zero if the match variable buffer was
       too small, or a negative value if there was no match */
    ovecpairs = pcre_exec(cond->regexp, cond->extra, matchbuf, len, 0, 0,
                          ovector, MVARBUFSIZEx3);
    if (!ovecpairs) {
      /* if the regular expression contained more than MVARBUFSIZE - 1
         bracketing constructs, use as many returned values as possible */
      regmatch = 1;
      ovecpairs = MVARBUFSIZE;
    }
    else regmatch = (ovecpairs > 0);
#else
    regmatch = !regexec(&cond->regexp, matchbuf, MVARBUFSIZE, match, 0);
#endif
    /* if regular expression matched but its type is nregexp, return;
       if regular expression didn't match but its type is regexp, return */
    if (regmatch && cond->negative) return -1;
    if (!regmatch && !cond->negative) return -1;
  } else {
    /* if the pattern is a truth value 'false', return; if the pattern
       is a truth value 'true', continue with 'regmatch' set to 0 */
    if (cond->negative) return -1;
    regmatch = 0;
  }

  /* fill 'line' according to the template definition; stop as soon as
     'line' becomes full */
  count = 0;

  for (i = 0; i < cond->templ_size; ++i) {
    /* if the template part is a string, add the string to 'line' */
    if (cond->template[i].str) {
      if (templ_append(line, size, &count, cond->template[i].str,
                       cond->template[i].len)) break;
      continue;
    }

    /* the template part is a match variable */
    num = cond->template[i].num;

    /* if the variable is $~, add input file name to 'line' */
    if (num == -1) {
      if (templ_append(line, size, &count, file->name,
                       strlen(file->name))) break;
    }
    /* if the variable is $0, add all bytes from 'matchbuf' to 'line' */
    else if (num == 0) {
      if (templ_append(line, size, &count, matchbuf, len)) break;
    }
    /* if the variable is $N (N>0), add the value of the corresponding
       match variable to 'line' if 'regmatch' is 1 (if 'regmatch' is 0,
       no regular expression matching has been done and match variables
       don't have a value) */
    else {
      start = -1;
      end = -1;
      /* only look at the match variable buffer when it has been filled
         by a successful match and 'num' is a valid index into it */
      if (regmatch && num > 0) {
#ifdef HAVE_LIBPCRE
        if (num < ovecpairs) {
          start = ovector[2*num];
          end = ovector[2*num + 1];
        }
#else
        if (num < MVARBUFSIZE) {
          start = match[num].rm_so;
          end = match[num].rm_eo;
        }
#endif
      }
      /* -1 offsets mean the variable did not receive a value */
      if (start != -1 && end != -1) {
        len2 = end - start;
        if (templ_append(line, size, &count, matchbuf + start, len2)) break;
      }
    }
  }

  /* increase the match counter and add the terminating 0 to 'line' */
  ++cond->counter;
  line[count] = 0;
  return count;
}

/* write_outputs() sends 'line' ('count' bytes followed by a terminating 0)
   to every destination of every output that belongs to 'flow'.
   Destinations that are not files are written unconditionally and the
   result of the write is ignored. A file destination that is currently
   closed is reopened first, provided that REOPENINT is set and at least
   REOPENINT seconds have passed since the last open attempt; a file
   destination for which the write fails is closed */

void 
write_outputs(struct flow *flow, char *line, size_t count)
{
  struct output *out;
  time_t now;
  size_t oi, di;

  /* the current time is obtained lazily, at most once per call */
  now = 0;

  for (oi = 0; oi < flow->ol_size; ++oi) {
    out = flow->olist[oi];

    for (di = 0; di < out->dstl_size; ++di) {

      /* non-file destination: write and don't care about the result */
      if (out->dstlist[di].type != SRCDST_FILE) {
        write_line(out->dstlist + di, line, count);
        continue;
      }

      /* closed file: try to reopen it if the reopen interval is enabled
         and has elapsed since the last open attempt */
      if (out->dstlist[di].fd == -1 && REOPENINT) {
        if (!now) now = get_time();

        if (now - out->dstlist[di].tloa >= REOPENINT) {
          if (open_dst_file(out->dstlist + di) == -1) {
            log_msg(LOG_ERR, "Failed to reopen output file %s",
                             out->dstlist[di].name);
            continue;
          }
          log_msg(LOG_NOTICE, "Succeeded to reopen output file %s", 
                              out->dstlist[di].name);
        }
      }

      /* the file is still closed, nothing to write to */
      if (out->dstlist[di].fd == -1) continue;

      /* open file: a successful write finishes this destination */
      if (write_line(out->dstlist + di, line, count)) continue;

      /* the write reported an IO error, close the file */
      log_msg(LOG_ERR, "IO error, closing output file %s", 
                       out->dstlist[di].name);
      if (!close_dst_file(out->dstlist + di)) {
        log_msg(LOG_ERR, "Failed to close output file %s",
                         out->dstlist[di].name);
      }
    }
  }
}

/* process_src_file_data() runs the input buffer of 'file' through the
   filters of every flow found in the flow list of 'input' (the input
   definition the 'file' is part of). Within a filter the conditions are
   tried in order and the first one that matches ends the scan of that
   filter; if the matching condition produced data, the data are written
   to the outputs of the flow the filter belongs to */

void
process_src_file_data(struct input *input, struct src *file)
{
  struct fl_elem *node;
  struct flow *flow;
  struct filter *filter;
  ssize_t written;
  size_t fi, ci;

  /* walk through all flows of the input */
  for (node = input->flowlist; node; node = node->next) {
    flow = node->flow;

    /* walk through all filters of the flow */
    for (fi = 0; fi < flow->fl_size; ++fi) {
      filter = flow->flist[fi];

      /* try the conditions of the filter until one of them matches */
      for (ci = 0; ci < filter->condl_size; ++ci) {
        written = match_data(file, filter->condlist + ci, OUTBUF, OBUFSIZE);

        /* a negative value means no match, try the next condition */
        if (written < 0) continue;

        /* the condition matched; write the data if there are any and
           move on to the next filter */
        if (written > 0) write_outputs(flow, OUTBUF, written);
        break;
      }
    }
  }
}

/* read_and_process() scans the list of inputs, attempting to read new data
   from all open input sources associated with each input. If new data
   were acquired from a source, process_src_file() is called for processing
   the data; if no new data were acquired, the source status is checked 
   for discovering rotation/truncation condition and reopened, if necessary; 
   if an IO error occurred, the source is closed. If the source is in the 
   closed state and REOPENINT seconds have elapsed since the last open 
   attempt, the function attempts to open the source. The function returns 
   1 if new data were acquired from at least one source, and 0 otherwise */

int
read_and_process(void)
{
  struct input *input;
  time_t time;
  size_t i;
  off_t pos;
  int ret;

  /* scan the list of inputs */
  time = 0;
  ret = 0;

  for (input = INPUTLIST; input; input = input->next)
    for (i = 0; i < input->srcl_size; ++i) {
      /* if the input source is closed, REOPENINT was set with a command 
         line option and REOPENINT seconds have elapsed since the last
         open attempt, attempt to open the source again; if the source
         didn't exist at the last open attempt, set the file offset to
         the beginning of the file, otherwise to the end */
      if (input->srclist[i].fd == -1 && REOPENINT) {
        /* cache the current time if it is not obtained yet */
        if (!time) time = get_time();
        /* check if reopen is needed */
        if (time - input->srclist[i].tloa >= REOPENINT) {
          if (input->srclist[i].status == SDSTAT_NONE) pos = 0;
            else pos = -1;
          if (open_src_file(input->srclist + i, pos) == -1) {
            /* if the input file does not exist, don't log an error */
            if (input->srclist[i].status != SDSTAT_NONE)
              log_msg(LOG_ERR, "Failed to reopen input file %s",
                               input->srclist[i].name);
            continue;
          }
          else log_msg(LOG_NOTICE, "Succeeded to reopen input file %s", 
                                   input->srclist[i].name); 
        }
      }

      /* if the input source is open, read data from the source */
      if (input->srclist[i].fd != -1) 
        switch (read_line(input->srclist + i)) {
         /* if new data were acquired, process the data */
         case 1:
          process_src_file_data(input, input->srclist + i);
          ret = 1;
          break;
         /* if no new data were acquired, check the source status */
         case 0:
          if (src_file_shuffled(input->srclist + i)) {
            /* if the source has been recreated or truncated, reopen it
               with the file offset at the beginning of the source */
            log_msg(LOG_NOTICE, 
                    "Reopening recreated or truncated input file %s",
                    input->srclist[i].name);
            if (!close_src_file(input->srclist + i))
              log_msg(LOG_ERR, "Failed to close input file %s",
                               input->srclist[i].name);
            if (open_src_file(input->srclist + i, 0) == -1)
              log_msg(LOG_ERR, "Failed to reopen input file %s",
                               input->srclist[i].name);
          }
          break;
         /* if an IO error occurred during the read, close the source */
         case -1:
          log_msg(LOG_ERR, "IO error, closing input file %s", 
                           input->srclist[i].name);
          if (!close_src_file(input->srclist + i))
            log_msg(LOG_ERR, "Failed to close input file %s",
                             input->srclist[i].name);
        }
    }

  return ret;
}
