/* Copyright (C) 2005 to 2010 Chris Vine

The library comprised in this file or of which this file is part is
distributed by Chris Vine under the GNU Lesser General Public
License as follows:

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public License
   as published by the Free Software Foundation; either version 2.1 of
   the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License, version 2.1, for more details.

   You should have received a copy of the GNU Lesser General Public
   License, version 2.1, along with this library (see the file LGPL.TXT
   which came with this source code package in the c++-gtk-utils
   sub-directory); if not, write to the Free Software Foundation, Inc.,
   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

*/

#ifndef CGU_REASSEMBLER_H
#define CGU_REASSEMBLER_H

#include <c++-gtk-utils/shared_handle.h>
#include <c++-gtk-utils/cgu_config.h>

namespace Cgu {

namespace Utf8 {


/**
 * @class Reassembler reassembler.h c++-gtk-utils/reassembler.h
 * @brief A class for reassembling UTF-8 strings sent over pipes and
 * sockets so they form complete valid UTF-8 characters.
 *
 * Utf8::Reassembler is a functor class which takes in a partially
 * formed UTF-8 string and returns a nul-terminated string comprising
 * such of the input string (after inserting, at the beginning, any
 * partially formed UTF-8 character which was at the end of the input
 * string passed in previous calls to the functor) as forms complete
 * UTF-8 characters (storing any partial character at the end for the
 * next call to the functor).  If the input string contains invalid
 * UTF-8 after adding any stored previous part character (apart from
 * any partially formed character at the end of the input string) then
 * operator() will return a null Cgu::SharedHandle<char*> object (that
 * is, Cgu::SharedHandle<char*>::get() will return 0).  Such input
 * will not be treated as invalid if it consists only of a single
 * partly formed UTF-8 character which could be valid if further bytes
 * were received and added to it.  In that case the returned
 * SharedHandle<char*> object will contain an allocated string of zero
 * length, comprising only a terminating \0 character, rather than a
 * NULL pointer.
 *
 * This enables UTF-8 strings to be sent over pipes, sockets, etc and
 * displayed in a GTK+ object at the receiving end.
 *
 * Note that for efficiency reasons the memory held in the returned
 * Cgu::SharedHandle<char*> object may be greater than the length of
 * the nul-terminated string that is contained in that memory: just
 * let the Cgu::SharedHandle<char*> object manage the memory, and use
 * the contents like any other nul-terminated string.
 *
 * This class is not needed if std::getline(), with its default '\\n'
 * delimiter, is used to read UTF-8 characters using, say,
 * Cgu::fdistream, because a whole '\\n' delimited line of UTF-8
 * characters will always be complete.
 *
 * This is an example of its use, reading from a pipe until it is
 * closed by the writer and putting the received text in a
 * GtkTextBuffer object:
 * @code
 *   using namespace Cgu;
 *
 *   GtkTextIter end;
 *   GtkTextBuffer* text_buffer = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_view));
 *   gtk_text_buffer_get_end_iter(text_buffer, &end);
 *
 *   Utf8::Reassembler reassembler;
 *   const int BSIZE = 1024;
 *   char read_buffer[BSIZE];
 *   ssize_t res;
 *   do {
 *     res = ::read(fd, read_buffer, BSIZE);
 *     if (res > 0) {
 *       SharedHandle<char*> utf8(reassembler(read_buffer, res));
 *       if (utf8.get()) {
 *         gtk_text_buffer_insert(text_buffer, &end,
 *                                utf8.get(), std::strlen(utf8));
 *       }
 *       else std::cerr << "Invalid utf8 text sent over pipe\n";
 *     }
 *   } while (res && (res != -1 || errno == EINTR));
 * @endcode
 */

class Reassembler {
private:
  // Capacity, in bytes, of the hold-over buffer declared below.
  static const size_t buff_size = 6;

  // Number of bytes of a partially formed UTF-8 character currently
  // held over for the next call to operator()() (see get_stored()).
  size_t stored;

  // Fixed-size storage of buff_size bytes.  NOTE(review): presumably
  // this holds the bytes counted by 'stored'; the code which fills it
  // is not in this header - confirm against the implementation file.
  char buffer[buff_size];

  // Private helper defined out of line.  NOTE(review): judging by its
  // name and signature it combines the held-over bytes with a new
  // input array of the given size - confirm against the
  // implementation file.
  char* join_buffer(const char*, size_t);

public:
/**
 * Constructs a Reassembler which holds no partially formed UTF-8
 * character.  The constructor will not throw.
 */
  Reassembler(): stored(0) {}

/**
 * Converts a byte array of wholly or partly formed UTF-8 characters
 * into a valid string of wholly formed characters, taking account of
 * anything held over from previous calls to this method.
 * @param input The input array.
 * @param size The number of bytes in the input (this is a byte
 * count, not a count of UTF-8 characters).
 * @return A Cgu::SharedHandle<char*> object holding a nul-terminated
 * string.  Any partially formed UTF-8 character which was at the end
 * of the input passed in previous calls is inserted at the beginning
 * of the new input, and the string returned comprises so much of the
 * result as forms complete UTF-8 characters.  Any partial character
 * at the end is stored for the next call to the functor.  If the
 * input is invalid after such recombination, a null
 * Cgu::SharedHandle<char*> object is returned (that is,
 * Cgu::SharedHandle<char*>::get() will return 0).  Input is not
 * treated as invalid if it consists only of a single partly formed
 * UTF-8 character which could be valid if further bytes were received
 * and added to it: in that case the returned
 * Cgu::SharedHandle<char*> object will contain an allocated string of
 * zero length, comprising only a terminating \0 character, rather
 * than a NULL pointer.
 * @exception std::bad_alloc The method might throw std::bad_alloc if
 * memory is exhausted and the system throws in that case.  It will
 * not throw any other exception.
 */
  Cgu::SharedHandle<char*> operator()(const char* input, size_t size);

/**
 * Reports how many bytes of a partially formed UTF-8 character are
 * being held for the next call to operator()().  It will not throw.
 * @return The number of bytes.
 */
  size_t get_stored() const {
    return stored;
  }

/**
 * Resets the Reassembler: any partially formed UTF-8 character held
 * over from previous calls to operator()() is discarded.  It will not
 * throw.
 */
  void reset() {
    stored = 0;
  }

/* The macro below only has effect if the
 * --with-glib-memory-slices-compat or
 * --with-glib-memory-slices-no-compat option was picked */
  CGU_GLIB_MEMORY_SLICES_FUNCS
};

} // namespace Utf8

} // namespace Cgu

#endif
