Logo Search packages:      
Sourcecode: latrine version File versions  Download package

data.c

/* vim: set noet ts=4:
 *
 * Copyright (c) 2002-2007 Martin A. Godisch <martin@godisch.de>.
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
 * St, Fifth Floor, Boston, MA 02110-1301, USA.
 */
#include <data.h>
#include <dictd.h>
#include <latrine.h>
#include <memory.h>
#include <stdio.h>
#include <time.h>

/* Paths of the dictionary and of the wordlist (hits) file. Both must be
 * non-NULL when load_wordlist() runs; wordfile is opened through gzopen(). */
char
      *dictfile = NULL,
      *wordfile = NULL;
/* randcount: select_word() picks a random index below this value (it is
 *            clamped to wordcount there).
 * wordlimit: capacity of the in-memory wordlist; 0 lets load_wordlist()
 *            grow the list in WORDSTEP increments instead. */
size_t
      randcount = DEFAULT_RANDCOUNT,
      wordlimit = DEFAULT_WORDLIMIT;
/* dictcount: number of words read from the dictionary by load_wordlist().
 * wordcount: number of valid entries currently stored in wordlist. */
static size_t
      dictcount = 0,
      wordcount = 0;
/* The in-memory word array; NULL while nothing is loaded. */
static struct word
      *wordlist = NULL;
/* Bookkeeping of word rates maintained by load_wordlist() and select_word():
 * largest_in tracks the highest rate among the words kept in wordlist;
 * smallest_out is updated when a word is displaced or rejected -- presumably
 * the lowest rate among the words left out (verify against callers). */
static double
      smallest_out = 0.0,
      largest_in   = 0.0;

/* Number of entries currently held in the in-memory wordlist. */
size_t get_wordcount(void)
{
      return wordcount;
}
/* Number of words counted in the dictionary by the last load_wordlist(). */
size_t get_dictcount(void)
{
      return dictcount;
}

static char *get_hash(const struct word *w)
{
      static char output[2 * MD5_DIGEST_SIZE + 1];
      char buf_un[MD5_DIGEST_SIZE + DIGEST_ALIGN];
      char buf_in[BUFSIZE];
      char *buf_out, *p;
      int i;
      struct Tlang *l;

      p = buf_un + DIGEST_ALIGN - 1;
      buf_out = p - (size_t)p % DIGEST_ALIGN;
      memset(buf_in, 0, sizeof(buf_in));
      memset(buf_un, 0, sizeof(buf_un));

      strncpy(buf_in, w->lang[0]->c, sizeof(buf_in) - 1);
      for (l = w->lang[0]->next; l != NULL; l = l->next) {
            strncat(buf_in + strlen(buf_in), "\1", sizeof(buf_in) - strlen(buf_in) - 1);
            strncat(buf_in + strlen(buf_in), l->c, sizeof(buf_in) - strlen(buf_in) - 1);
      }
      strncat(buf_in + strlen(buf_in), "\2", sizeof(buf_in) - strlen(buf_in) - 1);
      strncat(buf_in + strlen(buf_in), w->lang[1]->c, sizeof(buf_in) - strlen(buf_in) - 1);
      for (l = w->lang[1]->next; l != NULL; l = l->next) {
            strncat(buf_in + strlen(buf_in), "\1", sizeof(buf_in) - strlen(buf_in) - 1);
            strncat(buf_in + strlen(buf_in), l->c, sizeof(buf_in) - strlen(buf_in) - 1);
      }

      md5_buffer(buf_in, strlen(buf_in), buf_out);
      for (i = 0; i < MD5_DIGEST_SIZE; i++)
            sprintf(&output[2*i], "%02hhx", buf_out[i]);
      return output;
}

static int comp_by_access(const void *a, const void *b)
{
      time_t q1 = ((struct word*)a)->timestamp;
      time_t q2 = ((struct word*)b)->timestamp;
      if (q1 == q2)
            return 0;
      else if (q1 == 0)
            return +1;
      else if (q2 == 0)
            return -1;
      else if (q1 < q2)
            return -1;
      return +1;
}

static int comp_by_rate(const void *a, const void *b)
{
      double q1 = ((struct word*)a)->rate;
      double q2 = ((struct word*)b)->rate;
      if (q1 < q2)
            return -1;
      else if (q1 > q2)
            return +1;
      return comp_by_access(a, b);
}

static int comp_by_pos(const void *a, const void *b)
{
      double q1 = ((struct word*)a)->pos;
      double q2 = ((struct word*)b)->pos;
      if (q1 < q2)
            return -1;
      else if (q1 > q2)
            return +1;
      return 0;
}

/* Recompute w->rate from the word's history: every '+' among the first
 * HISTSIZE history characters adds one, every '-' subtracts one, and any
 * other character (including ' ') contributes nothing. */
static inline void rate(struct word *w)
{
      short score = 0;
      int k;

      assert(w != NULL);
      for (k = 0; k < HISTSIZE; k++) {
            const char mark = w->history[k];

            if (mark == '+')
                  score++;
            else if (mark == '-')
                  score--;
      }
      w->rate = score;
}

/* Record the outcome of one query for w: shift the history one place to
 * make room at index 0 (dropping the oldest of the HISTSIZE entries), store
 * '+' for a hit or '-' for a miss there, stamp the word with the current
 * time and recompute its rate. */
void update_word(struct word *w, int hit)
{
      const char mark = hit ? '+' : '-';

      assert(w != NULL);
      memmove(&w->history[1], &w->history[0], HISTSIZE - 1);
      w->history[0] = mark;
      time(&w->timestamp);
      rate(w);
}

/* Dump every entry of the in-memory wordlist (pos, timestamp, rate, history
 * and the first lang[0] string) to the debug stream. Does nothing when the
 * debug stream is NULL.
 *
 * The field types are declared outside this file, so each numeric argument
 * is cast to exactly the type its conversion specifier requires; passing a
 * mismatched type to fprintf() is undefined behaviour.
 */
void debug_print_wordlist(void)
{
      size_t i;

      if (debug == NULL)
            return;
      for (i = 0; i < wordcount; i++)
            fprintf(debug, "%5d %08lx %3d |%s| \"%s\"\n",
                  (int)wordlist[i].pos,
                  (unsigned long)wordlist[i].timestamp,
                  (int)wordlist[i].rate,
                  wordlist[i].history,
                  wordlist[i].lang[0]->c);
}

/* Pick the next word to ask.
 *
 * The wordlist is re-sorted on every call: four calls in a row sort by rate
 * (comp_by_rate), the fifth sorts by last access (comp_by_access), then the
 * cycle restarts. A random entry among the first randcount entries is then
 * chosen, avoiding the positions returned by the last three calls as far as
 * randcount leaves an alternative.
 *
 * randcount is clamped to wordcount; the list must not be empty.
 * Returns a pointer into the wordlist array.
 */
struct word *select_word(void)
{
      static size_t last[3] = {(size_t)(-1), (size_t)(-1), (size_t)(-1)};
      static int cycle = 0;
      size_t next = 0;

      if (randcount > wordcount)
            randcount = wordcount;
      assert(randcount > 0);
      if (cycle < 4) {
            if (debug)
                  fprintf(debug, "select_word: [%d] sorting by least-known\n", cycle);
            qsort(wordlist, wordcount, sizeof(struct word), comp_by_rate);
            largest_in = wordlist[wordcount-1].rate;
            cycle++;
      } else {
            if (debug)
                  fprintf(debug, "select_word: sorting by last-access [%d]\n", cycle);
            qsort(wordlist, wordcount, sizeof(struct word), comp_by_access);
            cycle = 0;
      }
      debug_print_wordlist();
      do {
            next = random() % randcount;
            /* size_t values need %zu; the last[] slots are printed as long
             * so that the "nothing selected yet" marker still reads -1 */
            if (debug)
                  fprintf(debug, "select_word: [r=%zu w=%zu] [l=%ld:%ld:%ld] [p=%d n=%zu] selecting \"%s\"\n",
                        randcount, wordcount,
                        (long)last[0], (long)last[1], (long)last[2],
                        (int)wordlist[next].pos, next, wordlist[next].lang[0]->c);
      } while ((wordlist[next].pos == last[0] && randcount > 1)
            || (wordlist[next].pos == last[1] && randcount > 2)
            || (wordlist[next].pos == last[2] && randcount > 3));
      last[2] = last[1];
      last[1] = last[0];
      last[0] = wordlist[next].pos;
      return &wordlist[next];
}

/* Release both linked lists of struct Tlang nodes hanging off w (lang[0]
 * and lang[1]) and reset the two list heads to NULL. */
inline void free_langlist(struct word *w)
{
      int side;

      for (side = 0; side < 2; side++) {
            struct Tlang *node = w->lang[side];

            while (node != NULL) {
                  struct Tlang *following = node->next;

                  free(node);
                  node = following;
            }
      }
      w->lang[0] = NULL;
      w->lang[1] = NULL;
}

static inline void free_wordlist(void)
{
      size_t i;

      for (i = 0; i < wordcount; i++)
            free_langlist(&wordlist[i]);
      if (wordlist != NULL)
            free(wordlist);
      wordlist  = NULL;
      wordcount = 0;
}

/* Write the wordlist file preamble to F: the HEADER line carrying
 * WORDLIST_VERSION followed by an explanatory comment block that names the
 * dictionary file.
 *
 * F is taken as a gzFile (not gzFile *): that is what both callers pass and
 * what gzprintf() expects. With zlib versions where gzFile is a pointer to
 * struct gzFile_s, the former gzFile * parameter was an incompatible
 * pointer type at every call.
 */
static inline void print_wordlist_intro(gzFile F)
{
      assert(F != NULL);
      gzprintf(F, HEADER, WORDLIST_VERSION);
      gzprintf(F, _("# Dictionary: %s\n"
            "# Do not change the first line or this file cannot be read anymore!\n"
            "# Do not rename this file or it cannot be found anymore!\n"
            "# Do not edit while LaTrine is running, your changes will be overridden!\n"
            "# Be careful not to destroy the position-dependent mapping with the dictionary!\n\n"),
            dictfile);
}

static gzFile open_wordfile(void)
{
      gzFile
            hits = NULL,
            tmp  = NULL;
      char
            buffer[BUFSIZE],
            hbuf[HISTSIZE+1],
            *tmpfile = NULL;
      long unsigned int
            history,
            timestamp;
      int
            version, i;

      if (debug)
            fprintf(debug, "open_wordfile: %s\n", wordfile);
      if ((hits = gzopen(wordfile, "rb")) == NULL) {
            if (errno != ENOENT)
                  errmsg("gzopen: %s: %s", wordfile, errno == 0 ? zError(Z_MEM_ERROR) : strerror(errno));
            return(NULL);
      }
      if (gzgets(hits, buffer, BUFSIZE) == Z_NULL) {
            if (debug)
                  fprintf(debug, "open_wordfile: empty wordlist file\n");
            gzclose(hits);
            return NULL;
      }
      if (sscanf(buffer, HEADER, &version) == 1) {
            if (debug)
                  fprintf(debug, "open_wordfile: file version %d\n", version);
            switch(version) {
            case 2:
                  if (debug)
                        fprintf(debug, "open_wordfile: converting wordlist file version %d\n", version);
                  gzrewind(hits);
                  tmpfile = (char*)MALLOC(strlen(wordfile) + 5);
                  sprintf(tmpfile, "%s.new", wordfile);
                  if ((tmp = gzopen(tmpfile, "wb")) == NULL) {
                        errmsg("gzopen: %s: %m", tmpfile);
                        gzclose(hits);
                        FREE(&tmpfile);
                        return NULL;
                  }
                  print_wordlist_intro(tmp);
                  while (gzgets(hits, buffer, sizeof(buffer)) != Z_NULL) {
                        if (*buffer == '\n' || *buffer == '#' || sscanf(buffer, "%06lx:%08lx", &history, &timestamp) < 2)
                              continue;
                        if (history == 0) {
                              memset(hbuf, ' ', HISTSIZE);
                              hbuf[HISTSIZE] = 0;
                        } else {
                              for (i = 1; i <= 0x800000; i *= 2)
                              if (history & i)
                                    strcat(hbuf, "+");
                              else
                                    strcat(hbuf, "-");
                              for (i = 24; i < HISTSIZE; i++)
                                    strcat(hbuf, " ");
                        }
                        gzprintf(tmp, "%08lx:%s:\n", timestamp, hbuf);
                  }
                  gzclose(hits);
                  gzclose(tmp);
                  if (rename(tmpfile, wordfile) < 0) {
                        errmsg("rename: %s, %s: %m", tmpfile, wordfile);
                        FREE(&tmpfile);
                        return NULL;
                  }
                  FREE(&tmpfile);
                  hits = gzopen(wordfile, "rb");
                  assert(hits != NULL);
                  return hits;
            case 3:
                  gzrewind(hits);
                  return hits;
            default:
                  if (debug)
                        fprintf(debug, "open_wordfile: ignoring wordlist because of incompatible version\n");
                  gzclose(hits);
                  return NULL;
            }
      } else if (debug)
            fprintf(debug, "open_wordfile: no version marker found: %s\n", wordfile);
      gzclose(hits);
      return NULL;
}

/* read a struct word from dictionary and wordlist
 *
 * The word itself comes from read_dictd(); its timestamp and history come
 * from the next data line ("<hex timestamp>:<history>:") of the hits file,
 * skipping empty lines and lines starting with '#'. When hits is NULL or
 * exhausted the word keeps a zero timestamp and a blank history. A history
 * containing anything but ' ', '+' and '-' is replaced by a blank one.
 *
 * NOTE(review): the position counter and the line counter are static and
 * never reset, so they keep counting across repeated load_wordlist() calls
 * -- confirm whether reloading is expected to work.
 *
 * returns  1: success
 * returns  0: no more words available
 * returns -1: failure
 */
static int read_dict(gzFile hits, struct word *w)
{
      char buffer[BUFSIZE];
      char *c = NULL;
      static size_t
            n    = 0,
            line = 1;
      size_t i;
      int ret;

      memset(w, 0, sizeof(*w));

      /* keep the result in an int: it may be -1 */
      if ((ret = read_dictd(w)) != 1)
            return ret;

      if (debug)
            fprintf(debug, "read_dict: hash = %s\n", get_hash(w));

      w->pos = n++;
      w->history[HISTSIZE] = 0;
      memset(w->history, ' ', HISTSIZE);

      for (c = NULL; hits != NULL && gzgets(hits, buffer, BUFSIZE) != Z_NULL; line++) {
            if (*buffer == '\n' || *buffer == '#')
                  continue;
            w->timestamp = strtol(buffer, &c, 16);
            if (*c == ':') {
                  /* Validate first and copy only a complete, valid history.
                   * The scan stops at the first foreign character (including
                   * the terminating NUL), so a short line is never read past
                   * its end; w->history is already blank otherwise. */
                  c++;
                  for (i = 0; i < HISTSIZE; i++)
                        if (c[i] != ' ' && c[i] != '+' && c[i] != '-')
                              break;
                  if (i == HISTSIZE)
                        memcpy(w->history, c, HISTSIZE);
            } else {
                  w->timestamp = 0;
                  /* the (translated) format uses %d, so pass an int */
                  errmsg(_("ignoring invalid format in wordlist line %d"), (int)line);
            }
            /* returning skips the loop increment, so count this line here */
            line++;
            return 1;
      }
      /* FIXME: warn about "hits" problems */

      return 1;
}

/* load the dictionary and the corresponding hits file,
 * an existing wordlist will be overridden
 *
 * With wordlimit == 0 every dictionary word is kept and the list grows in
 * WORDSTEP increments. Otherwise at most wordlimit words are kept: once the
 * list is full, a new word with a rate below largest_in replaces an entry
 * whose rate equals largest_in. On success the list is sorted with
 * comp_by_rate and the random generator is seeded from the current time.
 *
 * free_langlist() is called on slots before they are filled, which relies
 * on unused slots holding NULL list heads; presumably MALLOC returns zeroed
 * memory (TODO confirm), and memory added by REALLOC is zeroed here.
 *
 * returns  0: success
 * returns -1: failure
 */
int load_wordlist(void)
{
      int (*open_dict)(const char*) = open_dictd;
      int (*close_dict)(void)       = close_dictd;
      gzFile hits = NULL;
      struct word w;
      size_t cursize, i;
      size_t stale = 0; /* leading slots still holding words of a previous load */
      time_t randinit;
      int    ret;

      assert(dictfile != NULL);
      assert(wordfile != NULL);
      if (wordlimit == 0) {
            if (wordlist == NULL)
                  wordlist = (struct word*)MALLOC((cursize = WORDSTEP) * sizeof(struct word));
            else {
                  cursize  = wordcount;
                  stale    = wordcount;
            }
      } else {
            if (wordlist == NULL)
                  wordlist = (struct word*)MALLOC(wordlimit * sizeof(struct word));
            else {
                  /* shrinking: release the entries that are cut off */
                  for (i = wordlimit; i < wordcount; i++)
                        free_langlist(&wordlist[i]);
                  if (wordcount > wordlimit)
                        wordcount = wordlimit;
                  wordlist = (struct word*)REALLOC(wordlist, wordlimit * sizeof(struct word));
                  /* growing: zero the new slots; memset() takes a size in
                   * bytes, not the element count a pointer difference gives */
                  if (wordlimit > wordcount)
                        memset(&wordlist[wordcount], 0, (wordlimit - wordcount) * sizeof(struct word));
                  stale = wordcount;
            }
            cursize = wordlimit;
      }
      if (open_dict(dictfile) == -1)
            return -1;
      hits = open_wordfile();
      for (dictcount = 0, wordcount = 0; (ret = read_dict(hits, &w)) == 1; dictcount++) {
            if (wordcount >= cursize && wordlimit == 0) {
                  assert(wordcount == cursize);
                  wordlist = (struct word*)REALLOC(wordlist, (cursize += WORDSTEP) * sizeof(struct word));
                  memset(&wordlist[wordcount], 0, (cursize - wordcount) * sizeof(struct word));
            }
            rate(&w);
            if (wordcount < cursize) {
                  if (debug)
                        fprintf(debug, "load_wordlist: [+] %5d %08lx %3d |%s| \"%s\"\n",
                              (int)w.pos, (unsigned long)w.timestamp, (int)w.rate, w.history, w.lang[0]->c);
                  if (wordcount == 0 || w.rate > largest_in)
                        largest_in = w.rate;
                  free_langlist(&wordlist[wordcount]);
                  wordlist[wordcount++] = w;
                  continue;
            }
            /* wordcount >= cursize */
            if (w.rate < largest_in) {
                  for (i = 0; i < cursize; i++)
                        if (wordlist[i].rate == largest_in)
                              break;
                  assert(i < cursize);
                  if (debug)
                        fprintf(debug, "load_wordlist: [=%zu] %5d %08lx %3d |%s| \"%s\"\n", i,
                              (int)w.pos, (unsigned long)w.timestamp, (int)w.rate, w.history, w.lang[0]->c);
                  free_langlist(&wordlist[i]);
                  wordlist[i]  = w;
                  smallest_out = largest_in;
                  largest_in   = w.rate;
                  for (i = 0; i < cursize; i++)
                        if (wordlist[i].rate > largest_in)
                              largest_in = wordlist[i].rate;
                  continue;
            }
            if (debug)
                  fprintf(debug, "load_wordlist: [-] %5d %08lx %3d |%s| \"%s\"\n",
                        (int)w.pos, (unsigned long)w.timestamp, (int)w.rate, w.history, w.lang[0]->c);
            if (wordcount == cursize || w.rate < smallest_out)
                  smallest_out = w.rate;
            free_langlist(&w);
      }
      free_langlist(&w);
      /* words of a previous load that were not overwritten this time would
       * otherwise leak when the array is shrunk or freed below */
      for (i = wordcount; i < stale; i++)
            free_langlist(&wordlist[i]);
      close_dict();
      if (hits != NULL)
            gzclose(hits);
      if (ret == -1 || wordcount == 0)
            free_wordlist();
      else {
            if (wordcount < cursize)
                  wordlist = (struct word*)REALLOC(wordlist, wordcount * sizeof(struct word));
            time(&randinit);
            srandom(randinit);
            qsort(wordlist, wordcount, sizeof(struct word), comp_by_rate);
      }
      if (ret == 0 && wordcount == 0) {
            errmsg(_("invalid or empty dictionary"));
            ret = -1;
      }
      return ret;
}

/* save the hits file that corresponds to the loaded dictionary
 *
 * Writes "<wordfile>.new" and renames it over wordfile. One line
 * ("<hex timestamp>:<history>:") is written per dictionary position:
 * positions present in the in-memory wordlist use the in-memory data,
 * other positions keep what the existing hits file says (sanitised), and
 * positions beyond the end of the existing file get a zero timestamp and a
 * blank history. The wordlist is left sorted by position. Nothing is
 * written when the wordlist is empty.
 *
 * returns  0: success
 * returns -1: failure
 */
int save_wordlist(void)
{
      char buffer[BUFSIZE];
      char *tempfile   = NULL;
      gzFile old       = NULL;
      gzFile new       = NULL;
      unsigned long
            timestamp = 0;
      size_t i, n, j;
      int ret;
      char *s, history[HISTSIZE+1];

      if (wordcount == 0)
            return 0;
      tempfile = (char*)MALLOC(strlen(wordfile) + 5);
      sprintf(tempfile, "%s.new", wordfile);
      old = open_wordfile();
      errno = 0; /* gzopen() leaves errno alone for non-system failures */
      if ((new = gzopen(tempfile, "wb")) == NULL) {
            errmsg("gzopen: %s: %s", tempfile, errno == 0 ? zError(Z_MEM_ERROR) : strerror(errno));
            if (old != NULL)
                  gzclose(old);
            FREE(&tempfile);
            return -1;
      }
      if (debug)
            fprintf(debug, "save_wordlist: writing to %s\n", tempfile);
      print_wordlist_intro(new);
      qsort(wordlist, wordcount, sizeof(struct word), comp_by_pos);
      /* old is NULL when there is no usable previous hits file */
      for (i = 0, n = 0; old != NULL && gzgets(old, buffer, BUFSIZE) != Z_NULL;) {
            if (*buffer == '\n' || *buffer == '#')
                  continue;
            if (i < wordcount && wordlist[i].pos == n) {
                  gzprintf(new, "%08lx:%s:\n", (unsigned long)wordlist[i].timestamp, wordlist[i].history);
                  i++;
            } else {
                  timestamp = strtol(buffer, &s, 16);
                  memset(history, ' ', HISTSIZE);
                  history[HISTSIZE] = 0;
                  if (*s == ':') {
                        /* Copy the old history only if all HISTSIZE
                         * characters are valid. The scan stops at the first
                         * foreign character (including NUL), so a short line
                         * is never read past its end. */
                        s++;
                        for (j = 0; j < HISTSIZE; j++)
                              if (s[j] != ' ' && s[j] != '+' && s[j] != '-')
                                    break;
                        if (j == HISTSIZE)
                              memcpy(history, s, HISTSIZE);
                  } else
                        timestamp = 0;
                  gzprintf(new, "%08lx:%s:\n", timestamp, history);
            }
            n++;
      }
      if (old != NULL)
            gzclose(old);
      memset(buffer, ' ', HISTSIZE);
      buffer[HISTSIZE] = 0;
      for (; n < dictcount; n++)
            if (i < wordcount && wordlist[i].pos == n) {
                  gzprintf(new, "%08lx:%s:\n", (unsigned long)wordlist[i].timestamp, wordlist[i].history);
                  i++;
            } else
                  gzprintf(new, "%08lx:%s:\n", 0UL, buffer); /* %lx needs an unsigned long */
      if ((ret = gzclose(new)) != Z_OK) {
            /* "new" is gone once gzclose() returns, so gzerror() must not be
             * called on it; map the return code with zError() instead */
            errmsg("gzclose: %s: %s", tempfile, ret == Z_ERRNO ? strerror(errno) : zError(ret));
            FREE(&tempfile);
            return -1;
      }
      if (rename(tempfile, wordfile) != 0) {
            errmsg("rename: %s, %s: %m", tempfile, wordfile);
            FREE(&tempfile);
            return -1;
      }
      FREE(&tempfile);
      return 0;
}

Generated by  Doxygen 1.6.0   Back to index