Logo Search packages:      
Sourcecode: latrine version File versions  Download package

freedict.c

/* vim: set noet ts=4:
 *
 * Copyright (c) 2002-2004 Martin A. Godisch <martin@godisch.de>.
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place, Suite 330, Boston, MA 02111-1307 USA.
 */
#include <freedict.h>
#include <charset.h>
#include <latrine.h>
#include <memory.h>
#include <stdio.h>

/* handle of the currently open dictionary; set by open_freedict() and reset
 * to NULL by close_freedict(), NULL while no dictionary is open */
static gzFile dict = NULL;

/* open the dictionary
 *
 * Opens file for reading with gzopen() and keeps the handle in the
 * file-scope variable dict.  No dictionary may be open already (asserted).
 *
 * returns  0: success
 * returns -1: failure (errmsg called)
 */
int open_freedict(const char *file)
{
      assert(dict == NULL);
      /* gzopen() does not set errno when it fails for lack of memory; clear
       * it first so that a stale value left behind by an earlier call is not
       * mistaken for the reason of this failure */
      errno = 0;
      dict  = gzopen(file, "rb");
      if (dict == NULL) {
            errmsg(_("cannot open dictionary: %s"), errno == 0 ? zError(Z_MEM_ERROR) : strerror(errno));
            return -1;
      }
      return 0;
}

/* read one line from F into buffer
 *
 * Lines that are empty or start with '#' are handed back as an empty string.
 * On any other line the terminating newline is replaced by ", " so that
 * consecutive lines can simply be concatenated by the caller.  The read is
 * limited to buflen - 1 so that this one-byte growth still fits into buffer.
 * At end of file the buffer is left empty and 0 is returned.
 *
 * returns  0: success
 * returns -1: failure (errmsg called)
 */
static inline int read_dict(gzFile F, char *buffer, size_t buflen)
{
      char *newline;

      buffer[0] = '\0';
      if (gzgets(F, buffer, buflen - 1) == Z_NULL && !gzeof(F)) {
            int        errnum;
            const char *zmsg = gzerror(F, &errnum);

            /* Z_ERRNO means the real cause is to be found in errno */
            errmsg(_("cannot read dictionary: %s"), errnum == Z_ERRNO ? strerror(errno) : zmsg);
            return -1;
      }

      /* blank lines and comments carry no data */
      if (buffer[0] == '\n' || buffer[0] == '#')
            buffer[0] = '\0';

      newline = strchr(buffer, '\n');
      if (newline != NULL) {
            newline[0] = ',';
            newline[1] = ' ';
            newline[2] = '\0';
      }
      return 0;
}

/* clean up the text collected in buffer and store a copy of it in *target
 *
 * The clean-up is done in place:
 *   - everything from the first '[' on is cut off,
 *   - every "{...}" group is removed; an unterminated '{' cuts off the rest
 *     of the text, a '}' without a preceding '{' removes the text in front
 *     of it,
 *   - leading and trailing blanks and commas are trimmed,
 *   - the result is passed through utf2local().
 *
 * If anything is left, *target receives a STRDUP()ed copy and buffer is
 * emptied for reuse by the caller; otherwise *target stays NULL.
 * *target must be NULL on entry (asserted).
 *
 * returns 0 (always)
 */
static inline int assign_word(char **target, char *buffer)
{
      char   *lbrace, *rbrace, *start;
      size_t len;

      assert(*target == NULL);
      if ((lbrace = strchr(buffer, '[')) != NULL)
            *lbrace = 0;
      /* Both delimiters have to be looked up on every pass.  Folding the two
       * searches into one short-circuit condition skips the search for '}'
       * as long as a '{' exists, which turns every "{...}" into a truncation. */
      for (;;) {
            lbrace = strchr(buffer, '{');
            rbrace = strchr(buffer, '}');
            if (lbrace == NULL && rbrace == NULL)
                  break;
            if (rbrace == NULL)
                  *lbrace = 0; /* unterminated group: cut at '{' */
            else if (lbrace == NULL || rbrace < lbrace)
                  /* stray '}': drop it together with the text in front of it */
                  memmove(buffer, rbrace + 1, strlen(rbrace + 1) + 1);
            else
                  /* regular group: close the gap, terminator included */
                  memmove(lbrace, rbrace + 1, strlen(rbrace + 1) + 1);
      }
      /* leading separators must be shifted out; overwriting them with NUL
       * would empty the whole string */
      start = buffer + strspn(buffer, " ,");
      if (start != buffer)
            memmove(buffer, start, strlen(start) + 1);
      /* trailing separators; the length check keeps us inside the buffer
       * when nothing is left */
      len = strlen(buffer);
      while (len > 0 && (buffer[len - 1] == ' ' || buffer[len - 1] == ','))
            buffer[--len] = 0;
      utf2local(buffer, BUFSIZE); /* FIXME: BUFSIZE? */
      if (*buffer != 0) {
            *target = STRDUP(buffer);
            *buffer = 0;
      }
      return 0;
}

/* read a struct word from dictionary and wordlist
 *
 * Reads the next entry from the open dictionary into w, which is zeroed
 * first.  Unindented lines form w->lang[0]; the following lines that start
 * with five blanks or a tab form w->lang[1].  Entries that lack one of the
 * two parts, or whose first part begins with "00-", are skipped.
 *
 * For every entry returned, w->pos is set from a running counter and, if
 * hits is not NULL, the next data line of the wordlist (empty lines and
 * lines starting with '#' are skipped) is parsed as "index:timestamp" in
 * hexadecimal.  When the wordlist is exhausted both values stay 0.
 *
 * On success w->lang[0] and w->lang[1] hold strings allocated by STRDUP();
 * on failure or end of input both are NULL.
 *
 * returns  1: success
 * returns  0: no more words available
 * returns -1: failure (errmsg called)
 */
int read_freedict(gzFile hits, struct word *w)
{
      char buf1[BUFSIZE];      /* line read most recently */
      char buf2[BUFSIZE];      /* text collected for the part being assembled */
      char *s = NULL;
      size_t lineno;
      static size_t n    = 0;  /* position assigned to the next word returned */
      static size_t line = 1;  /* number of the next wordlist line to be read */

      assert(dict != NULL);
      memset(w, 0, sizeof(struct word));
      *buf2 = 0;
      while (!gzeof(dict)) {
            /* skip the lines read_dict() hands back empty */
            do {
                  if (read_dict(dict, buf1, BUFSIZE) == -1)
                        goto fail;
            } while (*buf1 == 0 && !gzeof(dict));
            /* unindented lines: first language */
            while (strncmp(buf1, "     ", 5) != 0 && *buf1 != '\t' && *buf1 != 0) {
                  strncat(buf2, buf1, BUFSIZE - strlen(buf2) - 1);
                  if (read_dict(dict, buf1, BUFSIZE) == -1)
                        goto fail;
            }
            if (assign_word(&w->lang[0], buf2) == -1)
                  goto fail;
            /* indented lines: second language, indentation stripped */
            while (strncmp(buf1, "     ", 5) == 0 || *buf1 == '\t') {
                  strncat(buf2, buf1 + (*buf1 == '\t' ? 1 : 5), BUFSIZE - strlen(buf2) - 1);
                  if (read_dict(dict, buf1, BUFSIZE) == -1)
                        goto fail;
            }
            if (assign_word(&w->lang[1], buf2) == -1)
                  goto fail;
            if (w->lang[0] != NULL && w->lang[1] != NULL && strncmp(w->lang[0], "00-", 3) != 0) {
                  w->pos = n++;
                  while (hits != NULL && gzgets(hits, buf1, BUFSIZE) != Z_NULL) {
                        /* count every line read, including the one consumed
                         * below, so that reported line numbers stay correct */
                        lineno = line++;
                        if ((s = strchr(buf1, '\n')) != NULL)
                              *s = 0;
                        if (*buf1 == 0 || *buf1 == '#')
                              continue;
                        /* NOTE(review): %lx requires index and timestamp to be
                         * unsigned long - confirm against struct word */
                        if (sscanf(buf1, "%06lx:%08lx", &w->index, &w->timestamp) < 1) {
                              /* the message uses %d, so pass an int; the format
                               * string itself is a translation key and is kept */
                              errmsg(_("ignoring invalid format in wordlist line %d"), (int)lineno);
                              w->index     = 0;
                              w->timestamp = 0;
                        }
                        break;
                  }
                  return 1;
            }
            /* incomplete or "00-" entry: discard and try the next one */
            FREE(&w->lang[0]);
            FREE(&w->lang[1]);
      }
      return 0;

fail:
      /* do not leak a part that was already assigned */
      FREE(&w->lang[0]);
      FREE(&w->lang[1]);
      return -1;
}

/* close the dictionary
 *
 * returns  0: success
 * returns -1: failure (no errmsg)
 */
int close_freedict(void)
{
      int ret;

      assert(dict != NULL);
      ret  = gzclose(dict);
      dict = NULL;
      return ret;
}

Generated by  Doxygen 1.6.0   Back to index