bmf

bmf (Bayesian Mail Filter) 0.9.4 fork + patches
git clone git://git.codemadness.org/bmf
Log | Files | Refs | README | LICENSE

commit f5e56cc70c117352ec5b7a7984065eaa65db162f
parent 20a0f52d5b478e240450fd72fa3bbd3ab5c58c48
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Sat, 27 Oct 2018 18:37:01 +0200

many improvements

- update README: only flat files are supported now.
- remove bmf.spec.in file.
- remove unused functions.
- remove some assert() calls.
- dbtext_db_open() improvements:
  - check the strdup() return value and print a perror() message on failure.
  - check the malloc() return value and print a perror() message on failure.
  - check for empty directory string (just in case).
  - use snprintf (just in case).
  - free allocated memory on failure.
- dbtext_db_opentable() improvements:
  - use snprintf, error on path truncation.
- remove unit tests (not maintained).
- code-style improvements.

Diffstat:
Makefile | 1-
README | 5+----
bmf.c | 1-
bmf.spec.in | 64----------------------------------------------------------------
config.h | 1-
dbg.c | 14+-------------
dbg.h | 3---
dbh.c | 58+++++++++++++++++++++++++++++-----------------------------
dbh.h | 46+++++++++++++++++++++-------------------------
dbtext.c | 166+++++++++++++++++++++++++++----------------------------------------------------
filt.c | 8+++-----
lex.c | 45---------------------------------------------
lex.h | 22+++++++++++-----------
str.c | 51++++-----------------------------------------------
str.h | 4----
vec.c | 215-------------------------------------------------------------------------------
vec.h | 50+++++++++++++++++---------------------------------
17 files changed, 144 insertions(+), 610 deletions(-)

diff --git a/Makefile b/Makefile @@ -50,7 +50,6 @@ dist: cp -f ${MAN1} ${DOC} ${HDR} \ ${SRC} ${SCRIPTS} \ Makefile \ - bmf.spec.in \ "${NAME}-${VERSION}" # make tarball tar -cf - "${NAME}-${VERSION}" | \ diff --git a/README b/README @@ -13,10 +13,7 @@ This project provides features which are not available in other filters: (1) Independence from external programs and libraries. Tokens are stored in memory using simple vectors which require no heavyweight external data -structure libraries. Multiple token database formats are supported, -including flat files, libdb, and mysql. Conversion between formats will -always be possible with the included import/export utility and flat files -will always remain an option. +structure libraries. The tokens are stored in plain-text "flat" files. (2) Efficient processing. Input data is parsed by a handcrafted parser which weighs in under 3% of the equivalent code generated by flex. No diff --git a/bmf.c b/bmf.c @@ -98,7 +98,6 @@ main(int argc, char **argv) err(1, "pledge"); srand(time(NULL)); - atexit(dump_alloc_heap); stats.keepers = DEF_KEEPERS; while ((ch = getopt(argc, argv, "NSVd:hk:m:npstv")) != EOF) { diff --git a/bmf.spec.in b/bmf.spec.in @@ -1,64 +0,0 @@ -Name: bmf -Version: VERSION -Release: 1 -URL: http://www.sourceforge.net/projects/bmf -Source0: %{name}-%{version}.tar.gz -License: GPL -Group: Applications/Internet -Summary: fast anti-spam filtering by Bayesian statistical analysis -Buildroot: %{_tmppath}/%{name}-%{version}-root - -%description -bmf is a Bayesian mail filter. It takes an email message or other text on -stdin, does a statistical check against lists of "good" and "spam" words, -and returns a status code indicating whether or not the message is spam. -bmf is efficient, small, and self-contained. 
- -%prep - -%setup - -%build -./configure --with-libdb --without-mysql -make - -%install -[ -n "$RPM_BUILD_ROOT" -a "$RPM_BUILD_ROOT" != / ] && rm -rf $RPM_BUILD_ROOT -make DESTDIR=${RPM_BUILD_ROOT} install -gzip $RPM_BUILD_ROOT/%{_mandir}/*/*.? - - -%files -%{_bindir}/bmf -%{_mandir}/man1/bmf.1.gz -%{_bindir}/bmfconv -%{_mandir}/man1/bmfconv.1.gz -%doc README LICENSE - -%changelog -* Mon Oct 14 2002 Tom Marshall <tommy@tig-grr.com> -- Update to version 0.9.3. - -* Sat Oct 12 2002 Tom Marshall <tommy@tig-grr.com> -- Update to version 0.9.2. - -* Sat Oct 12 2002 Tom Marshall <tommy@tig-grr.com> -- Update to version 0.9.1. - -* Wed Oct 09 2002 Tom Marshall <tommy@tig-grr.com> -- Update to version 0.84. - -* Mon Oct 07 2002 Tom Marshall <tommy@tig-grr.com> -- Update to version 0.83. - -* Sat Oct 05 2002 Tom Marshall <tommy@tig-grr.com> -- Update to version 0.82. - -* Thu Oct 03 2002 Tom Marshall <tommy@tig-grr.com> -- Update to version 0.81. -- Add bmfconv. -- Use new configure script. - -* Fri Sep 27 2002 Tom Marshall <tommy@tig-grr.com> -- Initial build. - diff --git a/config.h b/config.h @@ -19,7 +19,6 @@ #include <errno.h> #include <math.h> #include <ctype.h> -#include <assert.h> /************************************** * System headers diff --git a/dbg.c b/dbg.c @@ -15,7 +15,7 @@ uint g_verbose = 0; -void +void verbose(int level, const char *fmt,...) { va_list v; @@ -26,15 +26,3 @@ verbose(int level, const char *fmt,...) va_end(v); } } - -void -dbgout(const char *fmt,...) -{ - /* empty */ -} - -void -dump_alloc_heap(void) -{ - /* empty */ -} diff --git a/dbg.h b/dbg.h @@ -14,7 +14,4 @@ extern uint g_verbose; void verbose( int level, const char* fmt, ... ); -void dbgout( const char* fmt, ... ); -void dump_alloc_heap( void ); - #endif /* ndef _DBG_H */ diff --git a/dbh.c b/dbh.c @@ -24,43 +24,43 @@ * * the list referenced in the iterator must be sorted. 
*/ -uint db_getnewcount( veciter_t* piter ) +uint +db_getnewcount(veciter_t * piter) { - str_t* pstr; - uint count; - veciter_t curiter; - str_t* pcurstr; + str_t *pstr; + uint count; + veciter_t curiter; + str_t *pcurstr; - pstr = &piter->plist->pitems[piter->index]; - count = 0; + pstr = &piter->plist->pitems[piter->index]; + count = 0; - curiter.plist = piter->plist; - curiter.index = piter->index; - pcurstr = &curiter.plist->pitems[curiter.index]; + curiter.plist = piter->plist; + curiter.index = piter->index; + pcurstr = &curiter.plist->pitems[curiter.index]; - while( curiter.index < curiter.plist->nitems && str_casecmp( pstr, pcurstr ) == 0 ) - { - piter->index = curiter.index; - count = min( MAXFREQ, count + 1 ); - veciter_next( &curiter ); - pcurstr = &curiter.plist->pitems[curiter.index]; - } + while (curiter.index < curiter.plist->nitems && str_casecmp(pstr, pcurstr) == 0) { + piter->index = curiter.index; + count = min(MAXFREQ, count + 1); + veciter_next(&curiter); + pcurstr = &curiter.plist->pitems[curiter.index]; + } - return count; + return count; } -dbh_t* dbh_open( dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass ) +dbh_t * +dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass) { - dbh_t* pdb = NULL; + dbh_t *pdb; - switch( dbfmt ) - { - case db_text: - pdb = (dbh_t*)dbtext_db_open( dbhost, dbname, dbuser, dbpass ); - break; - default: - assert(false); - } + switch (dbfmt) { + case db_text: + pdb = (dbh_t *) dbtext_db_open(dbhost, dbname, dbuser, dbpass); + break; + default: + break; + } - return pdb; + return NULL; } diff --git a/dbh.h b/dbh.h @@ -11,44 +11,40 @@ #define _DBH_H /* database formats */ -typedef enum -{ - db_text /* flat text */ -} dbfmt_t; +typedef enum { + db_text /* flat text */ +} dbfmt_t; /* record/field structure */ -typedef struct _rec -{ - str_t w; - uint n; -} rec_t; +typedef struct _rec { + str_t w; + uint n; +} rec_t; /* database table */ typedef struct _dbt dbt_t; 
-struct _dbt -{ - bool_t (*close)(dbt_t*); - bool_t (*mergeclose)(dbt_t*,vec_t*); - bool_t (*unmergeclose)(dbt_t*,vec_t*); - bool_t (*import)(dbt_t*,cpchar); - bool_t (*export)(dbt_t*,cpchar); - uint (*getmsgcount)(dbt_t*); - uint (*getcount)(dbt_t*,str_t*); +struct _dbt { + bool_t(*close) (dbt_t *); + bool_t(*mergeclose) (dbt_t *, vec_t *); + bool_t(*unmergeclose) (dbt_t *, vec_t *); + bool_t(*import) (dbt_t *, cpchar); + bool_t(*export) (dbt_t *, cpchar); + uint(*getmsgcount) (dbt_t *); + uint(*getcount) (dbt_t *, str_t *); }; /* database instance */ typedef struct _dbh dbh_t; -struct _dbh -{ - bool_t (*close)(dbh_t*); - dbt_t* (*opentable)(dbh_t*,cpchar,bool_t); +struct _dbh { + bool_t(*close) (dbh_t *); + dbt_t *(*opentable) (dbh_t *, cpchar, bool_t); }; -dbh_t* dbh_open( dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass ); +dbh_t *dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass); #define BOGOFILTER_HEADER "# bogofilter wordlist (format version A): %u\n" #define TEXTDB_MAXLINELEN (MAXWORDLEN+32) -uint db_getnewcount( veciter_t* piter ); +uint db_getnewcount(veciter_t * piter); -#endif /* ndef _DBH_H */ +#endif /* ndef _DBH_H */ diff --git a/dbtext.c b/dbtext.c @@ -21,78 +21,83 @@ static void dbtext_table_setsize(dbttext_t * pthis, uint nsize) { - if (nsize > pthis->nalloc) { - uint nnewalloc; - rec_t *pnewitems; - uint n; - - nnewalloc = pthis->nalloc * 2; - if (nnewalloc < nsize) - nnewalloc = nsize; - pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t)); - if (pnewitems == NULL) { - exit(2); - } - for (n = pthis->nitems; n < nsize; n++) { - str_create(&pnewitems[n].w); - pnewitems[n].n = 0; - } - pthis->pitems = pnewitems; - pthis->nalloc = nnewalloc; + uint nnewalloc; + rec_t *pnewitems; + uint n; + + if (nsize <= pthis->nalloc) + return; + + nnewalloc = pthis->nalloc * 2; + if (nnewalloc < nsize) + nnewalloc = nsize; + pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * 
sizeof(rec_t)); + if (pnewitems == NULL) { + exit(2); } + for (n = pthis->nitems; n < nsize; n++) { + str_create(&pnewitems[n].w); + pnewitems[n].n = 0; + } + pthis->pitems = pnewitems; + pthis->nalloc = nnewalloc; } dbh_t * dbtext_db_open(cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass) { - dbhtext_t *pthis; - + dbhtext_t *pthis = NULL; uint dirlen; cpchar phome; struct stat st; - pthis = (dbhtext_t *) malloc(sizeof(dbhtext_t)); - if (pthis == NULL) { + if ((pthis = malloc(sizeof(dbhtext_t))) == NULL) { + perror("malloc()"); goto bail; } + pthis->close = dbtext_db_close; pthis->opentable = dbtext_db_opentable; + if (dbname != NULL && *dbname != '\0') { dirlen = strlen(dbname); - pthis->dir = strdup(dbname); - if (pthis->dir[dirlen - 1] == '/') { - pthis->dir[dirlen - 1] = '\0'; + if ((pthis->dir = strdup(dbname)) == NULL) { + perror("strdup()"); + goto bail; } + if (dirlen && pthis->dir[dirlen - 1] == '/') + pthis->dir[--dirlen] = '\0'; } else { phome = getenv("HOME"); if (phome == NULL || *phome == '\0') { phome = "."; } - pthis->dir = (char *) malloc(strlen(phome) + 5 + 1); - if (pthis->dir == NULL) { + dirlen = strlen(phome) + 5 + 1; + if ((pthis->dir = malloc(dirlen)) == NULL) goto bail; - } - sprintf(pthis->dir, "%s/.bmf", phome); + + snprintf(pthis->dir, dirlen, "%s/.bmf", phome); } - /* ensure config directory exists */ + /* make sure config directory exists */ if (stat(pthis->dir, &st) != 0) { - if (errno == ENOENT) { - if (mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0) { - goto bail; - } - } else { + if (errno != ENOENT || + mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0) goto bail; - } } else { - if (!S_ISDIR(st.st_mode)) { + if (!S_ISDIR(st.st_mode)) goto bail; - } } - return (dbh_t *) pthis; + return (dbh_t *)pthis; bail: + if (pthis) { + if (pthis->dir) + free(pthis->dir); + free(pthis); + } + return NULL; } @@ -114,19 +119,17 @@ dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly) #endif /* ndef NOLOCK */ char 
szpath[PATH_MAX]; - int flags; + int flags, ret; struct stat st; - char *pbegin; char *pend; rec_t r; uint pos; - if (pthis->dir == NULL) { + if (pthis->dir == NULL) goto bail; - } - ptable = (dbttext_t *) malloc(sizeof(dbttext_t)); - if (ptable == NULL) { + + if ((ptable = malloc(sizeof(dbttext_t))) == NULL) { perror("malloc()"); goto bail; } @@ -144,13 +147,18 @@ dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly) ptable->nitems = 0; ptable->pitems = NULL; - sprintf(szpath, "%s/%s.txt", pthis->dir, table); - flags = (rdonly ? O_RDONLY | O_CREAT : O_RDWR | O_CREAT); - ptable->fd = open(szpath, flags, 0644); - if (ptable->fd == -1) { + ret = snprintf(szpath, sizeof(szpath), "%s/%s.txt", pthis->dir, table); + if (ret == -1 || (size_t)ret >= sizeof(szpath)) { + fprintf(stderr, "path truncation: %s/%s.txt", pthis->dir, table); + goto bail; + } + + flags = O_CREAT | (rdonly ? O_RDONLY : O_RDWR); + if ((ptable->fd = open(szpath, flags, 0644)) == -1) { perror("open()"); goto bail; } + #ifndef NOLOCK memset(&lock, 0, sizeof(lock)); lock.l_type = rdonly ? 
F_RDLCK : F_WRLCK; @@ -307,8 +315,6 @@ dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg) } if (cmp < 0) { /* write existing str */ - assert(prec->w.p != NULL && prec->w.len > 0); - assert(prec->w.len <= MAXWORDLEN); count = prec->n; strncpylwr(p, prec->w.p, prec->w.len); p += prec->w.len; @@ -318,10 +324,6 @@ dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg) pos++; } else if (cmp == 0) { /* same str, merge and write sum */ - assert(prec->w.p != NULL && prec->w.len > 0); - assert(pmsgstr->p != NULL && pmsgstr->len > 0); - assert(prec->w.len <= MAXWORDLEN); - assert(pmsgstr->len <= MAXWORDLEN); count = db_getnewcount(&msgiter); count += prec->n; strncpylwr(p, prec->w.p, prec->w.len); @@ -334,8 +336,6 @@ dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg) pmsgstr = veciter_get(&msgiter); } else { /* cmp > 0 */ /* write new str */ - assert(pmsgstr->p != NULL && pmsgstr->len > 0); - assert(pmsgstr->len <= MAXWORDLEN); count = db_getnewcount(&msgiter); strncpylwr(p, pmsgstr->p, pmsgstr->len); p += pmsgstr->len; @@ -398,8 +398,6 @@ dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg) } if (cmp < 0) { /* write existing str */ - assert(prec->w.p != NULL && prec->w.len > 0); - assert(prec->w.len <= MAXWORDLEN); count = prec->n; strncpylwr(p, prec->w.p, prec->w.len); p += prec->w.len; @@ -409,10 +407,6 @@ dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg) pos++; } else if (cmp == 0) { /* same str, merge and write difference */ - assert(prec->w.p != NULL && prec->w.len > 0); - assert(pmsgstr->p != NULL && pmsgstr->len > 0); - assert(prec->w.len <= MAXWORDLEN); - assert(pmsgstr->len <= MAXWORDLEN); count = db_getnewcount(&msgiter); count = (prec->n > count) ? 
(prec->n - count) : 0; strncpylwr(p, prec->w.p, prec->w.len); @@ -425,8 +419,6 @@ dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg) pmsgstr = veciter_get(&msgiter); } else { /* cmp > 0 */ /* this should not happen, so write with count=0 */ - assert(pmsgstr->p != NULL && pmsgstr->len > 0); - assert(pmsgstr->len <= MAXWORDLEN); db_getnewcount(&msgiter); count = 0; strncpylwr(p, pmsgstr->p, pmsgstr->len); @@ -485,53 +477,9 @@ dbtext_table_getcount(dbttext_t * pthis, str_t * pword) else lo = mid; } - assert(hi >= 0 && hi < pthis->nitems); if (str_casecmp(pword, &pthis->pitems[hi].w) != 0) { return 0; } return pthis->pitems[hi].n; } - -#ifdef UNIT_TEST -int -main(int argc, char **argv) -{ - dbh_t *pdb; - veciter_t iter; - str_t *pstr; - uint n; - - if (argc != 2) { - fprintf(stderr, "usage: %s <file>\n", argv[0]); - return 1; - } - for (n = 0; n < 100; n++) { - pdb = dbh_open("testlist", true); - - vec_first(&db, &iter); - while ((pstr = veciter_get(&iter)) != NULL) { - char buf[MAXWORDLEN + 32]; - char *p; - - if (pstr->len > 200) { - fprintf(stderr, "str too long: %u chars\n", pstr->len); - break; - } - p = buf; - strcpy(buf, "str: "); - p += 6; - memcpy(p, pstr->p, pstr->len); - p += pstr->len; - sprintf(p, " %u", pstr->count); - puts(buf); - - veciter_next(&iter); - } - - dbh_close(&db); - } - - return 0; -} -#endif /* def UNIT_TEST */ diff --git a/filt.c b/filt.c @@ -21,7 +21,7 @@ #define DEVIATION(n) fabs((n)-0.5f) /* Dump the contents of a statistics structure */ -void +void statdump(stats_t * pstat, int fd) { char iobuf[IOBUFSIZE]; @@ -49,7 +49,7 @@ statdump(stats_t * pstat, int fd) } } -void +void bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, stats_t * pstats) { veciter_t iter; @@ -95,8 +95,6 @@ bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, stats_t * pstats) double goodprob = goodtotal ? min(1.0, (goodness / goodtotal)) : 0.0; double spamprob = spamtotal ? 
min(1.0, (spamness / spamtotal)) : 0.0; - assert(goodtotal > 0 || spamtotal > 0); - #ifdef NON_EQUIPROBABLE prob = (spamprob * msg_prob) / ((goodprob * (1 - msg_prob)) + (spamprob * msg_prob)); #else @@ -146,7 +144,7 @@ bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, stats_t * pstats) pstats->spamicity = product / (product + invproduct); } -bool_t +bool_t bvec_loadmsg(vec_t * pthis, lex_t * plex, tok_t * ptok) { str_t w; diff --git a/lex.c b/lex.c @@ -561,8 +561,6 @@ lex_nexttoken(lex_t * pthis, tok_t * ptok) uint len; uint toklen; - assert(pthis->pbuf != NULL); - if (pthis->pos == pthis->eom) { pthis->bom = pthis->pos; } @@ -637,9 +635,6 @@ lex_passthru(lex_t * pthis, bool_t is_spam, double hits) char szbuf[256]; bool_t in_headers = true; - assert(pthis->bom < pthis->buflen && pthis->eom <= pthis->buflen); - assert(pthis->bom <= pthis->eom); - pthis->pos = pthis->bom; if (is_spam) { sprintf(szbuf, "X-Spam-Status: Yes, hits=%f required=%f, tests=bmf\n" @@ -682,43 +677,3 @@ lex_passthru(lex_t * pthis, bool_t is_spam, double hits) } pthis->bom = pthis->eom; } - -#ifdef UNIT_TEST - -int -main(int argc, char **argv) -{ - int fd; - lex_t lex; - tok_t tok; - - fd = STDIN_FILENO; - if (argc == 2) { - fd = open(argv[1], O_RDONLY); - } - lex_create(&lex); - if (!lex_load(&lex, fd)) { - fprintf(stderr, "cannot load file\n"); - exit(1); - } - lex_nexttoken(&lex, &tok); - while (tok.tt != eof) { - char sztok[64]; - - if (tok.len > MAXWORDLEN) { - printf("*** token too long! 
***\n"); - exit(1); - } - memcpy(sztok, tok.p, tok.len); - strlwr(sztok); - sztok[tok.len] = '\0'; - printf("get_token: %d '%s'\n", tok.tt, sztok); - - lex_nexttoken(&lex, &tok); - } - - lex_destroy(&lex); - return 0; -} - -#endif /* def UNIT_TEST */ diff --git a/lex.h b/lex.h @@ -14,23 +14,23 @@ typedef enum { from, eof, word } toktype_t; typedef struct _tok { - toktype_t tt; /* token type */ - char* p; - uint len; + toktype_t tt; /* token type */ + char *p; + uint len; } tok_t; typedef enum { envelope, hdrs, body } msgsec_t; typedef struct _lex { - mbox_t mboxtype; - msgsec_t section; /* current section (envelope, headers, body) */ - uint pos; /* current position */ - uint bom; /* beginning of message */ - uint eom; /* end of current message (start of next) */ - uint lineend; /* line end (actually, start of next line) */ - uint buflen; /* length of buffer */ - char* pbuf; + mbox_t mboxtype; + msgsec_t section; /* current section (envelope, headers, body) */ + uint pos; /* current position */ + uint bom; /* beginning of message */ + uint eom; /* end of current message (start of next) */ + uint lineend; /* line end (actually, start of next line) */ + uint buflen; /* length of buffer */ + char *pbuf; } lex_t; void lex_create ( lex_t* plex, mbox_t mboxtype ); diff --git a/str.c b/str.c @@ -12,23 +12,6 @@ #include "str.h" void -strlwr(char *s) -{ - while (*s != '\0') { - *s = tolower(*s); - s++; - } -} - -void -strcpylwr(char *d, const char *s) -{ - while (*s != '\0') { - *d++ = tolower(*s++); - } -} - -void strncpylwr(char *d, const char *s, int n) { while (n--) { @@ -37,46 +20,20 @@ strncpylwr(char *d, const char *s, int n) } void -str_create(str_t * pstr) +str_create(str_t *pstr) { pstr->p = NULL; pstr->len = 0; } -void -str_destroy(str_t * pstr) -{ - /* empty */ -} - -int -str_cmp(const str_t * pthis, const str_t * pother) -{ - uint minlen = min(pthis->len, pother->len); - int cmp; - - assert(pthis->p != NULL && pother->p != NULL && minlen != 0); - - cmp = 
strncmp(pthis->p, pother->p, minlen); - - if (cmp == 0 && pthis->len != pother->len) { - cmp = (pthis->len < pother->len) ? -1 : 1; - } - return cmp; -} - int str_casecmp(const str_t * pthis, const str_t * pother) { - uint minlen = min(pthis->len, pother->len); int cmp; - assert(pthis->p != NULL && pother->p != NULL && minlen != 0); - - cmp = strncasecmp(pthis->p, pother->p, minlen); - - if (cmp == 0 && pthis->len != pother->len) { + cmp = strncasecmp(pthis->p, pother->p, min(pthis->len, pother->len)); + if (cmp == 0 && pthis->len != pother->len) cmp = (pthis->len < pother->len) ? -1 : 1; - } + return cmp; } diff --git a/str.h b/str.h @@ -11,8 +11,6 @@ #define _STR_H /* a couple of generic string functions... */ -void strlwr( char* s ); -void strcpylwr( char* d, const char* s ); void strncpylwr( char* d, const char* s, int n ); typedef struct _str @@ -22,9 +20,7 @@ typedef struct _str } str_t; void str_create ( str_t* pthis ); -void str_destroy( str_t* pthis ); -int str_cmp ( const str_t* pthis, const str_t* pother ); int str_casecmp( const str_t* pthis, const str_t* pother ); #endif /* ndef _STR_H */ diff --git a/vec.c b/vec.c @@ -61,55 +61,20 @@ vec_setsize(vec_t * pthis, uint nsize) } void -vec_addhead(vec_t * pthis, str_t * pstr) -{ - assert(pstr->p != NULL && pstr->len > 0); - - vec_setsize(pthis, pthis->nitems + 1); - memmove(&pthis->pitems[1], &pthis->pitems[0], pthis->nitems * sizeof(str_t)); - pthis->pitems[0] = *pstr; - pthis->nitems++; -} - -void vec_addtail(vec_t * pthis, str_t * pstr) { - assert(pstr->p != NULL && pstr->len > 0); - vec_setsize(pthis, pthis->nitems + 1); pthis->pitems[pthis->nitems] = *pstr; pthis->nitems++; } void -vec_delhead(vec_t * pthis) -{ - assert(pthis->nitems > 0); - pthis->nitems--; - memmove(&pthis->pitems[0], &pthis->pitems[1], pthis->nitems * sizeof(str_t)); -} - -void -vec_deltail(vec_t * pthis) -{ - assert(pthis->nitems > 0); - pthis->nitems--; -} - -void vec_first(vec_t * pthis, veciter_t * piter) { piter->plist = pthis; 
piter->index = 0; } -void -vec_last(vec_t * pthis, veciter_t * piter) -{ - piter->plist = pthis; - piter->index = pthis->nitems; -} - /***************************************************************************** * sorted vector */ @@ -121,66 +86,6 @@ svec_compare(const void *p1, const void *p2) } void -svec_add(vec_t * pthis, str_t * pstr) -{ - int lo, hi, mid; - veciter_t iter; - - if (pthis->nitems == 0) { - vec_addtail(pthis, pstr); - return; - } - if (str_casecmp(pstr, &pthis->pitems[0]) < 0) { - vec_addhead(pthis, pstr); - return; - } - hi = pthis->nitems - 1; - lo = -1; - while (hi - lo > 1) { - mid = (hi + lo) / 2; - if (str_casecmp(pstr, &pthis->pitems[mid]) <= 0) - hi = mid; - else - lo = mid; - } - assert(hi < pthis->nitems); - - iter.plist = pthis; - iter.index = hi; - - if (str_casecmp(pstr, &pthis->pitems[hi]) < 0) { - veciter_addbefore(&iter, pstr); - } else { - veciter_addafter(&iter, pstr); - } -} - -str_t * -svec_find(vec_t * pthis, str_t * pstr) -{ - int lo, hi, mid; - - if (pthis->nitems == 0) { - return NULL; - } - hi = pthis->nitems - 1; - lo = -1; - while (hi - lo > 1) { - mid = (hi + lo) / 2; - if (str_casecmp(pstr, &pthis->pitems[mid]) <= 0) - hi = mid; - else - lo = mid; - } - assert(hi >= 0 && hi < pthis->nitems); - - if (str_casecmp(pstr, &pthis->pitems[hi]) != 0) { - return NULL; - } - return &pthis->pitems[hi]; -} - -void svec_sort(vec_t * pthis) { if (pthis->nitems > 1) { @@ -208,35 +113,6 @@ veciter_get(veciter_t * pthis) } bool_t -veciter_equal(veciter_t * pthis, veciter_t * pthat) -{ - if (pthis->plist != pthat->plist || - pthis->index != pthat->index) { - return false; - } - return true; -} - -bool_t -veciter_hasitem(veciter_t * pthis) -{ - if (pthis->plist == NULL || pthis->index >= pthis->plist->nitems) { - return false; - } - return true; -} - -bool_t -veciter_prev(veciter_t * pthis) -{ - if (pthis->index == 0) { - return false; - } - pthis->index--; - return true; -} - -bool_t veciter_next(veciter_t * pthis) { pthis->index++; 
@@ -245,94 +121,3 @@ veciter_next(veciter_t * pthis) } return true; } - -void -veciter_addafter(veciter_t * pthis, str_t * pstr) -{ - str_t *pitems; - - vec_setsize(pthis->plist, pthis->plist->nitems + 1); - assert(pthis->index < pthis->plist->nitems); - pitems = pthis->plist->pitems; - - if (pthis->index != pthis->plist->nitems - 1) { - memmove(&pitems[pthis->index + 2], &pitems[pthis->index + 1], - (pthis->plist->nitems - pthis->index - 1) * sizeof(str_t)); - } - pitems[pthis->index + 1] = *pstr; - pthis->plist->nitems++; -} - -void -veciter_addbefore(veciter_t * pthis, str_t * pstr) -{ - str_t *pitems; - - vec_setsize(pthis->plist, pthis->plist->nitems + 1); - assert(pthis->index < pthis->plist->nitems); - pitems = pthis->plist->pitems; - - memmove(&pitems[pthis->index + 1], &pitems[pthis->index], - (pthis->plist->nitems - pthis->index) * sizeof(str_t)); - - pitems[pthis->index] = *pstr; - pthis->plist->nitems++; -} - -void -veciter_del(veciter_t * pthis) -{ - str_t *pitems; - - assert(pthis->plist->nitems > 0); - pthis->plist->nitems--; - if (pthis->index < pthis->plist->nitems) { - pitems = pthis->plist->pitems; - memmove(&pitems[pthis->index], &pitems[pthis->index + 1], - (pthis->plist->nitems - pthis->index) * sizeof(str_t)); - } -} - -#ifdef UNIT_TEST -int -main(int argc, char **argv) -{ - vec_t vl; - veciter_t iter; - str_t *pstr; - uint n; - - if (argc != 2) { - fprintf(stderr, "usage: %s <file>\n", argv[0]); - return 1; - } - for (n = 0; n < 100; n++) { - vec_create(&vl); - vec_load(&vl, argv[1]); - - vec_first(&vl, &iter); - while ((pstr = veciter_get(&iter)) != NULL) { - char buf[256]; - char *p; - - if (pstr->len > 200) { - fprintf(stderr, "str too long: %u chars\n", pstr->len); - break; - } - p = buf; - strcpy(buf, "str: "); - p += 6; - memcpy(p, pstr->p, pstr->len); - p += pstr->len; - sprintf(p, " %u", pstr->count); - puts(buf); - - veciter_next(&iter); - } - - vec_destroy(&vl); - } - - return 0; -} -#endif /* def UNIT_TEST */ diff --git a/vec.h 
b/vec.h @@ -13,46 +13,30 @@ /* item count for initial alloc */ #define VEC_INITIAL_SIZE 256 -typedef struct _vec -{ - uint nalloc; /* items alloced in pitems */ - uint nitems; /* items available */ - str_t* pitems; /* growing vector of items */ +typedef struct _vec { + uint nalloc; /* items allocated in pitems */ + uint nitems; /* items available */ + str_t *pitems; /* growing vector of items */ } vec_t; -typedef struct _veciter -{ - struct _vec* plist; - uint index; +typedef struct _veciter { + struct _vec *plist; + uint index; } veciter_t; /* class vector */ -void vec_create ( vec_t* pthis ); -void vec_destroy ( vec_t* pthis ); +void vec_create(vec_t * pthis); +void vec_destroy(vec_t * pthis); +void vec_addtail(vec_t * pthis, str_t * pstr); +void vec_first(vec_t * pthis, veciter_t * piter); -void vec_addhead ( vec_t* pthis, str_t* pstr ); -void vec_addtail ( vec_t* pthis, str_t* pstr ); -void vec_delhead ( vec_t* pthis ); -void vec_deltail ( vec_t* pthis ); +/* class sorted_vector */ +void svec_sort(vec_t * ptthis); -void vec_first ( vec_t* pthis, veciter_t* piter ); -void vec_last ( vec_t* pthis, veciter_t* piter ); +/* veciter_create not needed */ +void veciter_destroy(veciter_t * pthis); -/* class sorted_vector */ -void svec_add ( vec_t* pthis, str_t* pstr ); -str_t* svec_find ( vec_t* pthis, str_t* pstr ); -void svec_sort ( vec_t* ptthis ); - -/* veciter_create not needed */ -void veciter_destroy ( veciter_t* pthis ); - -str_t* veciter_get ( veciter_t* pthis ); -bool_t veciter_equal ( veciter_t* pthis, veciter_t* pthat ); -bool_t veciter_hasitem ( veciter_t* pthis ); -bool_t veciter_prev ( veciter_t* pthis ); -bool_t veciter_next ( veciter_t* pthis ); -void veciter_addafter ( veciter_t* pthis, str_t* pstr ); -void veciter_addbefore( veciter_t* pthis, str_t* pstr ); -void veciter_del ( veciter_t* pthis ); +str_t *veciter_get(veciter_t * pthis); +bool_t veciter_next(veciter_t * pthis); #endif /* ndef _VEC_H */