bmf

bmf (Bayesian Mail Filter) 0.9.4 fork + patches
git clone git://git.codemadness.org/bmf
Log | Files | Refs | README | LICENSE

commit 0f11e5e148314939e59850ef2aaa607f2b06bc90
parent cd31f403d6c7b3acf4a41365c063c4cefef34e83
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Sun, 23 Sep 2018 14:36:58 +0200

improve code-style

Diffstat:
M dbdb.c | 74 ++++++++++++++++++++++++++++++++++++--------------------------------------
M dbg.c | 37 ++++++++++++++++++++-----------------
M dbtext.c | 982 +++++++++++++++++++++++++++++++++++++------------------------------------------
M filt.c | 250 ++++++++++++++++++++++++++++++++++++++-----------------------------------------
M lex.c | 1296 ++++++++++++++++++++++++++++++++++++++-----------------------------------------
M str.c | 86 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
M vec.c | 469 +++++++++++++++++++++++++++++++++++++++----------------------------------------
7 files changed, 1531 insertions(+), 1663 deletions(-)

diff --git a/dbdb.c b/dbdb.c @@ -18,53 +18,51 @@ #include "dbh.h" #include "dbdb.h" -dbh_t* dbdb_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass ) +dbh_t * +dbdb_db_open(cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass) { - return NULL; + return NULL; } #ifdef UNIT_TEST -int main( int argc, char** argv ) +int +main(int argc, char **argv) { - dbh_t* pdb; - veciter_t iter; - str_t* pstr; - uint n; + dbh_t *pdb; + veciter_t iter; + str_t *pstr; + uint n; - if( argc != 2 ) - { - fprintf( stderr, "usage: %s <file>\n", argv[0] ); - return 1; - } + if (argc != 2) { + fprintf(stderr, "usage: %s <file>\n", argv[0]); + return 1; + } + for (n = 0; n < 100; n++) { + pdb = dbh_open("testlist", true); - for( n = 0; n < 100; n++ ) - { - pdb = dbh_open( "testlist", true ); + vec_first(&db, &iter); + while ((pstr = veciter_get(&iter)) != NULL) { + char buf[MAXWORDLEN + 32]; + char *p; - vec_first( &db, &iter ); - while( (pstr = veciter_get( &iter )) != NULL ) - { - char buf[MAXWORDLEN+32]; - char* p; - if( pstr->len > 200 ) - { - fprintf( stderr, "str too long: %u chars\n", pstr->len ); - break; - } - p = buf; - strcpy( buf, "str: " ); - p += 6; - memcpy( p, pstr->p, pstr->len ); - p += pstr->len; - sprintf( p, " %u", pstr->count ); - puts( buf ); + if (pstr->len > 200) { + fprintf(stderr, "str too long: %u chars\n", pstr->len); + break; + } + p = buf; + strcpy(buf, "str: "); + p += 6; + memcpy(p, pstr->p, pstr->len); + p += pstr->len; + sprintf(p, " %u", pstr->count); + puts(buf); - veciter_next( &iter ); - } + veciter_next(&iter); + } - dbh_close( &db ); - } + dbh_close(&db); + } - return 0; + return 0; } -#endif /* def UNIT_TEST */ +#endif /* def UNIT_TEST */ diff --git a/dbg.c b/dbg.c @@ -15,28 +15,31 @@ uint g_verbose = 0; -void verbose( int level, const char* fmt, ... ) +void +verbose(int level, const char *fmt,...) 
{ - if( g_verbose >= level ) - { - char str[4096]; - va_list v; - va_start( v, fmt ); - vsnprintf( str, sizeof(str)-1, fmt, v ); - str[sizeof(str)-1] = '\0'; - - fputs( str, stderr ); - - va_end( v ); - } + if (g_verbose >= level) { + char str[4096]; + va_list v; + + va_start(v, fmt); + vsnprintf(str, sizeof(str) - 1, fmt, v); + str[sizeof(str) - 1] = '\0'; + + fputs(str, stderr); + + va_end(v); + } } -void dbgout( const char* fmt, ... ) +void +dbgout(const char *fmt,...) { - /* empty */ + /* empty */ } -void dump_alloc_heap( void ) +void +dump_alloc_heap(void) { - /* empty */ + /* empty */ } diff --git a/dbtext.c b/dbtext.c @@ -18,574 +18,520 @@ #include "dbh.h" #include "dbtext.h" -static void dbtext_table_setsize( dbttext_t* pthis, uint nsize ) +static void +dbtext_table_setsize(dbttext_t * pthis, uint nsize) { - if( nsize > pthis->nalloc ) - { - uint nnewalloc; - rec_t* pnewitems; - uint n; - - nnewalloc = pthis->nalloc * 2; - if( nnewalloc < nsize ) nnewalloc = nsize; - pnewitems = (rec_t*)realloc( pthis->pitems, nnewalloc*sizeof(rec_t) ); - if( pnewitems == NULL ) - { - exit( 2 ); - } - for( n = pthis->nitems; n < nsize; n++ ) - { - str_create( &pnewitems[n].w ); - pnewitems[n].n = 0; - } - pthis->pitems = pnewitems; - pthis->nalloc = nnewalloc; - } + if (nsize > pthis->nalloc) { + uint nnewalloc; + rec_t *pnewitems; + uint n; + + nnewalloc = pthis->nalloc * 2; + if (nnewalloc < nsize) + nnewalloc = nsize; + pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t)); + if (pnewitems == NULL) { + exit(2); + } + for (n = pthis->nitems; n < nsize; n++) { + str_create(&pnewitems[n].w); + pnewitems[n].n = 0; + } + pthis->pitems = pnewitems; + pthis->nalloc = nnewalloc; + } } -dbh_t* dbtext_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass ) +dbh_t * +dbtext_db_open(cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass) { - dbhtext_t* pthis; - - uint dirlen; - cpchar phome; - struct stat st; - - pthis = (dbhtext_t*)malloc( 
sizeof(dbhtext_t) ); - if( pthis == NULL ) - { - goto bail; - } - pthis->close = dbtext_db_close; - pthis->opentable = dbtext_db_opentable; - if( dbname != NULL && *dbname != '\0' ) - { - dirlen = strlen( dbname ); - pthis->dir = strdup( dbname ); - if( pthis->dir[dirlen-1] == '/' ) - { - pthis->dir[dirlen-1] = '\0'; - } - } - else - { - phome = getenv( "HOME" ); - if( phome == NULL || *phome == '\0' ) - { - phome = "."; - } - pthis->dir = (char*)malloc( strlen(phome)+5+1 ); - if( pthis->dir == NULL ) - { - goto bail; - } - sprintf( pthis->dir, "%s/.bmf", phome ); - } - - /* ensure config directory exists */ - if( stat( pthis->dir, &st ) != 0 ) - { - if( errno == ENOENT ) - { - if( mkdir( pthis->dir, S_IRUSR|S_IWUSR|S_IXUSR ) != 0 ) - { - goto bail; - } - } - else - { - goto bail; - } - } - else - { - if( !S_ISDIR( st.st_mode ) ) - { - goto bail; - } - } - - return (dbh_t*)pthis; + dbhtext_t *pthis; + + uint dirlen; + cpchar phome; + struct stat st; + + pthis = (dbhtext_t *) malloc(sizeof(dbhtext_t)); + if (pthis == NULL) { + goto bail; + } + pthis->close = dbtext_db_close; + pthis->opentable = dbtext_db_opentable; + if (dbname != NULL && *dbname != '\0') { + dirlen = strlen(dbname); + pthis->dir = strdup(dbname); + if (pthis->dir[dirlen - 1] == '/') { + pthis->dir[dirlen - 1] = '\0'; + } + } else { + phome = getenv("HOME"); + if (phome == NULL || *phome == '\0') { + phome = "."; + } + pthis->dir = (char *) malloc(strlen(phome) + 5 + 1); + if (pthis->dir == NULL) { + goto bail; + } + sprintf(pthis->dir, "%s/.bmf", phome); + } + + /* ensure config directory exists */ + if (stat(pthis->dir, &st) != 0) { + if (errno == ENOENT) { + if (mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0) { + goto bail; + } + } else { + goto bail; + } + } else { + if (!S_ISDIR(st.st_mode)) { + goto bail; + } + } + + return (dbh_t *) pthis; bail: - return NULL; + return NULL; } -bool_t dbtext_db_close( dbhtext_t* pthis ) +bool_t +dbtext_db_close(dbhtext_t * pthis) { - free( pthis->dir ); 
- pthis->dir = NULL; - return true; + free(pthis->dir); + pthis->dir = NULL; + return true; } -dbt_t* dbtext_db_opentable( dbhtext_t* pthis, cpchar table, bool_t rdonly ) +dbt_t * +dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly) { - dbttext_t* ptable = NULL; + dbttext_t *ptable = NULL; #ifndef NOLOCK - struct flock lock; -#endif /* ndef NOLOCK */ - char szpath[PATH_MAX]; - int flags; - struct stat st; - - char* pbegin; - char* pend; - rec_t r; - uint pos; - - if( pthis->dir == NULL ) - { - goto bail; - } - - ptable = (dbttext_t*)malloc( sizeof(dbttext_t) ); - if( ptable == NULL ) - { - perror( "malloc()" ); - goto bail; - } - ptable->close = dbtext_table_close; - ptable->mergeclose = dbtext_table_mergeclose; - ptable->unmergeclose = dbtext_table_unmergeclose; - ptable->import = dbtext_table_import; - ptable->export = dbtext_table_export; - ptable->getmsgcount = dbtext_table_getmsgcount; - ptable->getcount = dbtext_table_getcount; - ptable->fd = -1; - ptable->pbuf = NULL; - ptable->nmsgs = 0; - ptable->nalloc = 0; - ptable->nitems = 0; - ptable->pitems = NULL; - - sprintf( szpath, "%s/%s.txt", pthis->dir, table ); - flags = (rdonly ? 
O_RDONLY|O_CREAT : O_RDWR|O_CREAT); - ptable->fd = open( szpath, flags, 0644 ); - if( ptable->fd == -1 ) - { - perror( "open()" ); - goto bail; - } - + struct flock lock; + +#endif /* ndef NOLOCK */ + char szpath[PATH_MAX]; + int flags; + struct stat st; + + char *pbegin; + char *pend; + rec_t r; + uint pos; + + if (pthis->dir == NULL) { + goto bail; + } + ptable = (dbttext_t *) malloc(sizeof(dbttext_t)); + if (ptable == NULL) { + perror("malloc()"); + goto bail; + } + ptable->close = dbtext_table_close; + ptable->mergeclose = dbtext_table_mergeclose; + ptable->unmergeclose = dbtext_table_unmergeclose; + ptable->import = dbtext_table_import; + ptable->export = dbtext_table_export; + ptable->getmsgcount = dbtext_table_getmsgcount; + ptable->getcount = dbtext_table_getcount; + ptable->fd = -1; + ptable->pbuf = NULL; + ptable->nmsgs = 0; + ptable->nalloc = 0; + ptable->nitems = 0; + ptable->pitems = NULL; + + sprintf(szpath, "%s/%s.txt", pthis->dir, table); + flags = (rdonly ? O_RDONLY | O_CREAT : O_RDWR | O_CREAT); + ptable->fd = open(szpath, flags, 0644); + if (ptable->fd == -1) { + perror("open()"); + goto bail; + } #ifndef NOLOCK - memset( &lock, 0, sizeof(lock) ); - lock.l_type = rdonly ? 
F_RDLCK : F_WRLCK; - lock.l_start = 0; - lock.l_whence = SEEK_SET; - lock.l_len = 0; - fcntl( ptable->fd, F_SETLKW, &lock ); -#endif /* ndef NOLOCK */ - - if( fstat( ptable->fd, &st ) != 0 ) - { - perror( "fstat()" ); - goto bail_uc; - } - - if( st.st_size == 0 ) - { - return (dbt_t*)ptable; - } - - ptable->pbuf = (char*)malloc( st.st_size ); - if( ptable->pbuf == NULL ) - { - perror( "malloc()" ); - goto bail_uc; - } - - if( read( ptable->fd, ptable->pbuf, st.st_size ) != st.st_size ) - { - perror( "read()" ); - goto bail_fuc; - } - - /* XXX: bogofilter compatibility */ - if( sscanf( ptable->pbuf, BOGOFILTER_HEADER, &ptable->nmsgs ) != 1 ) - { - goto bail_fuc; - } - pbegin = ptable->pbuf; - while( *pbegin != '\n' ) pbegin++; - pbegin++; - - pos = 0; - while( pbegin < ptable->pbuf + st.st_size ) - { - pend = pbegin; - r.w.p = pbegin; - r.w.len = 0; - r.n = 0; - - while( *pend != '\n' ) - { - if( pend >= ptable->pbuf + st.st_size ) - { - goto bail_fuc; - } - *pend = tolower(*pend); - if( *pend == ' ' ) - { - r.w.len = (pend-pbegin); - r.n = strtol( pend+1, NULL, 10 ); - } - pend++; - } - if( pend > pbegin && *pbegin != '#' && *pbegin != ';' ) - { - if( r.w.len == 0 || r.w.len > MAXWORDLEN ) - { - fprintf( stderr, "dbh_loadfile: bad file format\n" ); - goto bail_fuc; - } - dbtext_table_setsize( ptable, pos+1 ); - ptable->pitems[pos++] = r; - ptable->nitems = pos; - } - pbegin = pend+1; - } - - if( rdonly ) - { + memset(&lock, 0, sizeof(lock)); + lock.l_type = rdonly ? 
F_RDLCK : F_WRLCK; + lock.l_start = 0; + lock.l_whence = SEEK_SET; + lock.l_len = 0; + fcntl(ptable->fd, F_SETLKW, &lock); +#endif /* ndef NOLOCK */ + + if (fstat(ptable->fd, &st) != 0) { + perror("fstat()"); + goto bail_uc; + } + if (st.st_size == 0) { + return (dbt_t *) ptable; + } + ptable->pbuf = (char *) malloc(st.st_size); + if (ptable->pbuf == NULL) { + perror("malloc()"); + goto bail_uc; + } + if (read(ptable->fd, ptable->pbuf, st.st_size) != st.st_size) { + perror("read()"); + goto bail_fuc; + } + /* XXX: bogofilter compatibility */ + if (sscanf(ptable->pbuf, BOGOFILTER_HEADER, &ptable->nmsgs) != 1) { + goto bail_fuc; + } + pbegin = ptable->pbuf; + while (*pbegin != '\n') + pbegin++; + pbegin++; + + pos = 0; + while (pbegin < ptable->pbuf + st.st_size) { + pend = pbegin; + r.w.p = pbegin; + r.w.len = 0; + r.n = 0; + + while (*pend != '\n') { + if (pend >= ptable->pbuf + st.st_size) { + goto bail_fuc; + } + *pend = tolower(*pend); + if (*pend == ' ') { + r.w.len = (pend - pbegin); + r.n = strtol(pend + 1, NULL, 10); + } + pend++; + } + if (pend > pbegin && *pbegin != '#' && *pbegin != ';') { + if (r.w.len == 0 || r.w.len > MAXWORDLEN) { + fprintf(stderr, "dbh_loadfile: bad file format\n"); + goto bail_fuc; + } + dbtext_table_setsize(ptable, pos + 1); + ptable->pitems[pos++] = r; + ptable->nitems = pos; + } + pbegin = pend + 1; + } + + if (rdonly) { #ifndef NOLOCK - lock.l_type = F_UNLCK; - fcntl( ptable->fd, F_SETLKW, &lock ); -#endif /* ndef NOLOCK */ - close( ptable->fd ); - ptable->fd = -1; - } - - return (dbt_t*)ptable; + lock.l_type = F_UNLCK; + fcntl(ptable->fd, F_SETLKW, &lock); +#endif /* ndef NOLOCK */ + close(ptable->fd); + ptable->fd = -1; + } + return (dbt_t *) ptable; bail_fuc: - free( ptable->pbuf ); + free(ptable->pbuf); bail_uc: #ifndef NOLOCK - lock.l_type = F_UNLCK; - fcntl( ptable->fd, F_SETLKW, &lock ); -#endif /* ndef NOLOCK */ + lock.l_type = F_UNLCK; + fcntl(ptable->fd, F_SETLKW, &lock); +#endif /* ndef NOLOCK */ - close( ptable->fd 
); - ptable->fd = -1; + close(ptable->fd); + ptable->fd = -1; bail: - free( ptable ); - return NULL; + free(ptable); + return NULL; } -bool_t dbtext_table_close( dbttext_t* pthis ) +bool_t +dbtext_table_close(dbttext_t * pthis) { - struct flock lockall; + struct flock lockall; - free( pthis->pbuf ); - pthis->pbuf = NULL; - free( pthis->pitems ); - pthis->pitems = NULL; + free(pthis->pbuf); + pthis->pbuf = NULL; + free(pthis->pitems); + pthis->pitems = NULL; - if( pthis->fd != -1 ) - { + if (pthis->fd != -1) { #ifndef NOLOCK - memset( &lockall, 0, sizeof(lockall) ); - lockall.l_type = F_UNLCK; - lockall.l_start = 0; - lockall.l_whence = SEEK_SET; - lockall.l_len = 0; - fcntl( pthis->fd, F_SETLKW, &lockall ); -#endif /* ndef NOLOCK */ - close( pthis->fd ); - pthis->fd = -1; - } - - return true; + memset(&lockall, 0, sizeof(lockall)); + lockall.l_type = F_UNLCK; + lockall.l_start = 0; + lockall.l_whence = SEEK_SET; + lockall.l_len = 0; + fcntl(pthis->fd, F_SETLKW, &lockall); +#endif /* ndef NOLOCK */ + close(pthis->fd); + pthis->fd = -1; + } + return true; } -bool_t dbtext_table_mergeclose( dbttext_t* pthis, vec_t* pmsg ) +bool_t +dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg) { - /* note that we require both vectors to be sorted */ - - uint pos; - rec_t* prec; - veciter_t msgiter; - str_t* pmsgstr; - uint count; - char iobuf[IOBUFSIZE]; - char* p; - - if( pthis->fd == -1 ) - { - return false; - } - ftruncate( pthis->fd, 0 ); - lseek( pthis->fd, 0, SEEK_SET ); - - pthis->nmsgs++; - - p = iobuf; - p += sprintf( p, BOGOFILTER_HEADER, pthis->nmsgs ); - - vec_first( pmsg, &msgiter ); - pmsgstr = veciter_get( &msgiter ); - - pos = 0; - while( pos < pthis->nitems || pmsgstr != NULL ) - { - int cmp = 0; - prec = &pthis->pitems[pos]; - if( pmsgstr != NULL && pos < pthis->nitems ) - { - cmp = str_casecmp( &prec->w, pmsgstr ); - } - else - { - /* we exhausted one list or the other (but not both) */ - cmp = (pos < pthis->nitems) ? 
-1 : 1; - } - if( cmp < 0 ) - { - /* write existing str */ - assert( prec->w.p != NULL && prec->w.len > 0 ); - assert( prec->w.len <= MAXWORDLEN ); - count = prec->n; - strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len; - *p++ = ' '; - p += sprintf( p, "%u\n", count ); - - pos++; - } - else if( cmp == 0 ) - { - /* same str, merge and write sum */ - assert( prec->w.p != NULL && prec->w.len > 0 ); - assert( pmsgstr->p != NULL && pmsgstr->len > 0 ); - assert( prec->w.len <= MAXWORDLEN ); - assert( pmsgstr->len <= MAXWORDLEN ); - count = db_getnewcount( &msgiter ); - count += prec->n; - strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len; - *p++ = ' '; - p += sprintf( p, "%u\n", count ); - - pos++; - veciter_next( &msgiter ); - pmsgstr = veciter_get( &msgiter ); - } - else /* cmp > 0 */ - { - /* write new str */ - assert( pmsgstr->p != NULL && pmsgstr->len > 0 ); - assert( pmsgstr->len <= MAXWORDLEN ); - count = db_getnewcount( &msgiter ); - strncpylwr( p, pmsgstr->p, pmsgstr->len ); p += pmsgstr->len; - *p++ = ' '; - p += sprintf( p, "%u\n", count ); - - veciter_next( &msgiter ); - pmsgstr = veciter_get( &msgiter ); - } - - if( p+TEXTDB_MAXLINELEN > (iobuf+1) ) - { - write( pthis->fd, iobuf, p-iobuf ); - p = iobuf; - } - } - if( p != iobuf ) - { - write( pthis->fd, iobuf, p-iobuf ); - } - - veciter_destroy( &msgiter ); - return dbtext_table_close( pthis ); + /* note that we require both vectors to be sorted */ + + uint pos; + rec_t *prec; + veciter_t msgiter; + str_t *pmsgstr; + uint count; + char iobuf[IOBUFSIZE]; + char *p; + + if (pthis->fd == -1) { + return false; + } + ftruncate(pthis->fd, 0); + lseek(pthis->fd, 0, SEEK_SET); + + pthis->nmsgs++; + + p = iobuf; + p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs); + + vec_first(pmsg, &msgiter); + pmsgstr = veciter_get(&msgiter); + + pos = 0; + while (pos < pthis->nitems || pmsgstr != NULL) { + int cmp = 0; + + prec = &pthis->pitems[pos]; + if (pmsgstr != NULL && pos < pthis->nitems) { + cmp = 
str_casecmp(&prec->w, pmsgstr); + } else { + /* we exhausted one list or the other (but not both) */ + cmp = (pos < pthis->nitems) ? -1 : 1; + } + if (cmp < 0) { + /* write existing str */ + assert(prec->w.p != NULL && prec->w.len > 0); + assert(prec->w.len <= MAXWORDLEN); + count = prec->n; + strncpylwr(p, prec->w.p, prec->w.len); + p += prec->w.len; + *p++ = ' '; + p += sprintf(p, "%u\n", count); + + pos++; + } else if (cmp == 0) { + /* same str, merge and write sum */ + assert(prec->w.p != NULL && prec->w.len > 0); + assert(pmsgstr->p != NULL && pmsgstr->len > 0); + assert(prec->w.len <= MAXWORDLEN); + assert(pmsgstr->len <= MAXWORDLEN); + count = db_getnewcount(&msgiter); + count += prec->n; + strncpylwr(p, prec->w.p, prec->w.len); + p += prec->w.len; + *p++ = ' '; + p += sprintf(p, "%u\n", count); + + pos++; + veciter_next(&msgiter); + pmsgstr = veciter_get(&msgiter); + } else { /* cmp > 0 */ + /* write new str */ + assert(pmsgstr->p != NULL && pmsgstr->len > 0); + assert(pmsgstr->len <= MAXWORDLEN); + count = db_getnewcount(&msgiter); + strncpylwr(p, pmsgstr->p, pmsgstr->len); + p += pmsgstr->len; + *p++ = ' '; + p += sprintf(p, "%u\n", count); + + veciter_next(&msgiter); + pmsgstr = veciter_get(&msgiter); + } + + if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) { + write(pthis->fd, iobuf, p - iobuf); + p = iobuf; + } + } + if (p != iobuf) { + write(pthis->fd, iobuf, p - iobuf); + } + veciter_destroy(&msgiter); + return dbtext_table_close(pthis); } -bool_t dbtext_table_unmergeclose( dbttext_t* pthis, vec_t* pmsg ) +bool_t +dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg) { - /* note that we require both vectors to be sorted */ - - uint pos; - rec_t* prec; - veciter_t msgiter; - str_t* pmsgstr; - uint count; - char iobuf[IOBUFSIZE]; - char* p; - - if( pthis->fd == -1 ) - { - return false; - } - ftruncate( pthis->fd, 0 ); - lseek( pthis->fd, 0, SEEK_SET ); - - pthis->nmsgs--; - - p = iobuf; - p += sprintf( p, BOGOFILTER_HEADER, pthis->nmsgs ); - - vec_first( 
pmsg, &msgiter ); - pmsgstr = veciter_get( &msgiter ); - - pos = 0; - while( pos < pthis->nitems || pmsgstr != NULL ) - { - int cmp = 0; - prec = &pthis->pitems[pos]; - if( pmsgstr != NULL && pos < pthis->nitems ) - { - cmp = str_casecmp( &prec->w, pmsgstr ); - } - else - { - /* we exhausted one list or the other (but not both) */ - cmp = (pos < pthis->nitems) ? -1 : 1; - } - if( cmp < 0 ) - { - /* write existing str */ - assert( prec->w.p != NULL && prec->w.len > 0 ); - assert( prec->w.len <= MAXWORDLEN ); - count = prec->n; - strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len; - *p++ = ' '; - p += sprintf( p, "%u\n", count ); - - pos++; - } - else if( cmp == 0 ) - { - /* same str, merge and write difference */ - assert( prec->w.p != NULL && prec->w.len > 0 ); - assert( pmsgstr->p != NULL && pmsgstr->len > 0 ); - assert( prec->w.len <= MAXWORDLEN ); - assert( pmsgstr->len <= MAXWORDLEN ); - count = db_getnewcount( &msgiter ); - count = (prec->n > count) ? (prec->n - count) : 0; - strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len; - *p++ = ' '; - p += sprintf( p, "%u\n", count ); - - pos++; - veciter_next( &msgiter ); - pmsgstr = veciter_get( &msgiter ); - } - else /* cmp > 0 */ - { - /* this should not happen, so write with count=0 */ - assert( pmsgstr->p != NULL && pmsgstr->len > 0 ); - assert( pmsgstr->len <= MAXWORDLEN ); - db_getnewcount( &msgiter ); - count = 0; - strncpylwr( p, pmsgstr->p, pmsgstr->len ); p += pmsgstr->len; - *p++ = ' '; - p += sprintf( p, "%u\n", count ); - - veciter_next( &msgiter ); - pmsgstr = veciter_get( &msgiter ); - } - - if( p+TEXTDB_MAXLINELEN > (iobuf+1) ) - { - write( pthis->fd, iobuf, p-iobuf ); - p = iobuf; - } - } - if( p != iobuf ) - { - write( pthis->fd, iobuf, p-iobuf ); - } - - veciter_destroy( &msgiter ); - return dbtext_table_close( pthis ); + /* note that we require both vectors to be sorted */ + + uint pos; + rec_t *prec; + veciter_t msgiter; + str_t *pmsgstr; + uint count; + char iobuf[IOBUFSIZE]; + 
char *p; + + if (pthis->fd == -1) { + return false; + } + ftruncate(pthis->fd, 0); + lseek(pthis->fd, 0, SEEK_SET); + + pthis->nmsgs--; + + p = iobuf; + p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs); + + vec_first(pmsg, &msgiter); + pmsgstr = veciter_get(&msgiter); + + pos = 0; + while (pos < pthis->nitems || pmsgstr != NULL) { + int cmp = 0; + + prec = &pthis->pitems[pos]; + if (pmsgstr != NULL && pos < pthis->nitems) { + cmp = str_casecmp(&prec->w, pmsgstr); + } else { + /* we exhausted one list or the other (but not both) */ + cmp = (pos < pthis->nitems) ? -1 : 1; + } + if (cmp < 0) { + /* write existing str */ + assert(prec->w.p != NULL && prec->w.len > 0); + assert(prec->w.len <= MAXWORDLEN); + count = prec->n; + strncpylwr(p, prec->w.p, prec->w.len); + p += prec->w.len; + *p++ = ' '; + p += sprintf(p, "%u\n", count); + + pos++; + } else if (cmp == 0) { + /* same str, merge and write difference */ + assert(prec->w.p != NULL && prec->w.len > 0); + assert(pmsgstr->p != NULL && pmsgstr->len > 0); + assert(prec->w.len <= MAXWORDLEN); + assert(pmsgstr->len <= MAXWORDLEN); + count = db_getnewcount(&msgiter); + count = (prec->n > count) ? 
(prec->n - count) : 0; + strncpylwr(p, prec->w.p, prec->w.len); + p += prec->w.len; + *p++ = ' '; + p += sprintf(p, "%u\n", count); + + pos++; + veciter_next(&msgiter); + pmsgstr = veciter_get(&msgiter); + } else { /* cmp > 0 */ + /* this should not happen, so write with count=0 */ + assert(pmsgstr->p != NULL && pmsgstr->len > 0); + assert(pmsgstr->len <= MAXWORDLEN); + db_getnewcount(&msgiter); + count = 0; + strncpylwr(p, pmsgstr->p, pmsgstr->len); + p += pmsgstr->len; + *p++ = ' '; + p += sprintf(p, "%u\n", count); + + veciter_next(&msgiter); + pmsgstr = veciter_get(&msgiter); + } + + if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) { + write(pthis->fd, iobuf, p - iobuf); + p = iobuf; + } + } + if (p != iobuf) { + write(pthis->fd, iobuf, p - iobuf); + } + veciter_destroy(&msgiter); + return dbtext_table_close(pthis); } -bool_t dbtext_table_import( dbttext_t* pthis, cpchar filename ) +bool_t +dbtext_table_import(dbttext_t * pthis, cpchar filename) { - return false; + return false; } -bool_t dbtext_table_export( dbttext_t* pthis, cpchar filename ) +bool_t +dbtext_table_export(dbttext_t * pthis, cpchar filename) { - return false; + return false; } -uint dbtext_table_getmsgcount( dbttext_t* pthis ) +uint +dbtext_table_getmsgcount(dbttext_t * pthis) { - return pthis->nmsgs; + return pthis->nmsgs; } -uint dbtext_table_getcount( dbttext_t* pthis, str_t* pword ) +uint +dbtext_table_getcount(dbttext_t * pthis, str_t * pword) { - int lo, hi, mid; - - if( pthis->nitems == 0 ) - { - return 0; - } - - hi = pthis->nitems - 1; - lo = -1; - while( hi-lo > 1 ) - { - mid = (hi+lo)/2; - if( str_casecmp( pword, &pthis->pitems[mid].w ) <= 0 ) - hi = mid; - else - lo = mid; - } - assert( hi >= 0 && hi < pthis->nitems ); - - if( str_casecmp( pword, &pthis->pitems[hi].w ) != 0 ) - { - return 0; - } - - return pthis->pitems[hi].n; + int lo, hi, mid; + + if (pthis->nitems == 0) { + return 0; + } + hi = pthis->nitems - 1; + lo = -1; + while (hi - lo > 1) { + mid = (hi + lo) / 2; + if 
(str_casecmp(pword, &pthis->pitems[mid].w) <= 0) + hi = mid; + else + lo = mid; + } + assert(hi >= 0 && hi < pthis->nitems); + + if (str_casecmp(pword, &pthis->pitems[hi].w) != 0) { + return 0; + } + return pthis->pitems[hi].n; } #ifdef UNIT_TEST -int main( int argc, char** argv ) +int +main(int argc, char **argv) { - dbh_t* pdb; - veciter_t iter; - str_t* pstr; - uint n; - - if( argc != 2 ) - { - fprintf( stderr, "usage: %s <file>\n", argv[0] ); - return 1; - } - - for( n = 0; n < 100; n++ ) - { - pdb = dbh_open( "testlist", true ); - - vec_first( &db, &iter ); - while( (pstr = veciter_get( &iter )) != NULL ) - { - char buf[MAXWORDLEN+32]; - char* p; - if( pstr->len > 200 ) - { - fprintf( stderr, "str too long: %u chars\n", pstr->len ); - break; - } - p = buf; - strcpy( buf, "str: " ); - p += 6; - memcpy( p, pstr->p, pstr->len ); - p += pstr->len; - sprintf( p, " %u", pstr->count ); - puts( buf ); - - veciter_next( &iter ); - } - - dbh_close( &db ); - } - - return 0; + dbh_t *pdb; + veciter_t iter; + str_t *pstr; + uint n; + + if (argc != 2) { + fprintf(stderr, "usage: %s <file>\n", argv[0]); + return 1; + } + for (n = 0; n < 100; n++) { + pdb = dbh_open("testlist", true); + + vec_first(&db, &iter); + while ((pstr = veciter_get(&iter)) != NULL) { + char buf[MAXWORDLEN + 32]; + char *p; + + if (pstr->len > 200) { + fprintf(stderr, "str too long: %u chars\n", pstr->len); + break; + } + p = buf; + strcpy(buf, "str: "); + p += 6; + memcpy(p, pstr->p, pstr->len); + p += pstr->len; + sprintf(p, " %u", pstr->count); + puts(buf); + + veciter_next(&iter); + } + + dbh_close(&db); + } + + return 0; } -#endif /* def UNIT_TEST */ +#endif /* def UNIT_TEST */ diff --git a/filt.c b/filt.c @@ -21,155 +21,143 @@ #define DEVIATION(n) fabs((n)-0.5f) /* Dump the contents of a statistics structure */ -void statdump( stats_t* pstat, int fd ) +void +statdump(stats_t * pstat, int fd) { - char iobuf[IOBUFSIZE]; - char* p; - discrim_t* pp; - - p = iobuf; - p += sprintf( iobuf, "# Spamicity: 
%f\n", pstat->spamicity ); - - for (pp = pstat->extrema; pp < pstat->extrema + pstat->keepers; pp++) - { - if (pp->key.len) - { - strcpy( p, "# '" ); p += 3; - strncpylwr( p, pp->key.p, pp->key.len ); p += pp->key.len; - p += snprintf( p, 28, "' -> %f\n", pp->prob ); - if( p+MAXWORDLEN+32 > (iobuf+1) ) - { - write( fd, iobuf, p-iobuf ); - p = iobuf; - } - } - } - if( p != iobuf ) - { - write( fd, iobuf, p-iobuf ); - } + char iobuf[IOBUFSIZE]; + char *p; + discrim_t *pp; + + p = iobuf; + p += sprintf(iobuf, "# Spamicity: %f\n", pstat->spamicity); + + for (pp = pstat->extrema; pp < pstat->extrema + pstat->keepers; pp++) { + if (pp->key.len) { + strcpy(p, "# '"); + p += 3; + strncpylwr(p, pp->key.p, pp->key.len); + p += pp->key.len; + p += snprintf(p, 28, "' -> %f\n", pp->prob); + if (p + MAXWORDLEN + 32 > (iobuf + 1)) { + write(fd, iobuf, p - iobuf); + p = iobuf; + } + } + } + if (p != iobuf) { + write(fd, iobuf, p - iobuf); + } } -void bayesfilt( dbt_t* pglist, dbt_t* pblist, vec_t* pmlist, stats_t* pstats ) +void +bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, stats_t * pstats) { - veciter_t iter; - str_t* pword; + veciter_t iter; + str_t *pword; - double prob, product, invproduct, dev; - double slotdev, hitdev; + double prob, product, invproduct, dev; + double slotdev, hitdev; #ifdef NON_EQUIPROBABLE - /* There is an argument that we should (go?) by number of *words* here. */ - double msg_prob = ((double)pblist->nitems / (double)pglist->nitems); + /* There is an argument that we should (go?) by number of *words* + * here. 
*/ + double msg_prob = ((double) pblist->nitems / (double) pglist->nitems); + #endif - discrim_t* pp; - discrim_t* hit; - - for (pp = pstats->extrema; pp < pstats->extrema+pstats->keepers; pp++) - { - pp->key.p = NULL; - pp->key.len = 0; - pp->prob = 0.5f; - } - - vec_first( pmlist, &iter ); - while( (pword = veciter_get( &iter )) != NULL ) - { - double goodness = pglist->getcount( pglist, pword ); - double spamness = pblist->getcount( pblist, pword ); - uint goodtotal = pglist->getmsgcount( pglist ); - uint spamtotal = pblist->getmsgcount( pblist ); - - if( goodness + spamness < MINIMUM_FREQ ) - { + discrim_t *pp; + discrim_t *hit; + + for (pp = pstats->extrema; pp < pstats->extrema + pstats->keepers; pp++) { + pp->key.p = NULL; + pp->key.len = 0; + pp->prob = 0.5f; + } + + vec_first(pmlist, &iter); + while ((pword = veciter_get(&iter)) != NULL) { + double goodness = pglist->getcount(pglist, pword); + double spamness = pblist->getcount(pblist, pword); + uint goodtotal = pglist->getmsgcount(pglist); + uint spamtotal = pblist->getmsgcount(pblist); + + if (goodness + spamness < MINIMUM_FREQ) { #ifdef NON_EQUIPROBABLE - /* - * In the absence of evidence, the probability that a new word will - * be spam is the historical ratio of spam words to nonspam words. - */ - prob = msg_prob; + /* + * In the absence of evidence, the probability that a new word will + * be spam is the historical ratio of spam words to nonspam words. + */ + prob = msg_prob; #else - prob = UNKNOWN_WORD; + prob = UNKNOWN_WORD; #endif - } - else - { - double goodprob = goodtotal ? min( 1.0, (goodness / goodtotal) ) : 0.0; - double spamprob = spamtotal ? min( 1.0, (spamness / spamtotal) ) : 0.0; - assert( goodtotal > 0 || spamtotal > 0 ); + } else { + double goodprob = goodtotal ? min(1.0, (goodness / goodtotal)) : 0.0; + double spamprob = spamtotal ? 
min(1.0, (spamness / spamtotal)) : 0.0; + + assert(goodtotal > 0 || spamtotal > 0); #ifdef NON_EQUIPROBABLE - prob = (spamprob * msg_prob) / ((goodprob * (1 - msg_prob)) + (spamprob * msg_prob)); + prob = (spamprob * msg_prob) / ((goodprob * (1 - msg_prob)) + (spamprob * msg_prob)); #else - prob = spamprob / (goodprob + spamprob); + prob = spamprob / (goodprob + spamprob); #endif - prob = minmax( prob, 0.01, 0.99 ); - } - - /* update the list of tokens with maximum deviation */ - dev = DEVIATION(prob); - hit = NULL; - hitdev = 0; - for (pp = pstats->extrema; pp < pstats->extrema+pstats->keepers; pp++) - { - /* don't allow duplicate tokens in the stats.extrema */ - if( pp->key.len > 0 && str_casecmp( pword, &pp->key ) == 0 ) - { - hit = NULL; - break; - } - - slotdev = DEVIATION(pp->prob); - if (dev>slotdev && dev>hitdev) - { - hit = pp; - hitdev = slotdev; - } - } - if (hit) - { - hit->prob = prob; - hit->key = *pword; - } - - veciter_next( &iter ); - } - veciter_destroy( &iter ); - - /* - * Bayes' theorem. - * For discussion, see <http://www.mathpages.com/home/kmath267.htm>. - */ - product = invproduct = 1.0f; - for (pp = pstats->extrema; pp < pstats->extrema+pstats->keepers; pp++) - { - if( pp->prob == 0 ) - { - break; - } - else - { - product *= pp->prob; - invproduct *= (1 - pp->prob); - } - } - pstats->spamicity = product / (product + invproduct); + prob = minmax(prob, 0.01, 0.99); + } + + /* update the list of tokens with maximum deviation */ + dev = DEVIATION(prob); + hit = NULL; + hitdev = 0; + for (pp = pstats->extrema; pp < pstats->extrema + pstats->keepers; pp++) { + /* don't allow duplicate tokens in the stats.extrema */ + if (pp->key.len > 0 && str_casecmp(pword, &pp->key) == 0) { + hit = NULL; + break; + } + slotdev = DEVIATION(pp->prob); + if (dev > slotdev && dev > hitdev) { + hit = pp; + hitdev = slotdev; + } + } + if (hit) { + hit->prob = prob; + hit->key = *pword; + } + veciter_next(&iter); + } + veciter_destroy(&iter); + + /* + * Bayes' theorem. 
+ * For discussion, see <http://www.mathpages.com/home/kmath267.htm>. + */ + product = invproduct = 1.0f; + for (pp = pstats->extrema; pp < pstats->extrema + pstats->keepers; pp++) { + if (pp->prob == 0) { + break; + } else { + product *= pp->prob; + invproduct *= (1 - pp->prob); + } + } + pstats->spamicity = product / (product + invproduct); } -bool_t bvec_loadmsg( vec_t* pthis, lex_t* plex, tok_t* ptok ) +bool_t +bvec_loadmsg(vec_t * pthis, lex_t * plex, tok_t * ptok) { - str_t w; - - lex_nexttoken( plex, ptok ); - while( ptok->tt != eof && ptok->tt != from ) - { - w.p = ptok->p; - w.len = ptok->len; - vec_addtail( pthis, &w ); - lex_nexttoken( plex, ptok ); - } - - return true; + str_t w; + + lex_nexttoken(plex, ptok); + while (ptok->tt != eof && ptok->tt != from) { + w.p = ptok->p; + w.len = ptok->len; + vec_addtail(pthis, &w); + lex_nexttoken(plex, ptok); + } + + return true; } diff --git a/lex.c b/lex.c @@ -16,772 +16,708 @@ static cpchar g_htmltags[] = { - "abbr", - "above", - "accesskey", - "acronym", - "align", - "alink", - "all", - "alt", - "applet", - "archive", - "axis", - "basefont", - "baseline", - "below", - "bgcolor", - "big", - "body", - "border", - "bottom", - "box", - "button", - "cellpadding", - "cellspacing", - "center", - "char", - "charoff", - "charset", - "circle", - "cite", - "class", - "classid", - "clear", - "codebase", - "codetype", - "color", - "cols", - "colspan", - "compact", - "content", - "coords", - "data", - "datetime", - "declare", - "default", - "defer", - "dfn", - "dir", - "disabled", - "face", - "font", - "frameborder", - "groups", - "head", - "headers", - "height", - "href", - "hreflang", - "hsides", - "hspace", - "http-equiv", - "iframe", - "img", - "input", - "ismap", - "justify", - "kbd", - "label", - "lang", - "language", - "left", - "lhs", - "link", - "longdesc", - "map", - "marginheight", - "marginwidth", - "media", - "meta", - "middle", - "multiple", - "name", - "nohref", - "none", - "noresize", - "noshade", - 
"nowrap", - "object", - "onblur", - "onchange", - "onclick", - "ondblclick", - "onfocus", - "onkeydown", - "onkeypress", - "onkeyup", - "onload", - "onmousedown", - "onmousemove", - "onmouseout", - "onmouseover", - "onmouseup", - "onselect", - "onunload", - "param", - "poly", - "profile", - "prompt", - "readonly", - "rect", - "rel", - "rev", - "rhs", - "right", - "rows", - "rowspan", - "rules", - "samp", - "scheme", - "scope", - "script", - "scrolling", - "select", - "selected", - "shape", - "size", - "small", - "span", - "src", - "standby", - "strike", - "strong", - "style", - "sub", - "summary", - "sup", - "tabindex", - "table", - "target", - "textarea", - "title", - "top", - "type", - "usemap", - "valign", - "value", - "valuetype", - "var", - "vlink", - "void", - "vsides", - "vspace", - "width" + "abbr", + "above", + "accesskey", + "acronym", + "align", + "alink", + "all", + "alt", + "applet", + "archive", + "axis", + "basefont", + "baseline", + "below", + "bgcolor", + "big", + "body", + "border", + "bottom", + "box", + "button", + "cellpadding", + "cellspacing", + "center", + "char", + "charoff", + "charset", + "circle", + "cite", + "class", + "classid", + "clear", + "codebase", + "codetype", + "color", + "cols", + "colspan", + "compact", + "content", + "coords", + "data", + "datetime", + "declare", + "default", + "defer", + "dfn", + "dir", + "disabled", + "face", + "font", + "frameborder", + "groups", + "head", + "headers", + "height", + "href", + "hreflang", + "hsides", + "hspace", + "http-equiv", + "iframe", + "img", + "input", + "ismap", + "justify", + "kbd", + "label", + "lang", + "language", + "left", + "lhs", + "link", + "longdesc", + "map", + "marginheight", + "marginwidth", + "media", + "meta", + "middle", + "multiple", + "name", + "nohref", + "none", + "noresize", + "noshade", + "nowrap", + "object", + "onblur", + "onchange", + "onclick", + "ondblclick", + "onfocus", + "onkeydown", + "onkeypress", + "onkeyup", + "onload", + "onmousedown", + 
"onmousemove", + "onmouseout", + "onmouseover", + "onmouseup", + "onselect", + "onunload", + "param", + "poly", + "profile", + "prompt", + "readonly", + "rect", + "rel", + "rev", + "rhs", + "right", + "rows", + "rowspan", + "rules", + "samp", + "scheme", + "scope", + "script", + "scrolling", + "select", + "selected", + "shape", + "size", + "small", + "span", + "src", + "standby", + "strike", + "strong", + "style", + "sub", + "summary", + "sup", + "tabindex", + "table", + "target", + "textarea", + "title", + "top", + "type", + "usemap", + "valign", + "value", + "valuetype", + "var", + "vlink", + "void", + "vsides", + "vspace", + "width" }; -static const uint g_nhtmltags = sizeof(g_htmltags)/sizeof(cpchar); +static const uint g_nhtmltags = sizeof(g_htmltags) / sizeof(cpchar); static cpchar g_ignoredheaders[] = { - "Date:", - "Delivery-date:", - "Message-ID:", - "X-Sorted:", - "X-Spam-" + "Date:", + "Delivery-date:", + "Message-ID:", + "X-Sorted:", + "X-Spam-" }; -static const uint g_nignoredheaders = sizeof(g_ignoredheaders)/sizeof(cpchar); +static const uint g_nignoredheaders = sizeof(g_ignoredheaders) / sizeof(cpchar); -static inline bool_t is_whitespace( int c ) +static inline bool_t +is_whitespace(int c) { - return ( c == ' ' || c == '\t' || c == '\r' ); + return (c == ' ' || c == '\t' || c == '\r'); } -static inline bool_t is_base64char(c) +static inline bool_t +is_base64char(c) { - return ( isalnum(c) || (c == '/' || c == '+') ); + return (isalnum(c) || (c == '/' || c == '+')); } -static inline bool_t is_wordmidchar(c) +static inline bool_t +is_wordmidchar(c) { - return ( isalnum(c) || c == '$' || c == '\'' || c == '.' || c == '-' ); + return (isalnum(c) || c == '$' || c == '\'' || c == '.' 
|| c == '-'); } -static inline bool_t is_wordendchar(c) +static inline bool_t +is_wordendchar(c) { - return ( isalnum(c) || c == '$' ); + return (isalnum(c) || c == '$'); } -static inline bool_t is_htmltag( cpchar p, uint len, uint* ptoklen ) +static inline bool_t +is_htmltag(cpchar p, uint len, uint * ptoklen) { - int lo, hi, mid, minlen, cmp; - - *ptoklen = 0; - - hi = g_nhtmltags-1; - lo = -1; - while( hi-lo > 1 ) - { - mid = (hi+lo)/2; - minlen = min( strlen(g_htmltags[mid]), len ); - cmp = strncmp( g_htmltags[mid], p, minlen ); - if( cmp > 0 || (cmp == 0 && minlen < len && !islower(p[minlen])) ) - hi = mid; - else - lo = mid; - } - minlen = min( strlen(g_htmltags[hi]), len ); - if( len == minlen || strncmp(g_htmltags[hi], p, minlen) != 0 ) - { - return false; - } - - /* check if is_word() will have a longer match */ - if( is_wordendchar(p[minlen]) ) - { - return false; - } - if( is_wordmidchar(p[minlen]) && is_wordendchar(p[minlen+1]) ) - { - return false; - } - - *ptoklen = strlen(g_htmltags[hi]); - - return true; + int lo, hi, mid, minlen, cmp; + + *ptoklen = 0; + + hi = g_nhtmltags - 1; + lo = -1; + while (hi - lo > 1) { + mid = (hi + lo) / 2; + minlen = min(strlen(g_htmltags[mid]), len); + cmp = strncmp(g_htmltags[mid], p, minlen); + if (cmp > 0 || (cmp == 0 && minlen < len && !islower(p[minlen]))) + hi = mid; + else + lo = mid; + } + minlen = min(strlen(g_htmltags[hi]), len); + if (len == minlen || strncmp(g_htmltags[hi], p, minlen) != 0) { + return false; + } + /* check if is_word() will have a longer match */ + if (is_wordendchar(p[minlen])) { + return false; + } + if (is_wordmidchar(p[minlen]) && is_wordendchar(p[minlen + 1])) { + return false; + } + *ptoklen = strlen(g_htmltags[hi]); + + return true; } -static inline bool_t is_htmlcomment( cpchar p, uint len, uint* ptoklen ) +static inline bool_t +is_htmlcomment(cpchar p, uint len, uint * ptoklen) { - *ptoklen = 0; - - if( len >=4 && memcmp( p, "<!--", 4 ) == 0 ) - { - *ptoklen = 4; - return true; - } 
- if( len >= 3 && memcmp( p, "-->", 3 ) == 0 ) - { - *ptoklen = 3; - return true; - } - - return false; + *ptoklen = 0; + + if (len >= 4 && memcmp(p, "<!--", 4) == 0) { + *ptoklen = 4; + return true; + } + if (len >= 3 && memcmp(p, "-->", 3) == 0) { + *ptoklen = 3; + return true; + } + return false; } -static inline bool_t is_base64( cpchar p, uint len, uint* ptoklen ) +static inline bool_t +is_base64(cpchar p, uint len, uint * ptoklen) { - *ptoklen = 0; - while( len > 0 ) - { - if( *p != '\n' && *p != '\r' && !is_base64char(*p) ) - { - return false; - } - p++; - len--; - (*ptoklen)++; - } - return true; + *ptoklen = 0; + while (len > 0) { + if (*p != '\n' && *p != '\r' && !is_base64char(*p)) { + return false; + } + p++; + len--; + (*ptoklen)++; + } + return true; } -static inline bool_t is_mimeboundary( cpchar p, uint len, uint* ptoklen ) +static inline bool_t +is_mimeboundary(cpchar p, uint len, uint * ptoklen) { - *ptoklen = 0; - - if( len < 3 || p[0] != '-' || p[1] != '-' ) - { - return false; - } - p += 2; - len -= 2; - *ptoklen += 2; - while( len > 0 ) - { - if( is_whitespace(*p) ) - { - return false; - } - if( *p == '\n' || *p == '\r' ) - { - break; - } - p++; - len--; - (*ptoklen)++; - } - return true; + *ptoklen = 0; + + if (len < 3 || p[0] != '-' || p[1] != '-') { + return false; + } + p += 2; + len -= 2; + *ptoklen += 2; + while (len > 0) { + if (is_whitespace(*p)) { + return false; + } + if (*p == '\n' || *p == '\r') { + break; + } + p++; + len--; + (*ptoklen)++; + } + return true; } -static inline bool_t is_ipaddr( cpchar p, uint len, uint* ptoklen ) +static inline bool_t +is_ipaddr(cpchar p, uint len, uint * ptoklen) { - uint noctets, ndigits; - - *ptoklen = 0; - - noctets = 0; - while( len > 0 && noctets < 4 ) - { - ndigits = 0; - while( len > 0 && isdigit(*p) ) - { - ndigits++; - p++; - len--; - (*ptoklen)++; - } - if( ndigits == 0 || ndigits > 3 ) - { - return false; - } - noctets++; - if( noctets < 4 ) - { - if( *p != '.' 
) - { - return false; - } - p++; - len--; - (*ptoklen)++; - } - } - if( noctets < 4 ) - { - return false; - } - return true; + uint noctets, ndigits; + + *ptoklen = 0; + + noctets = 0; + while (len > 0 && noctets < 4) { + ndigits = 0; + while (len > 0 && isdigit(*p)) { + ndigits++; + p++; + len--; + (*ptoklen)++; + } + if (ndigits == 0 || ndigits > 3) { + return false; + } + noctets++; + if (noctets < 4) { + if (*p != '.') { + return false; + } + p++; + len--; + (*ptoklen)++; + } + } + if (noctets < 4) { + return false; + } + return true; } -static inline bool_t is_word( cpchar p, uint len, uint* ptoklen ) +static inline bool_t +is_word(cpchar p, uint len, uint * ptoklen) { - if( len < 3 ) - { - return false; - } - if( !(isalpha(*p) || *p == '$') ) - { - return false; - } - *ptoklen = 1; - p++; - len--; - while( len > 0 ) - { - if( !is_wordmidchar(*p) ) - { - break; - } - (*ptoklen)++; - p++; - len--; - } - while( *ptoklen >= 3 && !is_wordendchar(*(p-1)) ) - { - (*ptoklen)--; - p--; - len++; - } - if( *ptoklen < 3 ) - { - return false; - } - - return true; + if (len < 3) { + return false; + } + if (!(isalpha(*p) || *p == '$')) { + return false; + } + *ptoklen = 1; + p++; + len--; + while (len > 0) { + if (!is_wordmidchar(*p)) { + break; + } + (*ptoklen)++; + p++; + len--; + } + while (*ptoklen >= 3 && !is_wordendchar(*(p - 1))) { + (*ptoklen)--; + p--; + len++; + } + if (*ptoklen < 3) { + return false; + } + return true; } -static inline bool_t is_ignoredheader( cpchar p, uint len, uint* ptoklen ) +static inline bool_t +is_ignoredheader(cpchar p, uint len, uint * ptoklen) { - int lo, hi, mid, minlen, cmp; - - hi = g_nignoredheaders-1; - lo = -1; - while( hi-lo > 1 ) - { - mid = (hi+lo)/2; - minlen = min( strlen(g_ignoredheaders[mid]), len ); - cmp = strncasecmp( g_ignoredheaders[mid], p, minlen ); - if( cmp >= 0 ) - hi = mid; - else - lo = mid; - } - minlen = min( strlen(g_ignoredheaders[hi]), len ); - if( len == minlen || strncasecmp(g_ignoredheaders[hi], p, 
minlen) != 0 ) - { - return false; - } - *ptoklen = len; - return true; + int lo, hi, mid, minlen, cmp; + + hi = g_nignoredheaders - 1; + lo = -1; + while (hi - lo > 1) { + mid = (hi + lo) / 2; + minlen = min(strlen(g_ignoredheaders[mid]), len); + cmp = strncasecmp(g_ignoredheaders[mid], p, minlen); + if (cmp >= 0) + hi = mid; + else + lo = mid; + } + minlen = min(strlen(g_ignoredheaders[hi]), len); + if (len == minlen || strncasecmp(g_ignoredheaders[hi], p, minlen) != 0) { + return false; + } + *ptoklen = len; + return true; } -static inline bool_t is_mailerid( cpchar p, uint len, uint* ptoklen ) +static inline bool_t +is_mailerid(cpchar p, uint len, uint * ptoklen) { - if( len < 4 || strncmp( p, "\tid ", 4 ) != 0 ) - { - return false; - } - *ptoklen = len; - return true; + if (len < 4 || strncmp(p, "\tid ", 4) != 0) { + return false; + } + *ptoklen = len; + return true; } -static inline bool_t is_spamtext( cpchar p, uint len, uint* ptoklen ) +static inline bool_t +is_spamtext(cpchar p, uint len, uint * ptoklen) { - if( len < 5 || strncmp( p, "SPAM:", 5 ) != 0 ) - { - return false; - } - *ptoklen = len; - return true; + if (len < 5 || strncmp(p, "SPAM:", 5) != 0) { + return false; + } + *ptoklen = len; + return true; } -static inline bool_t is_smtpid( cpchar p, uint len, uint* ptoklen ) +static inline bool_t +is_smtpid(cpchar p, uint len, uint * ptoklen) { - if( len < 8 || strncmp( p, "SMTP id ", 8 ) != 0 ) - { - return false; - } - *ptoklen = len; - return true; + if (len < 8 || strncmp(p, "SMTP id ", 8) != 0) { + return false; + } + *ptoklen = len; + return true; } -static inline bool_t is_boundaryequal( cpchar p, uint len, uint* ptoklen ) +static inline bool_t +is_boundaryequal(cpchar p, uint len, uint * ptoklen) { - if( len < 9 || strncmp( p, "boundary=", 9 ) != 0 ) - { - return false; - } - *ptoklen = len; - return true; + if (len < 9 || strncmp(p, "boundary=", 9) != 0) { + return false; + } + *ptoklen = len; + return true; } -static inline bool_t 
is_nameequal( cpchar p, uint len, uint* ptoklen ) +static inline bool_t +is_nameequal(cpchar p, uint len, uint * ptoklen) { - if( len < 6 || strncmp( p, "name=\"", 6 ) != 0 ) - { - return false; - } - *ptoklen = 6; - return true; + if (len < 6 || strncmp(p, "name=\"", 6) != 0) { + return false; + } + *ptoklen = 6; + return true; } -static inline bool_t is_filenameequal( cpchar p, uint len, uint* ptoklen ) +static inline bool_t +is_filenameequal(cpchar p, uint len, uint * ptoklen) { - if( len < 10 || strncmp( p, "filename=\"", 10 ) != 0 ) - { - return false; - } - *ptoklen = 10; - return true; + if (len < 10 || strncmp(p, "filename=\"", 10) != 0) { + return false; + } + *ptoklen = 10; + return true; } -static inline bool_t is_from( cpchar p, uint len, uint* ptoklen ) +static inline bool_t +is_from(cpchar p, uint len, uint * ptoklen) { - if( len < 5 || strncmp( p, "From ", 5 ) != 0 ) - { - return false; - } - *ptoklen = 5; - return true; + if (len < 5 || strncmp(p, "From ", 5) != 0) { + return false; + } + *ptoklen = 5; + return true; } -/*****************************************************************************/ - -void lex_create( lex_t* pthis, mbox_t mboxtype ) +void +lex_create(lex_t * pthis, mbox_t mboxtype) { - pthis->mboxtype = mboxtype; - pthis->section = envelope; - pthis->pos = 0; - pthis->bom = 0; - pthis->eom = 0; - pthis->lineend = 0; - pthis->buflen = 0; - pthis->pbuf = NULL; + pthis->mboxtype = mboxtype; + pthis->section = envelope; + pthis->pos = 0; + pthis->bom = 0; + pthis->eom = 0; + pthis->lineend = 0; + pthis->buflen = 0; + pthis->pbuf = NULL; } -void lex_destroy( lex_t* pthis ) +void +lex_destroy(lex_t * pthis) { - free( pthis->pbuf ); + free(pthis->pbuf); } -bool_t lex_load( lex_t* pthis, int fd ) +bool_t +lex_load(lex_t * pthis, int fd) { - uint nalloc; - ssize_t nread; - - nalloc = IOBUFSIZE; - pthis->pbuf = (char*)malloc( IOBUFSIZE ); - if( pthis->pbuf == NULL ) - { - return false; - } - - while( (nread = read( fd, pthis->pbuf + 
pthis->buflen, nalloc - pthis->buflen )) > 0 ) - { - pthis->buflen += nread; - if( pthis->buflen == nalloc ) - { - char* pnewbuf; - nalloc += IOBUFSIZE; - pnewbuf = (char*)realloc( pthis->pbuf, nalloc ); - if( pnewbuf == NULL ) - { - free( pthis->pbuf ); - pthis->pbuf = NULL; - return false; - } - pthis->pbuf = pnewbuf; - } - } - if( nread < 0 ) - { - free( pthis->pbuf ); - pthis->pbuf = NULL; - return false; - } - if( pthis->mboxtype == detect ) - { - if( pthis->buflen > 5 && memcmp( pthis->pbuf, "From ", 5 ) == 0 ) - { - verbose( 1, "Input looks like an mbox\n" ); - pthis->mboxtype = mbox; - } - else - { - verbose( 1, "Input looks like a maildir\n" ); - pthis->mboxtype = maildir; - } - } - - return true; + uint nalloc; + ssize_t nread; + + nalloc = IOBUFSIZE; + pthis->pbuf = (char *) malloc(IOBUFSIZE); + if (pthis->pbuf == NULL) { + return false; + } + while ((nread = read(fd, pthis->pbuf + pthis->buflen, nalloc - pthis->buflen)) > 0) { + pthis->buflen += nread; + if (pthis->buflen == nalloc) { + char *pnewbuf; + + nalloc += IOBUFSIZE; + pnewbuf = (char *) realloc(pthis->pbuf, nalloc); + if (pnewbuf == NULL) { + free(pthis->pbuf); + pthis->pbuf = NULL; + return false; + } + pthis->pbuf = pnewbuf; + } + } + if (nread < 0) { + free(pthis->pbuf); + pthis->pbuf = NULL; + return false; + } + if (pthis->mboxtype == detect) { + if (pthis->buflen > 5 && memcmp(pthis->pbuf, "From ", 5) == 0) { + verbose(1, "Input looks like an mbox\n"); + pthis->mboxtype = mbox; + } else { + verbose(1, "Input looks like a maildir\n"); + pthis->mboxtype = maildir; + } + } + return true; } -static bool_t lex_nextline( lex_t* pthis ) +static bool_t +lex_nextline(lex_t * pthis) { - cpchar pbuf; - uint len; - uint toklen; + cpchar pbuf; + uint len; + uint toklen; again: - /* XXX: use and update pthis->section */ - pthis->pos = pthis->lineend; - if( pthis->lineend == pthis->buflen ) - { - return false; - } - - pbuf = pthis->pbuf + pthis->pos; - len = 0; - while( pthis->pos + len < pthis->buflen 
&& pbuf[len] != '\n' ) - { - len++; - } - if( pthis->pos + len < pthis->buflen ) - { - len++; /* bump past the LF */ - } - - pthis->lineend = pthis->pos + len; - - /* check beginning-of-line patterns */ - if( is_base64( pbuf, len, &toklen ) || - is_ignoredheader( pbuf, len, &toklen ) || - is_mailerid( pbuf, len, &toklen ) || - is_mimeboundary( pbuf, len, &toklen ) || - is_spamtext( pbuf, len, &toklen ) ) - { - /* ignore line */ - pthis->pos += toklen; - goto again; - } - - return true; + /* XXX: use and update pthis->section */ + pthis->pos = pthis->lineend; + if (pthis->lineend == pthis->buflen) { + return false; + } + pbuf = pthis->pbuf + pthis->pos; + len = 0; + while (pthis->pos + len < pthis->buflen && pbuf[len] != '\n') { + len++; + } + if (pthis->pos + len < pthis->buflen) { + len++; /* bump past the LF */ + } + pthis->lineend = pthis->pos + len; + + /* check beginning-of-line patterns */ + if (is_base64(pbuf, len, &toklen) || + is_ignoredheader(pbuf, len, &toklen) || + is_mailerid(pbuf, len, &toklen) || + is_mimeboundary(pbuf, len, &toklen) || + is_spamtext(pbuf, len, &toklen)) { + /* ignore line */ + pthis->pos += toklen; + goto again; + } + return true; } -void lex_nexttoken( lex_t* pthis, tok_t* ptok ) +void +lex_nexttoken(lex_t * pthis, tok_t * ptok) { - cpchar pbuf; - uint len; - uint toklen; - - assert( pthis->pbuf != NULL ); + cpchar pbuf; + uint len; + uint toklen; - if( pthis->pos == pthis->eom ) - { - pthis->bom = pthis->pos; - } + assert(pthis->pbuf != NULL); + if (pthis->pos == pthis->eom) { + pthis->bom = pthis->pos; + } again: - /* skip whitespace between tokens */ - while( pthis->pos != pthis->lineend && is_whitespace(pthis->pbuf[pthis->pos]) ) - { - pthis->pos++; - } - - pbuf = pthis->pbuf + pthis->pos; - len = pthis->lineend - pthis->pos; - - /* possibilities: end-of-line, html-comment, ipaddr, word, junk */ - - if( pthis->pos == pthis->lineend ) - { - if( !lex_nextline( pthis ) ) - { - pthis->eom = pthis->pos; - ptok->tt = eof; - return; - 
} - - pbuf = pthis->pbuf + pthis->pos; - len = pthis->lineend - pthis->pos; - - if( pthis->mboxtype == mbox ) - { - if( is_from( pbuf, len, &toklen ) ) - { - pthis->eom = pthis->pos; - ptok->tt = from; - ptok->p = pthis->pbuf + pthis->pos; - ptok->len = toklen; - pthis->pos += toklen; - return; - } - } - - goto again; /* skip lws */ - } - - if( is_htmltag( pbuf, len, &toklen ) || - is_htmlcomment( pbuf, len, &toklen ) || - is_smtpid( pbuf, len, &toklen ) || - is_boundaryequal( pbuf, len, &toklen ) || - is_nameequal( pbuf, len, &toklen ) || - is_filenameequal( pbuf, len, &toklen ) ) - { - /* ignore it */ - pthis->pos += toklen; - goto again; - } - - if( is_ipaddr( pbuf, len, &toklen ) ) - { - ptok->tt = word; - ptok->p = pthis->pbuf + pthis->pos; - ptok->len = toklen; - pthis->pos += toklen; - return; - } - if( is_word( pbuf, len, &toklen ) ) - { - ptok->tt = word; - ptok->p = pthis->pbuf + pthis->pos; - ptok->len = toklen; - pthis->pos += toklen; - if( toklen > MAXWORDLEN ) - { - goto again; - } - return; - } - - /* junk */ - pthis->pos++; - goto again; + /* skip whitespace between tokens */ + while (pthis->pos != pthis->lineend && is_whitespace(pthis->pbuf[pthis->pos])) { + pthis->pos++; + } + + pbuf = pthis->pbuf + pthis->pos; + len = pthis->lineend - pthis->pos; + + /* possibilities: end-of-line, html-comment, ipaddr, word, junk */ + + if (pthis->pos == pthis->lineend) { + if (!lex_nextline(pthis)) { + pthis->eom = pthis->pos; + ptok->tt = eof; + return; + } + pbuf = pthis->pbuf + pthis->pos; + len = pthis->lineend - pthis->pos; + + if (pthis->mboxtype == mbox) { + if (is_from(pbuf, len, &toklen)) { + pthis->eom = pthis->pos; + ptok->tt = from; + ptok->p = pthis->pbuf + pthis->pos; + ptok->len = toklen; + pthis->pos += toklen; + return; + } + } + goto again; /* skip lws */ + } + if (is_htmltag(pbuf, len, &toklen) || + is_htmlcomment(pbuf, len, &toklen) || + is_smtpid(pbuf, len, &toklen) || + is_boundaryequal(pbuf, len, &toklen) || + is_nameequal(pbuf, len, 
&toklen) || + is_filenameequal(pbuf, len, &toklen)) { + /* ignore it */ + pthis->pos += toklen; + goto again; + } + if (is_ipaddr(pbuf, len, &toklen)) { + ptok->tt = word; + ptok->p = pthis->pbuf + pthis->pos; + ptok->len = toklen; + pthis->pos += toklen; + return; + } + if (is_word(pbuf, len, &toklen)) { + ptok->tt = word; + ptok->p = pthis->pbuf + pthis->pos; + ptok->len = toklen; + pthis->pos += toklen; + if (toklen > MAXWORDLEN) { + goto again; + } + return; + } + /* junk */ + pthis->pos++; + goto again; } /* SpamAssassin style passthru */ -void lex_passthru( lex_t* pthis, bool_t is_spam, double hits ) +void +lex_passthru(lex_t * pthis, bool_t is_spam, double hits) { - char szbuf[256]; - bool_t in_headers = true; - - assert( pthis->bom < pthis->buflen && pthis->eom <= pthis->buflen ); - assert( pthis->bom <= pthis->eom ); - - pthis->pos = pthis->bom; - if( is_spam ) - { - sprintf( szbuf, "X-Spam-Status: Yes, hits=%f required=%f, tests=bmf\n" - "X-Spam-Flag: YES\n", - hits, SPAM_CUTOFF ); - } - else - { - sprintf( szbuf, "X-Spam-Status: No, hits=%f required=%f\n", - hits, SPAM_CUTOFF ); - } - - /* existing headers */ - while( in_headers && pthis->pos < pthis->eom ) - { - cpchar pbuf = pthis->pbuf + pthis->pos; - uint len = 0; - while( pthis->pos + len < pthis->buflen && pbuf[len] != '\n' ) - { - len++; - } - if( pthis->pos + len < pthis->buflen ) - { - len++; /* bump past the LF */ - } - - /* check for end of headers */ - if( pbuf[0] == '\n' || (pbuf[0] == '\r' && pbuf[1] == '\n') ) - { - /* end of headers */ - break; - } - - /* write header, ignoring existing spam headers */ - if( strncasecmp( pbuf, "X-Spam-", 7 ) != 0 ) - { - write( STDOUT_FILENO, pbuf, len ); - } - - pthis->pos += len; - } - - /* new headers */ - write( STDOUT_FILENO, szbuf, strlen(szbuf) ); - - /* remainder */ - if( pthis->pos < pthis->eom ) - { - write( STDOUT_FILENO, pthis->pbuf+pthis->pos, pthis->eom-pthis->pos ); - } - pthis->bom = pthis->eom; + char szbuf[256]; + bool_t in_headers = 
true; + + assert(pthis->bom < pthis->buflen && pthis->eom <= pthis->buflen); + assert(pthis->bom <= pthis->eom); + + pthis->pos = pthis->bom; + if (is_spam) { + sprintf(szbuf, "X-Spam-Status: Yes, hits=%f required=%f, tests=bmf\n" + "X-Spam-Flag: YES\n", + hits, SPAM_CUTOFF); + } else { + sprintf(szbuf, "X-Spam-Status: No, hits=%f required=%f\n", + hits, SPAM_CUTOFF); + } + + /* existing headers */ + while (in_headers && pthis->pos < pthis->eom) { + cpchar pbuf = pthis->pbuf + pthis->pos; + uint len = 0; + + while (pthis->pos + len < pthis->buflen && pbuf[len] != '\n') { + len++; + } + if (pthis->pos + len < pthis->buflen) { + len++; /* bump past the LF */ + } + /* check for end of headers */ + if (pbuf[0] == '\n' || (pbuf[0] == '\r' && pbuf[1] == '\n')) { + /* end of headers */ + break; + } + /* write header, ignoring existing spam headers */ + if (strncasecmp(pbuf, "X-Spam-", 7) != 0) { + write(STDOUT_FILENO, pbuf, len); + } + pthis->pos += len; + } + + /* new headers */ + write(STDOUT_FILENO, szbuf, strlen(szbuf)); + + /* remainder */ + if (pthis->pos < pthis->eom) { + write(STDOUT_FILENO, pthis->pbuf + pthis->pos, pthis->eom - pthis->pos); + } + pthis->bom = pthis->eom; } #ifdef UNIT_TEST -int main( int argc, char** argv ) +int +main(int argc, char **argv) { - int fd; - lex_t lex; - tok_t tok; - - fd = STDIN_FILENO; - if( argc == 2 ) - { - fd = open( argv[1], O_RDONLY ); - } - - lex_create( &lex ); - if( ! lex_load( &lex, fd ) ) - { - fprintf( stderr, "cannot load file\n" ); - exit( 1 ); - } - - lex_nexttoken( &lex, &tok ); - while( tok.tt != eof ) - { - char sztok[64]; - if( tok.len > MAXWORDLEN ) - { - printf( "*** token too long! 
***\n" ); - exit( 1 ); - } - - memcpy( sztok, tok.p, tok.len ); - strlwr( sztok ); - sztok[tok.len] = '\0'; - printf( "get_token: %d '%s'\n", tok.tt, sztok ); - - lex_nexttoken( &lex, &tok ); - } - - lex_destroy( &lex ); - return 0; + int fd; + lex_t lex; + tok_t tok; + + fd = STDIN_FILENO; + if (argc == 2) { + fd = open(argv[1], O_RDONLY); + } + lex_create(&lex); + if (!lex_load(&lex, fd)) { + fprintf(stderr, "cannot load file\n"); + exit(1); + } + lex_nexttoken(&lex, &tok); + while (tok.tt != eof) { + char sztok[64]; + + if (tok.len > MAXWORDLEN) { + printf("*** token too long! ***\n"); + exit(1); + } + memcpy(sztok, tok.p, tok.len); + strlwr(sztok); + sztok[tok.len] = '\0'; + printf("get_token: %d '%s'\n", tok.tt, sztok); + + lex_nexttoken(&lex, &tok); + } + + lex_destroy(&lex); + return 0; } -#endif /* def UNIT_TEST */ +#endif /* def UNIT_TEST */ diff --git a/str.c b/str.c @@ -11,68 +11,72 @@ #include "dbg.h" #include "str.h" -void strlwr( char* s ) +void +strlwr(char *s) { - while( *s != '\0' ) - { - *s = tolower(*s); - s++; - } + while (*s != '\0') { + *s = tolower(*s); + s++; + } } -void strcpylwr( char* d, const char* s ) +void +strcpylwr(char *d, const char *s) { - while( *s != '\0' ) - { - *d++ = tolower(*s++); - } + while (*s != '\0') { + *d++ = tolower(*s++); + } } -void strncpylwr( char* d, const char* s, int n ) +void +strncpylwr(char *d, const char *s, int n) { - while( n-- ) - { - *d++ = tolower(*s++); - } + while (n--) { + *d++ = tolower(*s++); + } } -void str_create( str_t* pstr ) +void +str_create(str_t * pstr) { - pstr->p = NULL; - pstr->len = 0; + pstr->p = NULL; + pstr->len = 0; } -void str_destroy( str_t* pstr ) +void +str_destroy(str_t * pstr) { - /* empty */ + /* empty */ } -int str_cmp( const str_t* pthis, const str_t* pother ) +int +str_cmp(const str_t * pthis, const str_t * pother) { - uint minlen = min( pthis->len, pother->len ); - int cmp; - assert( pthis->p != NULL && pother->p != NULL && minlen != 0 ); + uint minlen = min(pthis->len, 
pother->len); + int cmp; - cmp = strncmp( pthis->p, pother->p, minlen ); + assert(pthis->p != NULL && pother->p != NULL && minlen != 0); - if( cmp == 0 && pthis->len != pother->len ) - { - cmp = (pthis->len < pother->len) ? -1 : 1; - } - return cmp; + cmp = strncmp(pthis->p, pother->p, minlen); + + if (cmp == 0 && pthis->len != pother->len) { + cmp = (pthis->len < pother->len) ? -1 : 1; + } + return cmp; } -int str_casecmp( const str_t* pthis, const str_t* pother ) +int +str_casecmp(const str_t * pthis, const str_t * pother) { - uint minlen = min( pthis->len, pother->len ); - int cmp; - assert( pthis->p != NULL && pother->p != NULL && minlen != 0 ); + uint minlen = min(pthis->len, pother->len); + int cmp; + + assert(pthis->p != NULL && pother->p != NULL && minlen != 0); - cmp = strncasecmp( pthis->p, pother->p, minlen ); + cmp = strncasecmp(pthis->p, pother->p, minlen); - if( cmp == 0 && pthis->len != pother->len ) - { - cmp = (pthis->len < pother->len) ? -1 : 1; - } - return cmp; + if (cmp == 0 && pthis->len != pother->len) { + cmp = (pthis->len < pother->len) ? 
-1 : 1; + } + return cmp; } diff --git a/vec.c b/vec.c @@ -23,323 +23,316 @@ * vector */ -void vec_create( vec_t* pthis ) +void +vec_create(vec_t * pthis) { - pthis->nalloc = VEC_INITIAL_SIZE; - pthis->nitems = 0; - pthis->pitems = (str_t*)malloc( VEC_INITIAL_SIZE*sizeof(str_t) ); + pthis->nalloc = VEC_INITIAL_SIZE; + pthis->nitems = 0; + pthis->pitems = (str_t *) malloc(VEC_INITIAL_SIZE * sizeof(str_t)); } -void vec_destroy( vec_t* pthis ) +void +vec_destroy(vec_t * pthis) { - free( pthis->pitems ); + free(pthis->pitems); } -static void vec_setsize( vec_t* pthis, uint nsize ) +static void +vec_setsize(vec_t * pthis, uint nsize) { - if( nsize > pthis->nalloc ) - { - uint nnewalloc; - str_t* pnewitems; - uint n; - - nnewalloc = pthis->nalloc * 2; - if( nnewalloc < nsize ) nnewalloc = nsize; - pnewitems = (str_t*)realloc( pthis->pitems, nnewalloc*sizeof(str_t) ); - if( pnewitems == NULL ) - { - exit( 2 ); - } - for( n = pthis->nitems; n < nsize; n++ ) - { - str_create( &pnewitems[n] ); - } - pthis->pitems = pnewitems; - pthis->nalloc = nnewalloc; - } + if (nsize > pthis->nalloc) { + uint nnewalloc; + str_t *pnewitems; + uint n; + + nnewalloc = pthis->nalloc * 2; + if (nnewalloc < nsize) + nnewalloc = nsize; + pnewitems = (str_t *) realloc(pthis->pitems, nnewalloc * sizeof(str_t)); + if (pnewitems == NULL) { + exit(2); + } + for (n = pthis->nitems; n < nsize; n++) { + str_create(&pnewitems[n]); + } + pthis->pitems = pnewitems; + pthis->nalloc = nnewalloc; + } } -void vec_addhead( vec_t* pthis, str_t* pstr ) +void +vec_addhead(vec_t * pthis, str_t * pstr) { - assert( pstr->p != NULL && pstr->len > 0 ); + assert(pstr->p != NULL && pstr->len > 0); - vec_setsize( pthis, pthis->nitems+1 ); - memmove( &pthis->pitems[1], &pthis->pitems[0], pthis->nitems*sizeof(str_t) ); - pthis->pitems[0] = *pstr; - pthis->nitems++; + vec_setsize(pthis, pthis->nitems + 1); + memmove(&pthis->pitems[1], &pthis->pitems[0], pthis->nitems * sizeof(str_t)); + pthis->pitems[0] = *pstr; + 
pthis->nitems++; } -void vec_addtail( vec_t* pthis, str_t* pstr ) +void +vec_addtail(vec_t * pthis, str_t * pstr) { - assert( pstr->p != NULL && pstr->len > 0 ); + assert(pstr->p != NULL && pstr->len > 0); - vec_setsize( pthis, pthis->nitems+1 ); - pthis->pitems[pthis->nitems] = *pstr; - pthis->nitems++; + vec_setsize(pthis, pthis->nitems + 1); + pthis->pitems[pthis->nitems] = *pstr; + pthis->nitems++; } -void vec_delhead( vec_t* pthis ) +void +vec_delhead(vec_t * pthis) { - assert( pthis->nitems > 0 ); - pthis->nitems--; - memmove( &pthis->pitems[0], &pthis->pitems[1], pthis->nitems*sizeof(str_t) ); + assert(pthis->nitems > 0); + pthis->nitems--; + memmove(&pthis->pitems[0], &pthis->pitems[1], pthis->nitems * sizeof(str_t)); } -void vec_deltail( vec_t* pthis ) +void +vec_deltail(vec_t * pthis) { - assert( pthis->nitems > 0 ); - pthis->nitems--; + assert(pthis->nitems > 0); + pthis->nitems--; } -void vec_first( vec_t* pthis, veciter_t* piter ) +void +vec_first(vec_t * pthis, veciter_t * piter) { - piter->plist = pthis; - piter->index = 0; + piter->plist = pthis; + piter->index = 0; } -void vec_last( vec_t* pthis, veciter_t* piter ) +void +vec_last(vec_t * pthis, veciter_t * piter) { - piter->plist = pthis; - piter->index = pthis->nitems; + piter->plist = pthis; + piter->index = pthis->nitems; } /***************************************************************************** * sorted vector */ -static int svec_compare( const void* p1, const void* p2 ) +static int +svec_compare(const void *p1, const void *p2) { - return str_casecmp( (const str_t*)p1, (const str_t*)p2 ); + return str_casecmp((const str_t *) p1, (const str_t *) p2); } -void svec_add( vec_t* pthis, str_t* pstr ) +void +svec_add(vec_t * pthis, str_t * pstr) { - int lo, hi, mid; - veciter_t iter; - - if( pthis->nitems == 0 ) - { - vec_addtail( pthis, pstr ); - return; - } - - if( str_casecmp( pstr, &pthis->pitems[0] ) < 0 ) - { - vec_addhead( pthis, pstr ); - return; - } - - hi = pthis->nitems - 1; - lo = 
-1; - while( hi-lo > 1 ) - { - mid = (hi+lo)/2; - if( str_casecmp( pstr, &pthis->pitems[mid] ) <= 0 ) - hi = mid; - else - lo = mid; - } - assert( hi < pthis->nitems ); - - iter.plist = pthis; - iter.index = hi; - - if( str_casecmp( pstr, &pthis->pitems[hi] ) < 0 ) - { - veciter_addbefore( &iter, pstr ); - } - else - { - veciter_addafter( &iter, pstr ); - } + int lo, hi, mid; + veciter_t iter; + + if (pthis->nitems == 0) { + vec_addtail(pthis, pstr); + return; + } + if (str_casecmp(pstr, &pthis->pitems[0]) < 0) { + vec_addhead(pthis, pstr); + return; + } + hi = pthis->nitems - 1; + lo = -1; + while (hi - lo > 1) { + mid = (hi + lo) / 2; + if (str_casecmp(pstr, &pthis->pitems[mid]) <= 0) + hi = mid; + else + lo = mid; + } + assert(hi < pthis->nitems); + + iter.plist = pthis; + iter.index = hi; + + if (str_casecmp(pstr, &pthis->pitems[hi]) < 0) { + veciter_addbefore(&iter, pstr); + } else { + veciter_addafter(&iter, pstr); + } } -str_t* svec_find( vec_t* pthis, str_t* pstr ) +str_t * +svec_find(vec_t * pthis, str_t * pstr) { - int lo, hi, mid; - - if( pthis->nitems == 0 ) - { - return NULL; - } - - hi = pthis->nitems - 1; - lo = -1; - while( hi-lo > 1 ) - { - mid = (hi+lo)/2; - if( str_casecmp( pstr, &pthis->pitems[mid] ) <= 0 ) - hi = mid; - else - lo = mid; - } - assert( hi >= 0 && hi < pthis->nitems ); - - if( str_casecmp( pstr, &pthis->pitems[hi] ) != 0 ) - { - return NULL; - } - - return &pthis->pitems[hi]; + int lo, hi, mid; + + if (pthis->nitems == 0) { + return NULL; + } + hi = pthis->nitems - 1; + lo = -1; + while (hi - lo > 1) { + mid = (hi + lo) / 2; + if (str_casecmp(pstr, &pthis->pitems[mid]) <= 0) + hi = mid; + else + lo = mid; + } + assert(hi >= 0 && hi < pthis->nitems); + + if (str_casecmp(pstr, &pthis->pitems[hi]) != 0) { + return NULL; + } + return &pthis->pitems[hi]; } -void svec_sort( vec_t* pthis ) +void +svec_sort(vec_t * pthis) { - if( pthis->nitems > 1 ) - { - qsort( pthis->pitems, pthis->nitems, sizeof(str_t), svec_compare ); - } + if 
(pthis->nitems > 1) { + qsort(pthis->pitems, pthis->nitems, sizeof(str_t), svec_compare); + } } /***************************************************************************** * vector iterator */ -void veciter_destroy( veciter_t* pthis ) +void +veciter_destroy(veciter_t * pthis) { - /* empty */ + /* empty */ } -str_t* veciter_get( veciter_t* pthis ) +str_t * +veciter_get(veciter_t * pthis) { - if( pthis->plist == NULL || pthis->index >= pthis->plist->nitems ) - { - return NULL; - } - - return &pthis->plist->pitems[pthis->index]; + if (pthis->plist == NULL || pthis->index >= pthis->plist->nitems) { + return NULL; + } + return &pthis->plist->pitems[pthis->index]; } -bool_t veciter_equal( veciter_t* pthis, veciter_t* pthat ) +bool_t +veciter_equal(veciter_t * pthis, veciter_t * pthat) { - if( pthis->plist != pthat->plist || - pthis->index != pthat->index ) - { - return false; - } - - return true; + if (pthis->plist != pthat->plist || + pthis->index != pthat->index) { + return false; + } + return true; } -bool_t veciter_hasitem( veciter_t* pthis ) +bool_t +veciter_hasitem(veciter_t * pthis) { - if( pthis->plist == NULL || pthis->index >= pthis->plist->nitems ) - { - return false; - } - return true; + if (pthis->plist == NULL || pthis->index >= pthis->plist->nitems) { + return false; + } + return true; } -bool_t veciter_prev( veciter_t* pthis ) +bool_t +veciter_prev(veciter_t * pthis) { - if( pthis->index == 0 ) - { - return false; - } - pthis->index--; - return true; + if (pthis->index == 0) { + return false; + } + pthis->index--; + return true; } -bool_t veciter_next( veciter_t* pthis ) +bool_t +veciter_next(veciter_t * pthis) { - pthis->index++; - if( pthis->index == pthis->plist->nitems ) - { - return false; - } - return true; + pthis->index++; + if (pthis->index == pthis->plist->nitems) { + return false; + } + return true; } -void veciter_addafter( veciter_t* pthis, str_t* pstr ) +void +veciter_addafter(veciter_t * pthis, str_t * pstr) { - str_t* pitems; - - 
vec_setsize( pthis->plist, pthis->plist->nitems+1 ); - assert( pthis->index < pthis->plist->nitems ); - pitems = pthis->plist->pitems; - - if( pthis->index != pthis->plist->nitems-1 ) - { - memmove( &pitems[pthis->index+2], &pitems[pthis->index+1], - (pthis->plist->nitems-pthis->index-1) * sizeof(str_t) ); - } - - pitems[pthis->index+1] = *pstr; - pthis->plist->nitems++; + str_t *pitems; + + vec_setsize(pthis->plist, pthis->plist->nitems + 1); + assert(pthis->index < pthis->plist->nitems); + pitems = pthis->plist->pitems; + + if (pthis->index != pthis->plist->nitems - 1) { + memmove(&pitems[pthis->index + 2], &pitems[pthis->index + 1], + (pthis->plist->nitems - pthis->index - 1) * sizeof(str_t)); + } + pitems[pthis->index + 1] = *pstr; + pthis->plist->nitems++; } -void veciter_addbefore( veciter_t* pthis, str_t* pstr ) +void +veciter_addbefore(veciter_t * pthis, str_t * pstr) { - str_t* pitems; + str_t *pitems; - vec_setsize( pthis->plist, pthis->plist->nitems+1 ); - assert( pthis->index < pthis->plist->nitems ); - pitems = pthis->plist->pitems; + vec_setsize(pthis->plist, pthis->plist->nitems + 1); + assert(pthis->index < pthis->plist->nitems); + pitems = pthis->plist->pitems; - memmove( &pitems[pthis->index+1], &pitems[pthis->index], - (pthis->plist->nitems-pthis->index) * sizeof(str_t) ); + memmove(&pitems[pthis->index + 1], &pitems[pthis->index], + (pthis->plist->nitems - pthis->index) * sizeof(str_t)); - pitems[pthis->index] = *pstr; - pthis->plist->nitems++; + pitems[pthis->index] = *pstr; + pthis->plist->nitems++; } -void veciter_del( veciter_t* pthis ) +void +veciter_del(veciter_t * pthis) { - str_t* pitems; - - assert( pthis->plist->nitems > 0 ); - pthis->plist->nitems--; - if( pthis->index < pthis->plist->nitems ) - { - pitems = pthis->plist->pitems; - memmove( &pitems[pthis->index], &pitems[pthis->index+1], - (pthis->plist->nitems-pthis->index) * sizeof(str_t) ); - } + str_t *pitems; + + assert(pthis->plist->nitems > 0); + pthis->plist->nitems--; + if 
(pthis->index < pthis->plist->nitems) { + pitems = pthis->plist->pitems; + memmove(&pitems[pthis->index], &pitems[pthis->index + 1], + (pthis->plist->nitems - pthis->index) * sizeof(str_t)); + } } #ifdef UNIT_TEST -int main( int argc, char** argv ) +int +main(int argc, char **argv) { - vec_t vl; - veciter_t iter; - str_t* pstr; - uint n; - - if( argc != 2 ) - { - fprintf( stderr, "usage: %s <file>\n", argv[0] ); - return 1; - } - - for( n = 0; n < 100; n++ ) - { - vec_create( &vl ); - vec_load( &vl, argv[1] ); - - vec_first( &vl, &iter ); - while( (pstr = veciter_get( &iter )) != NULL ) - { - char buf[256]; - char* p; - if( pstr->len > 200 ) - { - fprintf( stderr, "str too long: %u chars\n", pstr->len ); - break; - } - p = buf; - strcpy( buf, "str: " ); - p += 6; - memcpy( p, pstr->p, pstr->len ); - p += pstr->len; - sprintf( p, " %u", pstr->count ); - puts( buf ); - - veciter_next( &iter ); - } - - vec_destroy( &vl ); - } - - return 0; + vec_t vl; + veciter_t iter; + str_t *pstr; + uint n; + + if (argc != 2) { + fprintf(stderr, "usage: %s <file>\n", argv[0]); + return 1; + } + for (n = 0; n < 100; n++) { + vec_create(&vl); + vec_load(&vl, argv[1]); + + vec_first(&vl, &iter); + while ((pstr = veciter_get(&iter)) != NULL) { + char buf[256]; + char *p; + + if (pstr->len > 200) { + fprintf(stderr, "str too long: %u chars\n", pstr->len); + break; + } + p = buf; + strcpy(buf, "str: "); + p += 6; + memcpy(p, pstr->p, pstr->len); + p += pstr->len; + sprintf(p, " %u", pstr->count); + puts(buf); + + veciter_next(&iter); + } + + vec_destroy(&vl); + } + + return 0; } -#endif /* def UNIT_TEST */ +#endif /* def UNIT_TEST */