Mega Code Archive

 
Categories / C / Small Application
 

Create distinct wordlist sorted on frequency

#include <stdio.h> #include <string.h> #include <stdlib.h> #define MAXLINE 1024 #define MAXTOKENS 256 #define MINLEN 3 struct tnode { char *word; int count; struct tnode *left, *right; }; char **split(char *, char *); struct tnode *addtree(struct tnode *, char *); struct tnode *talloc(void); void treeprint(struct tnode *); void treefree(struct tnode *); void arrayprint(struct tnode **, int); int compare(const void *, const void *); int tnodecount(struct tnode *); int treetoarray(struct tnode *, struct tnode **); int main(void) { char *delim = ".,:;`'\"+-_(){}[]<>*&^%$#@!?~/|\\= \t\r\n1234567890"; char **tokens = NULL; char line[MAXLINE]; struct tnode *root = {0}; struct tnode **tarray = {0}; int i, len, lcount, ncount; i = len = lcount = ncount = 0; while(fgets(line, MAXLINE, stdin) != NULL) { len = strlen(line); if(len < MINLEN) continue; else lcount++; if(line[len - 1] == '\n') line[--len] = '\0'; tokens = split(line, delim); for(i = 0; tokens[i] != NULL; i++) root = addtree(root, tokens[i]); for(i = 0; tokens[i] != NULL; i++) free(tokens[i]); free(tokens); } ncount = tnodecount(root); if(ncount == 0) return 1; /* treeprint(root); */ tarray = malloc(ncount * sizeof(struct tnode *)); if(tarray == NULL) return 1; if(treetoarray(root, tarray) == -1) return 1; qsort(tarray, ncount, sizeof(struct tnode *), compare); arrayprint(tarray, ncount); treefree(root); return 0; } struct tnode *addtree(struct tnode *p, char *w) { int cond; if(p == NULL) { p = talloc(); p->word = strdup(w); p->count = 1; p->left = p->right = NULL; } else if((cond = strcmp(w, p->word)) == 0) p->count++; else if(cond < 0) p->left = addtree(p->left, w); else p->right = addtree(p->right, w); return p; } struct tnode *talloc(void) { return(struct tnode *)malloc(sizeof(struct tnode)); } void treeprint(struct tnode *p) { if(p != NULL) { treeprint(p->left); printf("%4d %s\n", p->count, p->word); treeprint(p->right); } } void treefree(struct tnode *p) { if(p != NULL) { treefree(p->left); treefree(p->right); free(p->word); free(p); } return; } void arrayprint(struct tnode **p, int x) { int i = 0; for(i = 0; i < x; i++) printf("%4d %s\n", p[i]->count, p[i]->word); return; } char **split(char *string, char *delim) { char **tokens = NULL; char *working = NULL; char *token = NULL; int idx = 0; tokens = malloc(sizeof(char *) * MAXTOKENS); if(tokens == NULL) return NULL; working = malloc(sizeof(char) * strlen(string) + 1); if(working == NULL) return NULL; strcpy(working, string); for(idx = 0; idx < MAXTOKENS; idx++) tokens[idx] = NULL; token = strtok(working, delim); idx = 0; while((idx < (MAXTOKENS - 1)) && (token != NULL)) { tokens[idx] = malloc(sizeof(char) * strlen(token) + 1); if(tokens[idx] != NULL) { strcpy(tokens[idx], token); idx++; token = strtok(NULL, delim); } } free(working); return tokens; } int treetoarray(struct tnode *tree, struct tnode **array) { static struct tnode **write = NULL; if(tree == NULL) return -1; if(array != NULL) write = array; if(tree != NULL) { *write = tree, write++; if(tree->left != NULL) treetoarray(tree->left, NULL); if(tree->right != NULL) treetoarray(tree->right, NULL); } return 0; } int compare(const void *x, const void *y) { struct tnode * const *a = x; struct tnode * const *b = y; if(a == NULL || b == NULL) return -2; else { if((*a)->count < (*b)->count) return 1; else if((*a)->count > (*b)->count) return -1; else return 0; } } int tnodecount(struct tnode *p) { if(p == NULL) return 0; else { if(p->left == NULL && p->right == NULL) return 1; else return(1 + (tnodecount(p->left) + tnodecount(p->right))); } }