/* POTM ENTRY: SnakePit * Name: Tim Ruhl * Email: tim@cs.vu.nl * Compilation: Should work with cc and gcc. */ #define NDEBUG #include #include #include #include #include #include #include /* Constants */ #define NR_TIMEOUT 1 #define TIMEOUT 590 #define POP_SIZE 200 #define MUTATE 350 #define INIT_WORDS 5 /* Types */ typedef struct board { char fields[25]; } board_t, *board_p; typedef struct member { /* Member of the genetic population */ board_t b; int score; } member_t, *member_p; /* Constant variables */ static int moves[8] = {1, -1, 5, -5, 6, 4, -6, -4}; static int nb[25][8]; static int nrnb[25]; /* Variables */ static char *progname; static int timeout = TIMEOUT; static int nr_timeout = NR_TIMEOUT; static int running; static int verbose; static int give_score; static int dump_eval; /* Word list data structures */ static char words[1000][26]; static int wordlen[1000]; static int wordcnt[1000]; static int next[1000][25][26]; static int nr_words; static int freq[26]; static int sum_freq; /* Word finder */ static int word_used[1000]; static int used[49]; static int letters, count; static int seqno; static int print_words; /* Genetic population */ static member_t pop[2][POP_SIZE]; static int curr; static member_t best; /* Macros */ #define ABS(x) ((x) < 0 ? -(x) : (x)) /* Functions */ static void usage(void) { fprintf(stderr, "Usage: %s " "[-r ] " "[-t ] " "[-T ]" "[-vdse] " "\n", progname); exit(1); } /* handler: * Handle SIGALRM. Set running to zero which causes the * computation to terminate. */ static void handler(int s) { running = 0; } /* init_neighbors: * Precompute the neighbor list for all fields. */ static void init_neighbors(void) { int row1, col1, row2, col2; int i, j, p; for(i = 0; i < 25; i++) { row1 = i / 5; col1 = i % 5; for(j = 0; j < 8; j++) { p = i + moves[j]; if (p < 0 || p >= 25) continue; row2 = p / 5; col2 = p % 5; if (ABS(row1 - row2) <= 1 && ABS(col1 - col2) <= 1) { nb[i][nrnb[i]++] = p; } } } } static void read_words(char *file) { FILE *fp; if ((fp = fopen(file, "r")) == NULL) { fprintf(stderr, "Cannot open word file %s\n", file); exit(1); } nr_words = 0; while(fscanf(fp, "%s", words[nr_words]) == 1) { nr_words++; } fclose(fp); } static void compute_wordlen(void) { int i; for(i = 0; i < nr_words; i++) { wordlen[i] = strlen(words[i]); } } /* compute_freq: * Get the number of occurences of each letter in the * wordlist. Used to select random letters. */ static void compute_freq(void) { int i, j; for(i = 0; i < nr_words; i++) { for(j = 0; j < wordlen[i]; j++) { freq[words[i][j] - 'A']++; } sum_freq += wordlen[i]; } } static void sort_words(void) { char buf[26]; int i, j; int t; for(i = 0; i < nr_words; i++) { for(j = i + 1; j < nr_words; j++) { if (strcmp(words[i], words[j]) > 0) { strcpy(buf, words[i]); strcpy(words[i], words[j]); strcpy(words[j], buf); t = wordlen[i]; wordlen[i] = wordlen[j]; wordlen[j] = t; } } } } /* filter_duplicates: * Filter duplicate words. According to the FAQ, this is * not necessary, but I never removed it. */ static void filter_duplicates(void) { int i, j; j = 0; wordcnt[0] = 1; for(i = 1; i < nr_words; i++) { if (strcmp(words[i], words[j]) == 0) { wordcnt[j]++; continue; } j++; strcpy(words[j], words[i]); wordcnt[j] = 1; wordlen[j] = wordlen[i]; } nr_words = j + 1; } /* compute_next: * For each word and each prefix length, compute the * index of the first word that has the same prefix plus * a letter. */ static void compute_next(void) { int i, j, k, l; int high; for(i = 0; i < nr_words; i++) { for(j = 0; j <= wordlen[i]; j++) { for(k = i + 1; k < nr_words; k++) { if (strncmp(words[i], words[k], j) != 0) break; } high = k; for(k = 0; k < 26; k++) { next[i][j][k] = -1; for(l = i; l < high; l++) { if (words[l][j] == 'A' + k) { next[i][j][k] = l; break; } } } } } } /* random_letter: * Generate a random letter. The selection uses the * frequency counts of the letters to select letters that * are likely to occur. */ static char random_letter(void) { int s, t; int i; s = 1 + (rand() % sum_freq); t = 0; for(i = 0; i < 26; i++) { t += freq[i]; if (t >= s) { assert(freq[i]); return 'A' + i; } } assert(0); return 'A'; } /* impose_word: * Tries to place a word on the given board. It does not * deal with overwriting its own letters. */ static void impose_word(board_p b, int p, int w, int offset) { int s; if (offset == wordlen[w]) return; b->fields[p] = words[w][offset]; s = rand() % nrnb[p]; impose_word(b, nb[p][s], w, offset + 1); } /* random_word: * Select a random word and place in on the board. * */ static void random_word(board_p b) { int w, p; w = rand() % nr_words; p = rand() % 25; impose_word(b, p, w, 0); } /* random_board: * Generate a random solution by placing random letters * on the board. After that, also some random words are * placed. */ static void random_board(board_p b) { int i; for(i = 0; i < 25; i++) { b->fields[i] = random_letter(); } for(i = 0; i < INIT_WORDS; i++) { random_word(b); } } static void print_board(board_p b) { int i; for(i = 0; i < 25; i++) { putchar(b->fields[i]); if ((i + 1) % 5 == 0) putchar('\n'); } } /* walk_path: * Try to find a path on the board that contains a word * from the wordlist. low specifies the first word in the * sorted wordlist that has a prefix of length len that * contains the letters on the current path. Avoid * duplicate words by assigning a sequence number to the * words_used table. If the table entry for a word is * equal to the current sequence number, the word has * already been used. */ static void walk_path(board_p b, int p, int low, int len) { int new; int i; /* Check whether there are any words with the current prefix. */ low = next[low][len][b->fields[p] - 'A']; if (low == -1) return; if (wordlen[low] == len + 1 && word_used[low] != seqno) { word_used[low] = seqno; if (print_words) { for(i = 0; i < wordcnt[low]; i++) { printf("%s\n", words[low]); } } letters += (len + 1) * wordcnt[low]; count += wordcnt[low]; } for(i = 0; i < nrnb[p]; i++) { new = nb[p][i]; if (used[new]) continue; used[new] = 1; walk_path(b, new, low, len + 1); used[new] = 0; } } /* walk_board: * Count the number of words and letters on the given * board. The sequence number is used to detect * duplicates. */ static void walk_board(board_p b) { int i; letters = count = 0; seqno++; for(i = 0; i < 25; i++) { used[i] = 1; walk_path(b, i, 0, 0); used[i] = 0; } } static void print_member(member_p m) { if (verbose) printf("################\n"); print_board(&m->b); if (verbose) printf("################\n"); print_words = 1; walk_board(&m->b); print_words = 0; if (verbose || give_score) { printf("Score: %d letters with %d words\n", letters, count); } if (verbose) printf("\n"); } /* eval_pop: * Dump statistics for the current population. */ static void eval_pop(int p) { int sum, sumsq, avg, var, std; int count[20]; int max; int i; sum = 0; sumsq = 0; max = 0; for(i = 0; i < POP_SIZE; i++) { sum += pop[p][i].score; sumsq += pop[p][i].score * pop[p][i].score; if (pop[p][i].score > max) max = pop[p][i].score; } for(i = 0; i < 20; i++) count[i] = 0; for(i = 0; i < POP_SIZE; i++) { count[(pop[p][i].score * 20 - 1) / max]++; } printf("Population score histogram (max %d; population size %d):\n", max, POP_SIZE); for(i = 0; i < 20; i++) { printf("%4d", count[i]); } printf("\n"); avg = sum / POP_SIZE; var = sumsq / POP_SIZE - (avg * avg); std = sqrt(var) * 100 / avg; printf("Average: %d variance: %d normalized standard deviation: %d\n", avg, var, std); } /* init_pop: * Generate the initial population, consisting of * POP_SIZE members. */ static void init_pop(void) { board_p b; int i; for(i = 0; i < POP_SIZE; i++) { b = &pop[0][i].b; random_board(b); walk_board(b); /* Add one bonus point to be able to randomly select members */ pop[0][i].score = 1 + letters * 10 - count; if (pop[0][i].score > best.score) { best = pop[0][i]; } } } /* find_random: * Find a weighted random member of the population. Array * sum contains the sum of the score of the current * member and all members before it. Using this * accumulative array, a binary search is applied that * finds a member. */ static int find_random(int sum[POP_SIZE + 1], int total) { int low, high, middle; int s; s = 1 + (rand() % total); low = 0; high = POP_SIZE + 1; middle = POP_SIZE / 2; while(sum[middle] < s || sum[middle - 1] > s) { if (sum[middle] < s) { low = middle; } else { high = middle; } middle = low + (high - low) / 2; assert(middle > 0 && middle <= POP_SIZE); assert(middle >= low); assert(middle <= high); assert(low <= high); } middle--; assert(middle >= 0 && middle < POP_SIZE); return middle; } /* new_member: * Generate a new member for the next population. First * two (different) members of the old population are * selected. The letters of the new member are selected * from one of the two old members, weighted by their * score. After that, the mutation factor decides whether * a new word should be added to the solution. I think * a mutation parameter of 35% is much too high to call * it a genetic algorithm, but this was the best value I * found for my testcase. */ static void new_member(int p, member_p new, int sum[], int total) { board_p b, b1, b2; member_p m1, m2; int score; int i; /* All scores are greater than 0 */ assert(total > 0); m1 = &pop[p][find_random(sum, total)]; do { m2 = &pop[p][find_random(sum, total)]; }while(m1 == m2); b = &new->b; b1 = &m1->b; b2 = &m2->b; score = m1->score + m2->score; for(i = 0; i < 25; i++) { if ((rand() % score) < m1->score) { b->fields[i] = b1->fields[i]; } else { b->fields[i] = b2->fields[i]; } } if ((rand() % 1000) < MUTATE) { random_word(b); } walk_board(b); /* Add one bonus point to randomly select members */ new->score = 1 + letters * 10 - count; if (new->score > best.score) { best = *new; if (verbose) print_member(&best); } } /* next_pop: * Based on the current population, compute the * next. First precompute the prefix sums of the * scores. Also put the best solution found so for in the * next population. */ static void next_pop(void) { int sum[POP_SIZE + 1]; int total; int next; int i; total = 0; sum[0] = 0; for(i = 0; i < POP_SIZE; i++) { total += pop[curr][i].score; sum[i + 1] = sum[i] + pop[curr][i].score; } next = 1 - curr; pop[next][0] = best; for(i = 1; i < POP_SIZE; i++) { new_member(curr, &pop[next][i], sum, total); } curr = next; } int main(int argc, char **argv) { int dump_pop = 0; int i; progname = argv[0]; argv++; argc--; while(argc && argv[0][0] == '-') { switch(argv[0][1]) { case 'r': if (argc < 2) usage(); srand(atoi(argv[1])); argv++; argc--; break; case 't': if (argc < 2) usage(); timeout = atoi(argv[1]); argv++; argc--; break; case 'T': if (argc < 2) usage(); nr_timeout = atoi(argv[1]); argv++; argc--; break; case 'd': dump_pop = 1 - dump_pop; break; case 'v': verbose = 1 - verbose; break; case 's': give_score = 1 - give_score; break; case 'e': dump_eval = 1 - dump_eval; break; default: usage(); } argv++; argc--; } if (argc != 1) { usage(); } init_neighbors(); read_words(argv[0]); compute_wordlen(); compute_freq(); sort_words(); filter_duplicates(); compute_next(); init_pop(); if (verbose) print_member(&best); if (dump_eval) eval_pop(0); /* Generate statistics more than once. */ while(nr_timeout--) { running = 1; signal(SIGALRM, handler); alarm(timeout); while(running) { next_pop(); } if (dump_eval) eval_pop(curr); } if (dump_eval) eval_pop(curr); print_member(&best); if (dump_pop) { printf("++++++++++++++++++++ Population dump +++++++++++++++++++\n"); for(i = 0; i < POP_SIZE; i++) { printf("Member %d\n", i); print_member(&pop[curr][i]); } } return 0; } /* Local Variables: compile-command: "gcc -g -ansi -Wall -pedantic -o b1 b1.c -lm" End: */