/* pringle.c -- Palith Balakrishnabati (fixed version) Techniques used: 1) Search control via (nondeterministic) FSM simulation: build a FSM that accepts strings of digits in the grid. Each state of the FSM corresponds to one of the 25 grid positions; a set of stated can be stored in a 32-bit machine word. Thus, pringle never has to muck around in the actual grid itself once the FSM is built. 2) Prime testing via: a) checks for trivial factors (2,3,5) "built into" FSM traversal b) checks for "small" factors (primes < 200) accomplished in batches via GCD c) checks for large factors by Miller-Rabin strong-psuedo prime testing to bases 2, 3, and 7 3) Loop unrolling and other garden-variety eficiency hacks. SIMPLIFIED SEARCH CONTROL OUTLINE: ========================================================== read grid and build FSM /* map[i] = set of grid positions containing the digit i * for (n = 9; n > 0; --n) { search (fullboard, 0, n) /* search for an n-digit prime in the grid * / if found print and halt } search (states, number, depth) { if (depth == 0) { test number for primality; if prime, we're done return } neighbors = bits adjacent to 1-bits in states /* easily found by a little SHIFTing and ORing * / for (i = 9; i >= 0; --i) { newstates = neighbors & map[i]; if (newstates) search (newstates, number * 10 + i, depth - 1) } ========================================================== This reduces search contol to a small percentage of the overall time. Repeated paths in the grid that form the same number do not slow down the search at all, and I visit candidate primes in strictly decreasing orders, so I can stop when I find the first one. PRIME TESTING The search control is just a small fraction of the time, so I worked at making the prime testing efficient. I build an array mask[n][k] which gives the set of squares from which there is an n-step path through the grid ending in a 1, 3, 7, or 9, with residue -k mod 3. By keeping track of number mod 3 as I go and ANDing in with the mask[][] values, I am assured that I am always going to find at least one number not divisible by 2, 3, or 5 in the current branch of the search tree, and that I never end up at number divisible by 2, 3, or 5. This helps in tricky squares such as the "all 1, 4, 7". To test for divisiblity by 5 > p > 200, I compute GCDs. On a SPARC, where integer division is slow as dogmeat, a well-coded GCD (using, say, the binary algorithm) is about as fast as one divide. But in a single GCD, we can test for divisibility by many primes at once: GCD (n, 7 * 11 * 13 * 17 * 23 * 29) for example, will be 1 unless n is divisible by one of these primes. By multiplying sets of primes less than 200, I can "pack" a set of test integers and do a bunch of divisibility tests with a single GCD. The value 200 was detemined empirically as a point where returns diminished. This table of packed prime products is computed just once before the search begins. For large divisors, I use the probablistic strong pseudo-prime test. Using bases 2, 3, and 7, there is only one SPP less than a billion, and it cannot be the largest prime in the grid: this is because any grid containing this number also contains a larger prime. For example, the number 789201803 can never be the largest prime in the grid, because any grid containing that number also contains the prime 989898989. Almost all of the work is in multiplying x * y mod m. I use a simple by-hand bit-at-a-time multiplication algorithm, with a fully unrolled loop and a few tricks to make it efficient on a SPARC (e.g. testing (x&512) is much cheaper than testing (x&1024). That's about it. There's special-case code to take care of the "funny" grids where all the digits have a common prime divisor. Unfortunately, the version of pringle I submitted got the wrong answer on the test "HOT DOG" because I bungled the code to make sure that I start from only nonzero digits. Rats. This version has that bug fixed. --Palith */ /* FULLBOARD: the 25 bits of an integer word that correspond to grid positions. I leave a one-bit gap between rows to prevent wraparound when shifting to compute neighbors */ #define FULLBOARD (0x1f7df7df) /* MAXCOMP: largest prime to test for using GCD NCOMP: table of composites for Sieve of Eratosthenes MAXMUL: max number of primes to pack into a composite MUL_LIMIT: largest number to pack */ #define MAXCOMP 200 #define NCOMP ((MAXCOMP-3)/2) #define MAXMUL 8 #define MUL_LIMIT (((unsigned) 4294967295) / MAXCOMP) /* comp: table of odd composites (used in finding primes to pack) pmul: table of packed prime products for GCD testing nmul: number of entries in pmul */ char comp[NCOMP]; unsigned int pmul[MAXMUL]; int nmul; /* digtab, rawdigtab: tables of info per digit 0-9; digtab is just rawdigtab packed so that only digits that actually appear in the grid are present */ struct { int dig; /* which digit 0-9 */ unsigned int map; /* set of grid positions where dig appears */ int resmap[3]; /* resmap[k] = (k * 10^dig) mod 3 */ } digtab[10], rawtab[10]; /* ndig: number of distinct digits appearing in the grid nonzero: subset of grid position that don't contain a 0 mask[n][k]: set of states from which there begins a path of length n in the grid ending in a 1, 3, 7, or 9 whose number string is -k mod 3. Used to keep the search away from numbers divisible by 2, 3, or 5. */ int ndig = 0; unsigned int nonzero; unsigned int mask[9][3]; /* the largest prime found, as an integer and as a string of chars */ int bestprime = 0; char bestprimetxt[10]; void dumpprime (void); void readboard (int f, char (*b)[5]); void buildboard (register char (*b)[5]); void init_pmul (void); void build_masks (void); void evalboard (register char (*b)[5]); int search (register unsigned int squares, register int depth, register int val, register int res); unsigned int adj (register unsigned int x); int isprime (register int n); int spsp (register int base, register int n); unsigned int gcd (register unsigned int x, register unsigned int y); int modmul (register unsigned int x, register unsigned int y, register unsigned int m); unsigned int mod3 (register unsigned int x); int main (int ac, char **av) { int f; char board[5][5]; int i, j, k; f = open (av[1], 0, 0); if (!f) { write (2, "Unable to read input\n", 21); _exit (0); } readboard (f, board); close (f); evalboard (board); dumpprime(); } /* Write bestprime on stdout */ void dumpprime (void) { char *p, *q; char bestprimerev[11]; if (bestprime == 7) write (1,"7\n",2); else if (bestprime == 5) write (1, "5\n",2); else if (bestprime == 3) write (1, "3\n",2); else if (bestprime == 2) write (1, "2\n", 2); else if (bestprime == 0) write (1, "0\n", 2); else { bestprimerev[9] = '\n'; bestprimerev[10] = '\0'; for (p = bestprimetxt, q = bestprimerev + 9; *p; p++) { *--q = *p; } write (1, q, p - bestprimetxt + 1); } } /* Read the board into b */ void readboard (int f, char (*b)[5]) { int i, j; char inbuf[1024], *p; int dig; read (f, inbuf, sizeof (inbuf)); p = inbuf; for (i=0; i<5; i++) { for (j=0; j<5; j++) { while (*p < '0' || *p > '9') p++; b[i][j] = *p++ - '0'; } } } /* Set up the FSM: build rawtab[].map, then compress out missing digits to form digtab[]. nonzero and ndig get computed in too. */ void buildboard (register char (*b)[5]) { register int dig; register int i, j; for (i=0; i<10; i++) { rawtab[i].map = 0; } for (i=0; i<5; i++) { for (j=0; j<5; j++) { rawtab[b[i][j]].map |= 1<<(6*i+j); } } nonzero = 0; for (i=9, ndig=0; i>=0; i--) { if (rawtab[i].map) { if (i != 0) nonzero |= rawtab[i].map; digtab[ndig].map = rawtab[i].map; digtab[ndig].dig = i; ndig++; } } } /* Set up the pmul[] table and nmul for the GCD-based divisibility tests. Compute rcomp via sieve, then pack primes into the pmul table */ void init_pmul (void) { register unsigned int i, j, k; register char *rcomp = comp; i = 0; for (;;) { while (rcomp[i] == 1) i++; j = 2*i+3; i++; k = (j*j-3)/2; if (k >= NCOMP) break; while (k < NCOMP) { rcomp[k] = 1; k += j; } } /* rcomp[i]==0 ==> (2i+3) prime */ for (i=0, j=2; i1; n--) { bestprimetxt[n] = '\0'; i = search (0, n, 0, 0); if (i) return; } return; } /* Main recursive search routine. squares: the current set of grid positions where we could possibly be depth: number of digits remaining val: decimal value of number formed so far res: residue mod 3 of number formed so far */ int search (register unsigned int squares, register int depth, register int val, register int res) { register int i; register unsigned int adjsq; register unsigned int nsq; register unsigned int adj2, adj3; if (depth == 0) { /* all digits filled */ if (isprime(val)) { /* check for prime */ bestprime = val; return 1; } else { return 0; } } depth--; if (val == 0) { /* val = 0 means this is the first digit */ adjsq = nonzero; /* ... so must start at nonzero digit */ } else { /* otherwise... */ val *= 10; /* adjust val */ adj2 = (squares<<1)|(squares>>1); /* and compute neigbors */ adj3 = adj2 | squares; adjsq = (adj2 | (adj3<<6) | (adj3>>6)) & mask[depth][res]; /* and with mask[][] to prune out 2,3,5 multiples */ } for (i = 0; i>1); register unsigned int adj3 = adj2|x; return (adj2 | (adj3>>6) | (adj3<<6)) & FULLBOARD; } /* Test for primality: first do GCDs with the entries in the pmul table, then do strong pseudoprime test with 2,3, and 7. */ int isprime (register int n) { register int i; for (i=0; i nm1; b >>= 1); b >>= 1; v = base; while (b >= bmin) { v = modmul (v, v, n); if (nm1&b) v = modmul (v, base, n); b >>= 1; } if (v == 1 || v == nm1) return 1; while (b) { v = modmul (v, v, n); b >>= 1; if (v == 1) return 0; if (v == nm1) return 1; } return 0; } /* Binary gcd algorithm */ unsigned int gcd (register unsigned int x, register unsigned int y) { for (;;) { if (x > y) { x -= y; do { x >>= 1; } while (!(x&1)); } else if (x < y) { y -= x; do { y >>= 1; } while (!(y&1)); } else { return x; } } } /* Compute x * y mod m. Uses the standard "bit-at-a-time" algorithm, with adjustment when the partial product or shifting multiplicand gets larger than m. Loop is unrolled all the way, with a few early exit tests to speed things up when y is small (say 2, 3, or 7). Avoids shifting y at each step, since testing y&1 on a SPARC is no faster than y&512 (though testing y&1024 is a lot slower, since 1024 is too big for an immediate operand). This routine is where most of the time is spent (for tough problems) so it makes sense to optimize tensely.*/ int modmul (register unsigned int x, register unsigned int y, register unsigned int m) { register unsigned int v = 0; if (y&1) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&2) { v += x; if (v >= m) v -= m; } if (!(y&~3)) return v; x += x; if (x >= m) x -= m; if (y&4) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (!(y&~7)) return v; if (y&8) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&16) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&32) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&64) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&128) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&256) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&512) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; y >>= 10; if (y&1) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&2) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&4) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&8) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&16) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&32) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&64) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&128) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&256) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&512) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; y >>= 10; if (y&1) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&2) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&4) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&8) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&16) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&32) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&64) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&128) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&256) { v += x; if (v >= m) v -= m; } x += x; if (x >= m) x -= m; if (y&512) { v += x; if (v >= m) v -= m; } return v; } /* Quicky mod3 code, uses "casting out threes" to fold up parts of the number by addition while preserving residue mod 3 */ unsigned int mod3 (register unsigned int x) { x = (x&0xffff)+(x>>16); /* 0 - 131070 */ x = (x&0xff)+(x>>8); /* 0 - 766 */ x = (x&0xf)+(x>>4); /* 0 - 62 */ x = (x&0x3)+(x>>2); /* 0 - 18 */ x = (x&0x3)+(x>>2); /* 0 - 7 */ while (x>=3) x-=3; return x; }