/*           POTM ENTRY:  branch_and_hang                      */
/*                        Jim Roche                            */
/*                     roche@research.att.com                  */
/*  This program should be ready to go after a simple "cc".    */

/*  ACKNOWLEDGEMENT: Thanks to Allan Wilks for making this     */
/*  program substantially faster and infinitely more readable. */


/*
This program attempts to unshuffle a deck of cards.  Its single input
argument is a representation of a shuffled deck with an even number,
N=2n, of cards designated by the first n upper case and first n lower
case letters of the alphabet.  Unshuffled order has the upper case
letters first, in alphabetical order, followed by the lower case
letters.  The program prints a single line of output consisting of a
sequence of letters from the set {S,C,F} where S represents a shuffle,
C cut and F a flip (see below for precise definitions).  When the
indicated operations are applied to the input deck in the given order,
a new deck will result that is either in unshuffled order, or as close
as the program could come to unshuffled order, in the sense of having
the greatest number of cards in their proper positions.

Some observations:

o F commutes with both C and S, so at most one F is needed, and if present,
  can appear anywhere in the sequence.

o CC is the identity so C should never be adjacent to another C

o S^p is the identity for some p depending on N, so no more than p-1 S's
  should ever be adjacent; p is the order of 2 mod N+1

o The order of CS is the order of 2 mod N-1.

o A deck is always "centrally symmetric," which means that if the unshuffled
  cards are numbered from 0 to N-1, then S, C and F all preserve the property
  that the cards in positions k and N-k-1 add to N-1.

o The symmetry property implies that the total number of possible decks
  is at most n!2^n, which is considerably smaller than N!.  For most
  values of N, the total number of decks that are actually achievable
  is either the same as the upper bound, half of this number, or one
  quarter of this number.  When N = 24, however, the group size is 
  quite a bit smaller than the upper bound.  Finally, when N is a power 
  of 2, the number of possible decks is only N*log_2(N).
  
o A good reference is "The Mathematics of Perfect Shuffles" by Diaconis,
  Graham and Kantor in Advances in Applied Mathematics, volume 5, 1983,
  pages 175-196.  In particular, from their results one can easily
  determine precisely what each shuffle group is.  They also have
  a good bibliography on the subject.

The general strategy is as follows.

o Compute a table of all decks reachable from an ordered deck in exactly
  MAXDEPTH moves of S or C.  If a table entry is thought of as a deck,
  the vector (a_0, a_1, ... , a_{N-1}) means that the top card is a_0,
  the next card is a_1, etc.  However, we actually interpret each vector
  to mean that the given sequence of moves takes card 0 into location
  a_0, card 1 into position a_1, etc.  With this interpretation, each
  table entry represents a deck _from which_ the identity (i.e., the
  ordered deck) can be reached in at most MAXDEPTH moves.

o Search for the best sequence by looking at decks reachable from
  the input deck in at most MAXLEVEL steps of S or C, plus possibly a
  flip.

o For each such deck, D, first see how close D is to the identity (perfect,
  unshuffled deck) and keep track of the best such match.

o Second, try a meet-in-the-middle attack.  For each deck D above,
  reinterpret it as a permutation using the notation we used for the
  table.  (In order for this to work, we must replace D by its
  inverse permutation, E.)  Now if E is found in the table, the move
  sequence that takes the input deck into D can be
  followed by the previously stored move sequence that takes D into
  the ordered deck; together the two sequences unscramble the input
  deck completely.

  For large decks such an exact solution is unlikely to be found, so we
  would like to find in the table the deck that is closest to E.  In
  order to avoid an expensive search, we only compare the first three
  elements of E with the tabled decks and search for the best table
  element among such matches.

o After this search we have a candidate solution.  If it is not a perfect
  solution, it is fed back through the same procedure, standing in as
  the new input deck.  We expect to get roughly 1-3 more matches in each
  half deck this way.  We arrange to spend roughly half the allotted
  time of 10 minutes on each of the two stages.
*/

/*
 * Contest parameters.
 */
#define MAXTIME		570	/* max number of seconds allowed (really 600) */
#define MAXLENGTH	500	/* maximum move sequence allowed */
#define MAXDECK		52	/* largest deck size */

#define MININIT		1
#define MAXINIT		3
#define MINDEPTH	2
#define MAXDEPTH	24   
#define MAXLEVEL	28   
#define MAXLOOP		2
#define SSIZE		130000
#define EASY		(N <= 10 || 32%N == 0)

/*
 * The jst structure stores a deck and the sequence of moves that lead to
 * it.  Decks are always stored as a sequence of integers in the range
 * [0,...,N-1].  A move sequence in this structure records only S and C
 * moves and represents these as bits in a bit vector, with 0 representing
 * S and 1 representing C.  It is interesting to compute the number of
 * possible sequences of length k, not allowing two consecutive Cs but
 * allowing any number of consecutive Ss.  This turns out to be the
 * (k+2)th Fibonacci number, for a word of length i is either S followed
 * by a word of length i-1 or CS followed by a word of length i-2.  The
 * number of such words of length k is therefore round(phi^(k+2)/sqrt(5)),
 * where phi is the golden ratio (1+sqrt(5))/2.  Here are some actual
 * values for given values of k:
 *
 * 	k     #seq
 * 	20   17711
 * 	21   28657
 * 	22   46368
 * 	23   75025
 * 	24  121393
 * 	25  196418
 * 	26  317811
 * 	27  514229
 * 	28  832040
 * 	29 1346269
 * 	30 2178309
 *
 * The S[] table (annoying notational confusion) stores all move sequences
 * up to length MAXDEPTH, and the I table is a triply-indexed array of
 * pointers into S to facilitate fast lookup.
 */
struct jst {
	char x[MAXDECK];
	unsigned nummoves:5;
	unsigned movebits:27;
} S[SSIZE], *I[MAXDECK*MAXDECK*MAXDECK], *Send;

/*
 * N and n are the deck size and half-deck size.
 * M is used in the special jcmp() routine.
 * nS is the final size of the S[] table.
 * ind[] is used in the qsort.
 * final[] stores the final move sequence.
 */
int N, n, M, nS, ind[SSIZE];
char final[MAXLENGTH+1];

/*
 * Routines for manipulating decks:
 *	Copy: copy one deck to another
 * 	Inv: invert a deck
 * 	Compose: compose two decks to produce a third
 * 	Cut: perform a cut
 * 	Shuffle: perform a shuffle
 * 	Flip: perform a flip
 */
#define Copy(old,new)	memcpy(new,old,N)

Inv(old, new)
char *old, *new;
{
	register int i = N;

	while(i--)
		new[old[i]] = i;
}

Compose(old, perm, new)
char *old, *perm, *new;
{
	register int i = N;

	while(i--)
		new[i] = old[perm[i]];
}

Cut(old, new)
char *old, *new;
{
	register int i, j;

	for(i = 0, j = n; i < n; i++, j++) {
		new[i] = old[j];
		new[j] = old[i];
	}
}

Shuffle(old, new)
char *old, *new;
{
	register int i, j;

	for(i = 0, j = n; i < N; i += 2, j++)
		new[i] = old[j];
	for(i = 1, j = 0; j < n; i += 2, j++)
		new[i] = old[j];
}

Flip(old, new)
char *old, *new;
{
	register int i, j;

	for(i = 0, j = N-1; i < N; i++, j--)
		new[i] = old[j];
}

/*
 * Compute the number of matches between two decks.  Because of
 * central symmetry, we only need to compute the number of matches
 * in the first half of the deck.
 */
matches(x, y)
char *x, *y;
{
	register int i, t = 0;

	for(i = 0; i < n; i++)
		if(x[i] == y[i])
			t++;
	return t;
}

/*
 * Compare routine used to sort the S[] table: lexicographic order.
 */
cmp(a,b)
int *a, *b;
{
	return jcmp(S+*a, S+*b);
}

/*
 * Partial lexicographic compare of two decks; only check first M cards.
 */
jcmp(a,b)
struct jst *a, *b;
{
	register int i, diff = 0;

	for(i = 0; i < M && !diff; i++)
		diff = a->x[i] - b->x[i];
	return diff;
}

/*
 * Construct the table.  There are three parts: (1) get the list of decks;
 * (2) sort the list; (3) delete duplicates.
 */
maketable(depth)
int depth;
{
	register int i, j, k, nperm = 0, down = 1, level = 1;
	register unsigned movebits = 0;
	char deck[MAXDEPTH+1][MAXDECK];
	struct jst Stmp;

	/*
	 * First stage: construct the list of decks and their sequences.
	 * Use a stack of decks, deck[0], deck[1], ..., where deck[0]
	 * contains the unshuffled deck.  During the construction, if the
	 * current sequence is of length k then deck[1], deck[2], ...,
	 * deck[k] contain the decks that result after each move of the
	 * sequence.
	 */
	for(i = 0; i < N; i++)
		deck[0][i] = i;

	/*
	 * The down variable says that the next move should be a shuffle.
	 * The else-if takes care of eliding double cuts.
	 */
	while(level) {
		if(down) {
			movebits <<= 1;
			Shuffle(deck[level-1], deck[level]);
		} else if(movebits & 3) {
			movebits >>= 1;
			level--;
		} else {
			movebits++;
			Cut(deck[level-1], deck[level]);
			down = 1;
		}
		if(down)
			level++;
		if(level > depth) {
			down = 0;
			level--;
			Copy(deck[level], S[nperm].x);
			S[nperm].movebits = movebits;
			S[nperm].nummoves = level;

			/* safety valve: SSIZE should be big enough */
			if(++nperm >= SSIZE)
				break;
		}
	}

	/*
	 * Second stage: sort the decks in lexicographic order.  Sort a
	 * vector of integers rather than the structures themselves to
	 * prevent a lot of unnecessary copying.  After the qsort, swap
	 * the structures themselves, but only at most once each.
	 */
	for(i = 0; i < nperm; i++)
		ind[i] = i;
	M = N;
	qsort(ind, nperm, sizeof(int), cmp);
	for(i = 0; i < nperm; i++) {
		if(ind[i] < 0)
			continue;
		Stmp = S[i];
		j = i;
		while(1) {
			S[j] = S[ind[j]];
			k = ind[j];
			if(ind[j] == i)
				break;
			ind[j] = -1;
			j = k;
		}
		S[j] = Stmp;
		ind[j] = -1;
	}

	/*
	 * Delete duplicate decks.  Since each deck corresponds to
	 * a move sequence of the same length, it doesn't matter which
	 * one is kept, so keep the first one.
	 */
	k = 0;
	for(i = 1; i < nperm; i++)
		if(jcmp(S+k, S+i))
			S[++k] = S[i];

	/*
	 * The global nS now records the final size of the S[] table
	 * and Send is a convenient marker for its end.
	 */
	nS = k+1;
	Send = S + nS;
}

/*
 * Make the index for the table.  Think of I as a three-way array, where
 * I[i][j][k] is a pointer to the first deck in the S[] table whose first
 * three cards are i,j,k, or if there is no such deck, the first deck after
 * such decks, were they there, or if there is no such deck, 0.
 */
makeindex()
{
	register int i, j, k;
	register char *x;

	j = 0;
	for(i = 0; i < nS; i++) {
		x = S[i].x;
		k = x[2] + MAXDECK*(x[1] + MAXDECK*x[0]);
		while(j <= k)
			I[j++] = S+i;
	}
	while(j < MAXDECK*MAXDECK*MAXDECK)
		I[j++] = 0;
}

/*
 * To search the table for a deck x[], use the index to jump into it.
 * If the first M spots match then return the pointer, else 0.
 */
struct jst *
search(x)
char *x;
{
	register struct jst *p;

	p = I[x[2] + MAXDECK*(x[1] + MAXDECK*x[0])];
	return (p && memcmp(p->x,x,M) == 0) ? p : 0;
}

/*
 * Convert a bit-vector representation of a move sequence into a
 * sequence of 'S' and 'C' characters, in preparation for printing.
 */
char *
decode(bits, m, F)
unsigned bits;
int m, F;
{
	register int i;
	static char seq[50];
	char *s = seq;

	if(F)
		*s++ = 'F';
	while(m--)
		*s++ = (bits & (1 << m)) ? 'C' : 'S';
	*s = 0;
	return seq;
}

/*
 * Keep track of number of elapsed seconds.  The 100 should be looked
 * up in limits.h, I suppose.
 */
#include 
#include 
int
Elapsed()
{
	struct tms t;

	times(&t);
	return (t.tms_stime+t.tms_utime)/100;
}

main(argc, argv)
int argc;
char **argv;
{
	char ideck[MAXLEVEL+2][MAXDECK], *p, *q;
	char tmpdeck[MAXDECK], tmpdeck2[MAXDECK], id[MAXDECK], rev[MAXDECK];
	char allan[MAXDECK], jim[MAXDECK];
	unsigned movebits, bestbits, bestbits2, bestbits3;
	int bestlevel, bestlevel2, bestlevel3;
	int bestF, bestF2, mm2;
	int maxlevel, nummatches, maxmatches, nm2;
	int down, level, i, j, fflag, lastbit, firstbit, ntimes = 0, nloop = 0;
	struct jst *qs, *qlo, *qhi;

	N = strlen(argv[1]);
	n = N/2;

	/* translate letters into numbers */
	for(p = argv[1], q = ideck[0]; *p; p++, q++)
		*q = *p < 'a' ? *p-'A' : *p-'a'+n;

	/* make up two standard decks: pristine and its flipped version */
	for(i = 0; i < N; i++)
		rev[N-1-i] = id[i] = i;

	/* construct the table and its index */
	maketable(EASY ? MINDEPTH : MAXDEPTH);
	makeindex();

top:
	/*
	 * This outer loop is done MAXLOOP times, using as input deck at
	 * each stage the best deck found from the previous stage.
	 */
	++nloop;
	maxmatches = matches(id, ideck[0]);
	nummatches = matches(rev, ideck[0]);
	if(nummatches > maxmatches) {
		maxmatches = nummatches;
		bestF = 1;
	}
	if(maxmatches == n)
		goto done;
	movebits = bestbits = bestbits2 = bestbits3 = 0;
	bestlevel = bestlevel2 = bestlevel3 = 0;
	bestF = bestF2 = mm2 = 0;
	M = EASY ? MININIT : MAXINIT;

	/*
	 * Look at all sequences of length <=2, <=4, ....  Notice that
	 * each iteration of the loop looks at all sequences seen in the
	 * previous iteration.  This seems like a lot of repeat work,
	 * but because of the geometric growth of the number of sequences,
	 * most of the work is done in the last iteration.  The advantage
	 * of doing it this way is that a short, perfect solution will be
	 * found quickly.
	 */
	for(maxlevel = 2; maxlevel <= MAXLEVEL; maxlevel += 2) {
		down = 1;
		level = 1;
		while(level) {
			if(down) {
				movebits <<= 1; 
				Shuffle(ideck[level-1], ideck[level]);
			} else if(movebits & 3) {
				movebits >>= 1;
				level--;
			} else {
				movebits++;
				Cut(ideck[level-1], ideck[level]);
				down = 1;
			}
			if(down && level > maxlevel - 2) {

				/*
				 * Now that we have a deck, we will check both
				 * it and its flipped version.  Tmpdeck2 holds
				 * the deck to be checked.
				 */
				for(fflag = 0; fflag < 2; fflag++) {
					if(fflag)
						Flip(ideck[level], tmpdeck2);
					else
						Copy(ideck[level], tmpdeck2);

					/*
					 * Check occasionally that time isn't
					 * running out.  Partition the total
					 * allowed time evenly between the
					 * iterations of the main loop.
					 */
					if(++ntimes%10000 == 0 &&
					    Elapsed() > (nloop*MAXTIME)/MAXLOOP)
						goto done;

					/*
					 * Check first to see if tmpdeck2 is
					 * is better than the previous best
					 * deck.
					 */
					nummatches = matches(id, tmpdeck2);
					if(nummatches > maxmatches) {
						maxmatches = nummatches;
						bestbits = movebits;
						bestlevel = level;
						bestF = fflag;
					}

					/*
					 * Now try meet-in-the-middle.  Prepare
					 * tmpdeck as the inverse of tmpdeck2.
					 */
					Inv(tmpdeck2, tmpdeck);

					/*
					 * Find table entries that match tmpdeck
					 * in the first M spots.
					 */
					qlo = qhi = search(tmpdeck);
					if(qlo == 0)
						continue;
					while(qlo > S && jcmp(qlo,qlo-1) == 0)
						qlo--;
					qhi++;
					while(qhi < Send && jcmp(qhi,qhi-1) == 0)
						qhi++;

					/*
					 * For each of these, see how many
					 * spots actually match, and remember
					 * the best one overall.
					 */
					for(qs = qlo; qs < qhi; qs++) {
						nm2 = matches(qs->x, tmpdeck);
						if(nm2 <= mm2)
							continue;
						mm2 = nm2;
						bestbits2 = movebits;
						bestbits3 = qs->movebits;
						bestlevel2 = level;
						bestlevel3 = qs->nummoves;
						bestF2 = fflag;
					}
				}
			}
			if(down)
				level++;
			if(level > maxlevel) {
				down = 0;
				level--;
			}

			/*
			 * Check for perfect solution with the direct
			 * approach.  We do not check for perfect meet-in-the-
			 * middle solution because breaking out now might
			 * prevent finding a shorter direct solution.
			 */
			if(maxmatches == n)
				goto done;
		}
	}

done:
	/*
	 * Build and print the sequence computed in the current
	 * iteration of the main loop.
	 */
	final[0] = 0;
	if(maxmatches >= mm2) {
		if(bestlevel || bestF)
			strcat(final, decode(bestbits, bestlevel, bestF));
		else
			strcat(final, "FF");
	} else {
		lastbit = bestbits2 & 1;
		firstbit = (bestbits3 >> (bestlevel3 - 1)) & 1;
		if(lastbit & firstbit) {
			bestbits2 >>= bestbits2;
			bestlevel2--;
			bestlevel3--;
		}
		strcat(final, decode(bestbits2, bestlevel2, bestF2));
		strcat(final, decode(bestbits3, bestlevel3, 0));
	}
	printf("%s", final);

	/*
	 * Decide whether to continue trying to improve.  If so, apply
	 * the full sequence so far to the starting deck, and use the
	 * result as input to the next iteration.  Note that the starting
	 * deck gets overwritten with a new starting deck.
	 */
	if(maxmatches < n && mm2 < n && nloop < MAXLOOP) {
		Copy(ideck[0], allan);
		for(i = 0; i < strlen(final); i++) {
			switch(final[i]) {
			case 'S': Shuffle(allan, jim); Copy(jim, allan); break;
			case 'C': Cut(allan, jim); Copy(jim, allan); break;
			case 'F': Flip(allan, jim); Copy(jim, allan); break;
			}
		}
		Copy(allan, ideck[0]);
		goto top;
	}
	putchar('\n');
}