Sorting Knuth

C-Scene Issue #09
Sorting Knuth
Marc Tardif


This article should be considered as an appendix to Knuth's book, The Art of
Programming - Vol3, Sorting and Searching. It provides the C code to every
algorithm discussed at length in section 5.2, Internal Sorting. No explanations
are provided here, the book should provide all the necessary comments. The
following link is a sample implementation to confirm that everything is in
working order:

sknuth.c


About the code:
In order to remain as true as possible to the book, a few compromises were
taken into consideration. First, the use of goto statements, which are not
recommended as a good programming practice. Nevertheless, as K&R notes: "there
are a few situations where gotos may find a place". Second, Knuth uses arrays
which start at 1 and finish at N inclusively, which is also taken in consi-
deration. Lastly, be warned that there is no error detection in the code,
which is yet another terrible programming practice.

Now that you have been warned, on with the code:

5.2 INTERNAL SORTING

Comparison counting

C1:    for (i=1; i<=N; i+=1) COUNT[i]=0;
C2:        for (i=N; i>=2; i-=1) {
C3:            for (j=i-1; j>=1; j-=1) {
C4:                if (R[i]->K < R[j]->K) COUNT[j]+=1; else COUNT[i]+=1; } }

Distribution counting

D1:    for (i=u; i<=v; i+=1) COUNT[i]=0;
D2:    for (j=1; j<=N; j+=1)
D3:        COUNT[R[j]->K]+=1;
D4:    for (i=u+1; i<=v; i+=1) COUNT[i]+=COUNT[i-1];
D5:    for (j=N; j>=1; j-=1) {
D6:        i=COUNT[R[j]->K]; S[i]=R[j]->K; COUNT[R[j]->K]=i-1; }

5.2.1 Sorting by Insertion

Straight insertion sort

S1:    for (j=2; j<=N; j+=1) {
S2:        i=j-1; Kt=R[j]->K; Rt=R[j];
S3:        if (Kt>=R[i]->K) goto S5;
S4:        R[i+1]=R[i]; i-=1; if (i>0) goto S3;
S5:        R[i+1]=Rt; }

Shell's method

D1:    for (h=N/2; h>0; h/=2) {
D2:        for (j=h+1; j<=N; j+=1) {
D3:            i=j-h; Kt=R[j]->K; Rt=R[j];
D4:            if (Kt>=R[i]->K) goto D6;
D5:            R[i+h]=R[i]; i-=h; if (i>0) goto D4;
D6:            R[i+h]=Rt; } }

List insertion

L1:    R[0]->L=N; R[N]->L=0; for (j=N-1; j>=1; j-=1) {
L2:        p=R[0]->L; q=0; Kt=R[j]->K;
L3:        if (Kt<=R[p]->K) goto L5;
L4:        q=p; p=R[q]->L; if (p>0) goto L3;
L5:        R[q]->L=j; R[j]->L=p; }

5.2.2 Sorting by Exchanging

Bubble sort

B1:    BOUND=N;
B2:    t=0; for (j=1; j<=BOUND-1; j+=1) {
B3:        if (R[j]->K>R[j+1]->K) swap(R[j],R[j+1],Rt); t=j; }
B4:    if (t!=0) { BOUND=t; goto B2; }

Merge exchange

M1:    t=lg(N, HIGH); for (j=1, p=pow(2,t-1); p>=1; p=pow(2,t-(j++))) {
M2:        q=pow(2, t-1); r=0; d=p;
M3:        for (i=0; iK>R[i+d+1]->K) swap(R[i+1],R[i+d+1],Rt); }
M5:        if (q!=p) { d=q-p; q=q/2; r=p; goto M3; } }
M6:    p=p/2; if (p>0) goto M2;

Quicksort

Q1:    if (N<=M) goto Q9; S=0; l=1; r=N;
Q2:    i=l; j=r+1; K=R[l]->K;
Q3:    i+=1; if (R[i]->KK) goto Q4;
Q5:    if (j<=i) { swap(R[l],R[j],Rt); goto Q7; }
Q6:    swap(R[i],R[j],Rt); goto Q3;
Q7:    if (r-j>=j-l>M) { push(j+1,r); S+=1; r=j-1; goto Q2; }
       if (j-l>r-j>M) { push(l,j-1); S+=1; l=j+1; goto Q2; }
       if (r-j>M>=j-l) { l=j+1; goto Q2; }
       if (j-l>M>=r-j) { r=j-1; goto Q2; }
Q8:    if (S) { pop(l,r); S-=1; goto Q2; }
Q9:    for (j=2; j<=N; j+=1) {
           if (R[j-1]->K > R[j]->K) {
               K=R[j]->K; Rt=R[j]; i=j-1; R[i+1]=R[i];
                   while (R[i]->K>K && i>=1) i-=1;
                   R[i+1]=Rt; } }

Radix exchange sort

/* This algorithm, as implemented in the book, seems to have taken a wrong
 * turn at Albuquerque. Instead of reading each binary bit of each key from
 * left-to-right, they are read from right-to-left. To remedy this problem,
 * a few changes have been made to variable 'b' on lines R1 and R8. The code
 * seems to work now and Knuth has been advised of this potential problem.
 */

R1:    S=0; l=1; r=N; b=N;
R2:    if (l==r) goto R10; i=l; j=r;
R3:    if (R[i]->K & 1<<(b-1)) goto R6;
R4:    i+=1; if (i<=j) goto R3; goto R8;
R5:    if (!(R[j+1]->K & 1<<(b-1))) goto R7;
R6:    j-=1; if (i<=j) goto R5; goto R8;
R7:    swap(R[i],R[j+1],Rt); goto R4;
R8:    b-=1; if (b<0) goto R10;
       if (j=2; j-=1) {
S2:        for (i=j, k=j-1; k>=1; k-=1) if (R[k]->K>R[i]->K) i=k;
S3:        swap(R[i],R[j],Rt); }

Heapsort

H1:    l=N/2+1; r=N;
H2:    if (l>1) { l-=1; Rt=R[l]; Kt=R[l]->K; }
       else { Rt=R[r]; Kt=R[r]->K; R[r]=R[1]; r-=1;
           if (r==1) { R[1]=Rt; goto END; } }
H3:    j=l;
H4:    i=j; j*=2; if (jr) goto H8;
H5:    if (R[j]->KK) j+=1;
H6:    if (Kt>=R[j]->K) goto H8;
H7:    R[i]=R[j]; goto H4;
H8:    R[i]=Rt; goto H2;
END:

5.2.4 Sorting by Merging

Two-way merge

M1:    i=1; j=1; k=1;
M2:    if (x[i]->K<=y[j]->K) goto M3; else goto M5;
M3:    z[k]->K=x[i]->K; k+=1; i+=1; if (i<=m) goto M2;
M4:    while (k<=m+n || j<=n) z[k++]->K=y[j++]->K; goto END;
M5:    z[k]->K=y[j]->K; k+=1; j+=1; if (j<=n) goto M2;
M6:    while (k<=m+n || i<=m) z[k++]->K=x[i++]->K; goto END;
END:

Natural two-way merge sort

N1:    s=0;
N2:    if (s==0) { i=1; j=N; k=N+1; l=2*N; }
       if (s==1) { i=N+1; j=2*N; k=1; l=N; }
       d=1; f=1;
N3:    if (R[i]->K>R[j]->K) goto N8; if (i==j) { R[k]=R[i]; goto N13; }
N4:    R[k]=R[i]; k+=d;
N5:    i+=1; if (R[i-1]->K<=R[i]->K) goto N3;
N6:    R[k]=R[j]; k+=d;
N7:    j-=1; if (R[j+1]->K<=R[j]->K) goto N6; else goto N12;
N8:    R[k]=R[j]; k+=d;
N9:    j-=1; if (R[j+1]->K<=R[j]->K) goto N3;
N10:   R[k]=R[i]; k+=d;
N11:   i+=1; if (R[i-1]->K<=R[i]->K) goto N10;
N12:   f=0; d=-d; t=k; k=l; l=t; goto N3;
N13:   if (f==0) { s=1-s; goto N2; }
       if (s==0) for (t=1; t<=N; t+=1) { R[t]=R[t+N]; }

Straight two-way merge sort

S1:    s=0; p=1;
S2:    if (s==0) { i=1; j=N; k=N; l=2*N+1; }
       if (s==1) { i=N+1; j=2*N; k=0; l=N+1; }
       d=1; q=p; r=p;
S3:    if (R[i]->K>R[j]->K) goto S8;
S4:    k=k+d; R[k]=R[i];
S5:    i+=1; q-=1; if (q>0) goto S3;
S6:    k+=d; if (k==l) goto S13; else R[k]=R[j];
S7:    j-=1; r-=1; if (r>0) goto S6; else goto S12;
S8:    k+=d; R[k]=R[j];
S9:    j-=1; r-=1; if (r>0) goto S3;
S10:   k+=d; if (k==l) goto S13; else R[k]=R[i];
S11:   i+=1; q-=1; if (q>0) goto S10;
S12:   q=p; r=p; d=-d; t=k; k=l; l=t; if (j-iL=1; R[N+1]->L=2;
       for (i=1; i<=N-2; i++) R[i]->L=-(i+2);
       R[N-1]->L=R[N]->L=0;
L2:    s=0; t=N+1; p=R[s]->L; q=R[t]->L;
       if (q==0) goto END;
L3:    if (R[p]->K>R[q]->K) goto L6;
L4:    R[s]->L=set(R[s]->L,p); s=p; p=R[p]->L; if (p>0) goto L3;
L5:    R[s]->L=q; s=t; while (q>0) { t=q; q=R[q]->L; } goto L8;
L6:    R[s]->L=set(R[s]->L,q); s=q; q=R[q]->L; if (q>0) goto L3;
L7:    R[s]->L=p; s=t; while (p>0) { t=p; p=R[p]->L; }
L8:    p=-p; q=-q; if (q==0) { R[s]->L=set(R[s]->L,p); R[t]->L=0; goto L2; }
       else goto L3;
END:

5.2.5 Sorting by Distribution

Radix list sort

/* Not implemented, as it seems the algorithm description is incomplete.
 * For instance, "for some j!=1" isn't enough to describe the required loop
 * If anyone can implement this algorithm strictly based on the book, please
 * let me know. Otherwise, I recommend looking into "Engineering Radix Sort"
 * by D. McIlroy, P. McIlroy and K. Bostic.
 */


Epilog

It's not because it works that it's necessarily right.