The Euclid Algorithm as a memory-mapped coprocessor for ARM

From Gezel2

Jump to: navigation, search

This example shows a cosimulation between GEZEL and an ARM processor. The instruction-set simulator for the ARM is from the [Simit-ARM project].

euclid.fdl

This design wraps the a standalone hardware processor for Euclid's Algorithm into a memory-mapped coprocessor (for a 32-bit microprocessor). In the GEZEL design, the 'encap_euclid' module wraps the 'euclid' core into a thee-port module. 'encap_euclid' defines the instruction-set of the coprocessor. Three GEZEL ipblock are used to define the memory-mapped interfaces for the ARM processor.

dp euclid(in  m_in, n_in : ns(32);
          out gcd_out    : ns(32);
          in  c_ld       : ns(1);
          out c_done     : ns(1)) {  

  reg m, n               : ns(32);
  reg factor             : ns(32);
  reg ld                 : ns(1);
  reg done               : ns(1);

  sfg load    { ld = c_ld; m = m_in; n = n_in; }
  sfg init    { factor = 0; done = 0;}
  sfg flags   { done = ((m == 0) | (n == 0));  }
  sfg shiftm  { m = m >> 1; }
  sfg shiftn  { n = n >> 1; }
  sfg reduce  { m = (m >= n) ? m - n : m;  
                n = (n >  m) ? n - m : n; }
  sfg shiftf  { factor = factor + 1; } 
  sfg outidle { gcd_out = 0;
                c_done  = 0;}
  sfg complete{ gcd_out = ((m > n) ? m : n) << factor;
                c_done  = 1; }
}

fsm euclid_ctl(euclid) {
  initial s0;
  state s1, s2; 

  @s0 if (ld) then (outidle) -> s1;
      else (init, load, outidle) -> s0;
  @s1 if (done)               then (complete)                 -> s2;
      else if ( m[0] &  n[0]) then (reduce, outidle, flags)   -> s1;
      else if ( m[0] & ~n[0]) then (shiftn, outidle, flags)   -> s1;
      else if (~m[0] &  n[0]) then (shiftm, outidle, flags)   -> s1;
                              else (shiftn, shiftm, 
                                    shiftf, outidle, flags)   -> s1;
  @s2 if (ld) then (load, complete) -> s2;
      else (init, load, outidle) -> s0;
}

dp encap_euclid(in   ins : ns(8);
                in   din : ns(32); 
                out dout : ns(32)) { 

   reg ld, done  :  ns(1);
   reg m, n, gcd : ns(32);
   sig gcd_out   : ns(32);
   reg insreg    : ns(32);
   sig c_done    : ns(1); 

   use euclid(m, n, gcd_out, ld, c_done); 

   sfg decode  { insreg = ins;
                 dout   = gcd; 
                 done   = c_done; }
   sfg load_m  { m  = din; 
                 $display($cycle, ": load m ", m); 
                }
   sfg load_n  { n  = din; 
                 $display($cycle, ": load n ", n); 
                }
   sfg dogcd   { ld = 1; gcd = 0; 
                 $display($cycle, ": dogcd"); 
                }
   sfg donegcd { ld = 0; gcd = gcd_out; 
                 $display($cycle, ": done. result=", gcd); 
                }
}

fsm ctl_encap_euclid(encap_euclid) {
  initial s0;
  state s1, s2, s3; 

  @s0 (decode) -> s1; 

  @s1 if (insreg == 1)      then (load_m, decode) -> s2;
      else if (insreg == 2) then (load_n, decode) -> s2;
      else if (insreg == 3) then (dogcd, decode ) -> s3;
                            else (decode) -> s1; 

  @s2 if (insreg == 0) then (decode) -> s1;
                       else (decode) -> s2; 

  @s3 if (done) then (donegcd, decode) -> s2;
                else (decode)          -> s3;

}

ipblock myarm {
  iptype "armsystem";
  ipparm "exec=euclid";
}

// instruction input
ipblock b_ins(out data : ns(8)) {
  iptype "armsystemsource";
  ipparm "core=myarm";
  ipparm "address=0x80000000";
}

// data input
ipblock b_datain(out data : ns(32)) {
  iptype "armsystemsource";
  ipparm "core=myarm";
  ipparm "address=0x80000008";
}

// data output
ipblock b_dataout(in data : ns(32)) {
  iptype "armsystemsink";
  ipparm "core=myarm";
  ipparm "address=0x80000004";
}

dp sys {
  sig ins, din, dout : ns(32);

  use myarm;
  use encap_euclid(ins, din, dout);
  use b_ins(ins);
  use b_datain(din);
  use b_dataout(dout);
}

system S {
  sys;
}

euclid.c

The 'euclid.c' description shows a sample driver in C that feeds two integers into the coprocessor and that retrieves the resulting GCD.

int gcd(int m, int n) {
 volatile unsigned char *ins  = (volatile unsigned char *) 0x80000000;
 volatile unsigned int  *din  = (volatile unsigned int  *) 0x80000008;
 volatile unsigned int  *dout = (volatile unsigned int  *) 0x80000004;
 int r;
 enum {idle, load_m, load_n, dogcd};

 *ins = idle;

 *din = m;
 *ins = load_m;
 *ins = load_m;

 *ins = idle;

 *din = n;
 *ins = load_n;
 
 *ins = idle;

 *ins = dogcd;

 while (! (r = *dout) );

 *ins = idle;
 return r;
}

int main() {
 int i;
 for (i=894; i<904; i++)
   printf("The GCD of 2322 and %d is %d\n", i, gcd(2322, i));
 return 0;

}

Simulation Output

>/opt/arm-linux-3.2/bin/arm-linux-gcc -static -O3 euclid.c -o euclid
>gplatform euclid.fdl
armsystem: loading executable [euclid]
armsystemsink: set address 2147483652
21842: load m 0/912
21845: load n 0/37e
21863: dogcd
21885: done. result=0/6
The GCD of 2322 and 894 is 6
32885: load m 912/912
32888: load n 37e/37f
32890: dogcd
32919: done. result=0/1
The GCD of 2322 and 895 is 6
35162: load m 912/912
35165: load n 37f/380
35167: dogcd
35198: done. result=0/2
The GCD of 2322 and 896 is 1
37439: load m 912/912
37442: load n 380/381
37444: dogcd
37467: done. result=0/3
The GCD of 2322 and 897 is 2
39716: load m 912/912
39719: load n 381/382
39721: dogcd
39749: done. result=0/2
The GCD of 2322 and 898 is 3
41993: load m 912/912
41996: load n 382/383
41998: dogcd
42026: done. result=0/1
The GCD of 2322 and 899 is 2
44270: load m 912/912
44273: load n 383/384
44275: dogcd
44296: done. result=0/12
The GCD of 2322 and 900 is 1
46547: load m 912/912
46550: load n 384/385
46552: dogcd
46580: done. result=0/1
The GCD of 2322 and 901 is 18
48850: load m 912/912
48853: load n 385/386
48855: dogcd
48882: done. result=0/2
The GCD of 2322 and 902 is 1
51127: load m 912/912
51130: load n 386/387
51132: dogcd
51145: done. result=0/81
The GCD of 2322 and 903 is 2
Total Cycles: 56229