The Trivium Cipher mapped into custom instructions

From Gezel2

Jump to: navigation, search

The [Trivium Cipher] is a synchronous stream cipher submitted to the eSTREAM competition. The cipher was designed by Christophe De Cannière and Bart Preneel of KU Leuven.

The particular implementation shown here is the cipher mapped as custom instructions for ARM. The Trivium kernel has been unrolled 64 times, so that each iteration through the algorithm returns 64 cipher bits. Next, the cipher kernel is controlled using two custom instructions.

  • A 3x1 instruction (3 inputs, 1 output: OP3x1_1) to load the key and initialization vector into the coprocessor. Trivium has 288 bits of state. We load 64 bits at a time through repeated execution of OP3x1_1; the fifth and last block holds only the remaining 32 bits, taken from wordA.
  OP3x1_1(dummy_out, wordA, wordB, wordselection)
  
  with wordA: upper 32 bits
       wordB: lower 32 bits
       wordselection: selects which 64-bit block of the 288-bit state is written (1 to 5); any other value, such as 0, writes nothing
  • A 2x2 instruction (OP2x2_1) to retrieve cipher bits. This instruction returns 64 bits each time it executes. The Trivium kernel is controlled with a single control bit.
  OP2x2_1(outA, outB, ctl, dummy_in)
  
  with outA: upper 32 output bits
       outB: lower 32 output bits
       ctl:  toggling the lowest bit (0 to 1 or 1 to 0) advances the Trivium kernel by 64 rounds, which is why trivium.c alternates ctl between 1 and 0
  • Note that an ARM, by default, has no custom instructions. The author of Simit-ARM [Wei Qin] has modified the ARM simulator so that it supports these custom instructions.

trivium.c

#include "armsys.h"
#include "armsfu.h"
#include <stdio.h>

// Test driver for the Trivium coprocessor.
//
// Loads the key and IV into the coprocessor with OP3x1_1, clocks the cipher
// through its warm-up with OP2x2_1, collects keystream words into stream[],
// and prints words in hexadecimal, eight per line.
//
// Returns 0.
int main() {
  int z1, z2, i;             // z1/z2 receive the two 32-bit results of each custom instruction
  unsigned int stream[512];  // collected keystream words

  int key1 = 0x80;           // first data word of block 1 (see "key = 80" below)
  int key2 = 0xe0000000;     // top three bits set; first data word of block 5
                             // (presumably the constant 1 bits at the end of Trivium's initial state - confirm)

  // Block selectors for OP3x1_1; f0 doubles as an all-zero data word.
  int f1 = 1;
  int f2 = 2;
  int f3 = 3;
  int f4 = 4;
  int f5 = 5;
  int f0 = 0;

  // load key and IV
  // key = 80
  // IV  = 0
  // One call per state block; z1 is only a dummy destination here.
  OP3x1_1(z1,key1, f0, f1);
  OP3x1_1(z1,  f0, f0, f2);
  OP3x1_1(z1,  f0, f0, f3);
  OP3x1_1(z1,  f0, f0, f4);
  OP3x1_1(z1,key2, f0, f5);
  // Selector 0 matches none of the block numbers tested in trivium.fdl (1..5),
  // so this last call leaves the state registers unselected.
  OP3x1_1(z1,  f0, f0, f0);

  // run key schedule
  // The control argument alternates 1, 0: trivium.fdl advances the cipher
  // whenever bit 0 differs from its previous value, so every call is one
  // 64-round step. 9 iterations x 2 calls x 64 rounds = 1152 rounds;
  // the results in z1/z2 are discarded.
  for (i=0; i<9; i++) {
    OP2x2_1(z1, z2, 1, 0);
    OP2x2_1(z1, z2, 0, 0);
   }

  // run keystream
  // Each iteration issues two calls and stores four words, so 128
  // iterations fill all 512 entries of stream[].
  for (i=0; i<128; i++) {
    OP2x2_1(z1, z2, 1, 0);
    stream[4*i]   = z1;
    stream[4*i+1] = z2;
    OP2x2_1(z1, z2, 0, 0);
    stream[4*i+2] = z1;
    stream[4*i+3] = z2;
  }

  // Print eight words per line.
  // NOTE(review): only the first 256 of the 512 collected words are printed -
  // confirm whether that is intentional.
  for (i=0; i<256; i++) {
    printf("%8x ", stream[i]);
    if (!((i+1) % 8))
      printf("\n");
  }

  return 0;
}

trivium.fdl

This is the Trivium kernel, unrolled 64 times. Note the cosimulation interfaces that connect to the special instructions OP3x1_1 and OP2x2_1 above.

 // ARM core block (iptype "armsystem"); configured to run the executable
 // named "trivium".
 ipblock myarm {
   iptype "armsystem";
   ipparm "exec=trivium";
 }
 								     
 // Interface block of type "armsfu2x2", attached to core myarm.
 //   d1, d2 : 32-bit values delivered by the block to the hardware
 //   q1, q2 : 32-bit values handed back by the hardware
 // In triviumsfu, bit 0 of d1 is the advance control and q1/q2 carry the
 // key-stream words.
 // NOTE(review): presumably d1/d2 map to the (ctl, dummy_in) operands and
 // q1/q2 to (outA, outB) of OP2x2_1 - confirm the operand order.
 ipblock armsfu1(out d1, d2 : ns(32);
                 in  q1, q2 : ns(32)) {
   iptype "armsfu2x2";
   ipparm "core = myarm";
   ipparm "device = 0";
 }
 								     
 // Interface block of type "armsfu3x1", attached to core myarm.
 //   d1, d2, d3 : 32-bit values delivered by the block to the hardware
 //   q1         : 32-bit value handed back by the hardware
 // In triviumsfu, d1 and d2 are data words, d3 is the block selector, and
 // q1 is tied to 0.
 // NOTE(review): presumably d1/d2/d3 map to (wordA, wordB, wordselection)
 // of OP3x1_1 - confirm the operand order.
 ipblock armsfu2(out d1, d2, d3 : ns(32);
                 in  q1         : ns(32)) {
   iptype "armsfu3x1";
   ipparm "core = myarm";
   ipparm "device = 0";
 }
 								     
 // One round of the Trivium update. The datapath holds no registers, only
 // signals: it maps a state si to the next state so and one output bit z.
 dp trivium(in  si : ns(288);  // state input
            out so : ns(288);  // state output
            out z  : ns(1)) {  // crypto bit out
   sig  t1,  t2,  t3 : ns(  1);  // XOR of two taps from each state segment
   sig t11, t22, t33 : ns(  1);  // feedback bits inserted into the segments
   sig saa           : ns( 93);  // next value of segment si[0..92]
   sig sbb           : ns( 84);  // next value of segment si[93..176]
   sig scc           : ns(111);  // next value of segment si[177..287]
   always {
     t1  = si[ 65] ^ si[ 92];
     t2  = si[161] ^ si[176];
     t3  = si[242] ^ si[287];
     // The output bit is the XOR of the three tap sums.
     z   = t1 ^ t2 ^ t3;
     // Each feedback bit adds an AND of two neighbouring bits and one more tap.
     t11 = t1 ^ (si[ 90] & si[ 91]) ^ si[170];
     t22 = t2 ^ (si[174] & si[175]) ^ si[263];
     t33 = t3 ^ (si[285] & si[286]) ^ si[ 68];
     // Shift each segment by one position and insert the feedback bit coming
     // from the previous segment.
     // NOTE(review): each concatenation is one bit wider than its target
     // signal; this presumably relies on the assignment dropping the topmost
     // bit - confirm against the GEZEL language rules.
     saa = si[  0: 92] # t33;
     sbb = si[ 93:176] # t11;
     scc = si[177:287] # t22;
     so  = scc # sbb # saa;
   }
 }
 // trivium2 is declared as a copy of trivium; trivium20 uses it as its second round.
 dp trivium2 : trivium
 								     
 // Two Trivium rounds in series: si passes through trivium, then trivium2.
 // The 2-bit output z is z0 (first round) concatenated with z1 (second round).
 dp trivium20(in  si : ns(288);  // state input
              out so : ns(288);  // state output
              out z  : ns(2)) {  // crypto bit out
   sig so0    : ns(288);  // state between the two rounds
   sig z0, z1 : ns(1);    // output bit of the first / second round
   use trivium (si, so0, z0);
   use trivium2(so0, so, z1);
   always {
     z = z0 # z1;
   }
 }
 // trivium21 is declared as a copy of trivium20; trivium40 uses it as its second stage.
 dp trivium21 : trivium20
 								     
 // Four Trivium rounds in series: si passes through trivium20, then trivium21.
 // The 4-bit output z is z0 (first stage) concatenated with z1 (second stage).
 dp trivium40(in  si : ns(288);    // state input
              out so : ns(288);    // state output
              out z  : ns(4)) {    // crypto bit out
   sig so0    : ns(288);  // state between the two stages
   sig z0, z1 : ns(2);    // output bits of the first / second stage
   use trivium20(si, so0, z0);
   use trivium21(so0, so, z1);
   always {
     z = z0 # z1;
   }
 }
 // trivium41 is declared as a copy of trivium40; trivium80 uses it as its second stage.
 dp trivium41 : trivium40
 								     
 // Eight Trivium rounds in series: si passes through trivium40, then trivium41.
 // The 8-bit output z is z0 (first stage) concatenated with z1 (second stage).
 dp trivium80(in  si : ns(288);    // state input
              out so : ns(288);    // state output
              out z  : ns(8)) {    // crypto bit out
   sig so0    : ns(288);  // state between the two stages
   sig z0, z1 : ns(4);    // output bits of the first / second stage
   use trivium40(si, so0, z0);
   use trivium41(so0, so, z1);
   always {
     z = z0 # z1;
   }
 }
 // trivium81 is declared as a copy of trivium80; trivium160 uses it as its second stage.
 dp trivium81 : trivium80
 								     
 // Sixteen Trivium rounds in series: si passes through trivium80, then trivium81.
 // The 16-bit output z is z0 (first stage) concatenated with z1 (second stage).
 dp trivium160(in  si : ns(288);    // state input
               out so : ns(288);    // state output
               out z  : ns(16)) {   // crypto bit out
   sig so0    : ns(288);  // state between the two stages
   sig z0, z1 : ns(8);    // output bits of the first / second stage
   use trivium80(si, so0, z0);
   use trivium81(so0, so, z1);
   always {
     z = z0 # z1;
   }
 }
 // trivium161 is declared as a copy of trivium160; trivium320 uses it as its second stage.
 dp trivium161 : trivium160
 								     
 // Thirty-two Trivium rounds in series: si passes through trivium160, then
 // trivium161. The 32-bit output z is z0 (first stage) concatenated with
 // z1 (second stage).
 dp trivium320(in  si : ns(288);  // state input
               out so : ns(288);    // state output
               out z  : ns(32)) {    // crypto bit out
   sig so0    : ns(288);  // state between the two stages
   sig z0, z1 : ns(16);   // output bits of the first / second stage
   use trivium160(si, so0, z0);
   use trivium161(so0, so, z1);
   always {
     z = z0 # z1;
   }
 }

 // trivium321 is declared as a copy of trivium320; triviumsfu uses it as its second 32-round stage.
 dp trivium321 : trivium320
 								     
 // Trivium coprocessor: connects the two interface blocks (armsfu1, armsfu2)
 // to a 64-round Trivium kernel built from trivium320 followed by trivium321.
 // The 288-bit cipher state is held in the nine 32-bit registers w1..w9.
 dp triviumsfu {
   sig o2x2_d1, o2x2_d2, o2x2_q1, o2x2_q2 : ns(32);  // ports of armsfu1
   sig o3x1_d1, o3x1_d2, o3x1_d3, o3x1_q1 : ns(32);  // ports of armsfu2
   use armsfu1( o2x2_d1, o2x2_d2, o2x2_q1, o2x2_q2);
   use armsfu2( o3x1_d1, o3x1_d2, o3x1_d3, o3x1_q1);
   use myarm;

   // Cipher state; w1 holds state bits 0..31 and w9 holds bits 256..287.
   reg w1, w2 : ns(32);
   reg w3, w4 : ns(32);
   reg w5, w6 : ns(32);
   reg w7, w8 : ns(32);
   reg w9     : ns(32);
   reg y      : ns(288);  // NOTE(review): not referenced anywhere in this datapath
   reg tick   : ns(1);    // stored copy of control bit o2x2_d1[0]
   sig adv    : ns(1);    // 1 when the state must advance by 64 rounds

   sig si0, si1 : ns(288);  // state inputs of the first / second 32-round stage
   sig so0, so1 : ns(288);  // state outputs of the first / second 32-round stage
   sig z0,  z1  : ns(32);   // key-stream bits of the first / second 32-round stage
   use trivium320(si0, so0, z0);
   use trivium321(si1, so1, z1);

   always {
     // program trivium state
     // Advancing has priority: when adv is set, every word takes its slice of
     // the 64-rounds-later state so1. Otherwise the word pair selected by
     // o3x1_d3 (1..4) is written from o3x1_d1/o3x1_d2, and selector 5 writes
     // w9 from o3x1_d1 only. Any other selector value keeps the registers.
     w1 = adv ? so1[  0: 31] : ((o3x1_d3 == 1) ? o3x1_d1 : w1);
     w2 = adv ? so1[ 32: 63] : ((o3x1_d3 == 1) ? o3x1_d2 : w2);
     w3 = adv ? so1[ 64: 95] : ((o3x1_d3 == 2) ? o3x1_d1 : w3);
     w4 = adv ? so1[ 96:127] : ((o3x1_d3 == 2) ? o3x1_d2 : w4);
     w5 = adv ? so1[128:159] : ((o3x1_d3 == 3) ? o3x1_d1 : w5);
     w6 = adv ? so1[160:191] : ((o3x1_d3 == 3) ? o3x1_d2 : w6);
     w7 = adv ? so1[192:223] : ((o3x1_d3 == 4) ? o3x1_d1 : w7);
     w8 = adv ? so1[224:255] : ((o3x1_d3 == 4) ? o3x1_d2 : w8);
     w9 = adv ? so1[256:287] : ((o3x1_d3 == 5) ? o3x1_d1 : w9);
     // The 3x1 interface returns a constant 0.
     o3x1_q1 = 0;

     // Feed the register contents through both 32-round stages in series.
     si0 = w9 # w8 # w7 # w6 # w5 # w4 # w3 # w2 # w1;
     si1 = so0;

     // Return the 64 key-stream bits: first stage on q1, second stage on q2.
     o2x2_q1 = z0;
     o2x2_q2 = z1;


     // Edge detection on the control bit: adv is set when o2x2_d1[0] differs
     // from tick. tick is a register, so the comparison presumably reads the
     // value stored in the previous cycle - confirm against GEZEL register
     // semantics.
     tick = o2x2_d1[0];
     adv  = (tick != o2x2_d1[0]);
   }

 }
 								     
 // Top-level system: its only entry is the triviumsfu datapath.
 system S {
    triviumsfu;
 }