The Trivium Cipher mapped into custom instructions
From Gezel2
The [Trivium Cipher] is a synchronous stream cipher submitted to the eSTREAM competition. The cipher was designed by Christophe De Cannière and Bart Preneel from KU Leuven.
The particular implementation shown here is the cipher mapped as custom instructions for ARM. The Trivium kernel has been unrolled 64 times, so that each iteration through the algorithm returns 64 cipher bits. Next, the cipher kernel is controlled using two custom instructions.
- A 3x1 instruction (3 inputs, 1 output: OP3x1_1) to load the key and initialization vector into the coprocessor. Trivium has 288 bits of state. We load 64 bits at a time through repeated execution of OP3x1_1.
OP3x1_1(dummy_out, wordA, wordB, wordselection)
with wordA: upper 32 bits
wordB: lower 32 bits
wordselection: selects which 64-bit block of the 288-bit state is written
- A 2x2 instruction (OP2x2_1) to retrieve cipher bits. This instruction returns 64 bits each time it executes. The Trivium kernel is controlled with a single control bit.
OP2x2_1(outA, outB, ctl, dummy_in)
with outA: upper 32 output bits
outB: lower 32 output bits
ctl: toggling the lowest bit (0 to 1 or 1 to 0) advances the Trivium kernel by 64 rounds
- Note that an ARM, by default, has no custom instructions. The author of Simit-ARM [Wei Qin] has modified the ARM simulator so that it supports these custom instructions.
trivium.c
#include "armsys.h"
#include "armsfu.h"
#include <stdio.h>
/*
 * Drives the Trivium coprocessor through the two custom instructions.
 *
 *   1. Loads the 288-bit initial state in 64-bit blocks with OP3x1_1.
 *   2. Runs the key schedule: 18 advances of 64 rounds each (1152 rounds).
 *   3. Collects keystream words into stream[] with OP2x2_1.
 *   4. Prints the first PRINT_WORDS words in hex, eight per line.
 *
 * The hardware advances the cipher whenever the lowest bit of the control
 * operand differs from the previous call, which is why every loop below
 * alternates the control value between 1 and 0.
 *
 * Returns 0.
 */
int main() {
    enum {
        KEY_SCHEDULE_ITERS = 9,    /* 2 advances per iteration -> 18 x 64 rounds */
        KEYSTREAM_ITERS    = 128,  /* 2 reads per iteration, 2 words per read    */
        WORDS_PER_ITER     = 4,
        STREAM_WORDS       = KEYSTREAM_ITERS * WORDS_PER_ITER,  /* 512 */
        PRINT_WORDS        = 256,  /* only the first half of stream[] is printed */
        WORDS_PER_LINE     = 8
    };

    unsigned int z1, z2;           /* the two 32-bit words returned by OP2x2_1   */
    unsigned int dummy;            /* unused result of the 3x1 load instruction  */
    unsigned int stream[STREAM_WORDS];
    int i;

    /*
     * State words are unsigned: 0xe0000000 does not fit in a signed 32-bit
     * int, so storing it in an int relies on implementation-defined behaviour.
     */
    unsigned int key1 = 0x80u;
    unsigned int key2 = 0xe0000000u;   /* top three bits of the last state word */
    unsigned int zero = 0;

    /* Block selectors understood by the coprocessor (0 selects no block). */
    int sel0 = 0;
    int sel1 = 1;
    int sel2 = 2;
    int sel3 = 3;
    int sel4 = 4;
    int sel5 = 5;

    // load key and IV
    // key = 80
    // IV  = 0
    OP3x1_1(dummy, key1, zero, sel1);
    OP3x1_1(dummy, zero, zero, sel2);
    OP3x1_1(dummy, zero, zero, sel3);
    OP3x1_1(dummy, zero, zero, sel4);
    OP3x1_1(dummy, key2, zero, sel5);
    OP3x1_1(dummy, zero, zero, sel0);

    // run key schedule: outputs are discarded, only the state advances
    for (i = 0; i < KEY_SCHEDULE_ITERS; i++) {
        OP2x2_1(z1, z2, 1, 0);
        OP2x2_1(z1, z2, 0, 0);
    }

    // run keystream: each instruction yields 64 bits as two 32-bit words
    for (i = 0; i < KEYSTREAM_ITERS; i++) {
        OP2x2_1(z1, z2, 1, 0);
        stream[WORDS_PER_ITER * i]     = z1;
        stream[WORDS_PER_ITER * i + 1] = z2;
        OP2x2_1(z1, z2, 0, 0);
        stream[WORDS_PER_ITER * i + 2] = z1;
        stream[WORDS_PER_ITER * i + 3] = z2;
    }

    for (i = 0; i < PRINT_WORDS; i++) {
        printf("%8x ", stream[i]);
        if (!((i + 1) % WORDS_PER_LINE))
            printf("\n");
    }

    return 0;
}
trivium.fdl
This is the Trivium kernel, unrolled 64 times. Note the cosimulation interfaces that connect to the special instructions OP3x1_1 and OP2x2_1 above.
// ARM core used for cosimulation. The "exec" parameter names the
// executable the core runs (presumably the binary built from trivium.c;
// confirm against the build setup).
ipblock myarm {
iptype "armsystem";
ipparm "exec=trivium";
}
// Cosimulation interface of type "armsfu2x2", attached to core myarm.
// d1, d2 are 32-bit values coming out of the interface into the hardware;
// q1, q2 are 32-bit values the hardware drives back into the interface.
// This is the hardware side of the 2x2 custom instruction (OP2x2_1).
ipblock armsfu1(out d1, d2 : ns(32);
in q1, q2 : ns(32)) {
iptype "armsfu2x2";
ipparm "core = myarm";
ipparm "device = 0";
}
// Cosimulation interface of type "armsfu3x1", attached to core myarm.
// d1, d2, d3 are 32-bit values coming out of the interface into the
// hardware; q1 is the single 32-bit value driven back into the interface.
// This is the hardware side of the 3x1 custom instruction (OP3x1_1).
ipblock armsfu2(out d1, d2, d3 : ns(32);
in q1 : ns(32)) {
iptype "armsfu3x1";
ipparm "core = myarm";
ipparm "device = 0";
}
// One round of the Trivium state update, purely combinational.
// Takes the 288-bit state si, produces the next state so and one
// keystream bit z. Bit indices are 0-based, so si[65] is the bit the
// Trivium specification calls s66, si[92] is s93, and so on.
dp trivium(in si : ns(288); // state input
out so : ns(288); // state output
out z : ns(1)) { // crypto bit out
sig t1, t2, t3 : ns( 1);
sig t11, t22, t33 : ns( 1);
sig saa : ns( 93);
sig sbb : ns( 84);
sig scc : ns(111);
always {
// linear taps of the three shift registers
t1 = si[ 65] ^ si[ 92];
t2 = si[161] ^ si[176];
t3 = si[242] ^ si[287];
// keystream bit is the XOR of the three linear taps
z = t1 ^ t2 ^ t3;
// feedback bits: linear tap, one AND term and one extra tap each
t11 = t1 ^ (si[ 90] & si[ 91]) ^ si[170];
t22 = t2 ^ (si[174] & si[175]) ^ si[263];
t33 = t3 ^ (si[285] & si[286]) ^ si[ 68];
// Shift each register by one position and insert its feedback bit.
// NOTE(review): each concatenation is one bit wider than its target
// signal; this relies on the assignment dropping the extra top bit so
// that the feedback bit lands in the lowest position - confirm against
// the GEZEL semantics for '#' and width truncation.
saa = si[ 0: 92] # t33;
sbb = si[ 93:176] # t11;
scc = si[177:287] # t22;
// reassemble the 288-bit state from the 111-, 84- and 93-bit registers
so = scc # sbb # saa;
}
}
// trivium2 is declared from trivium so that two one-round stages can be
// instantiated side by side in trivium20.
dp trivium2 : trivium
// Two chained Trivium rounds: si -> trivium -> so0 -> trivium2 -> so.
// z carries the two keystream bits, first-round bit on the left of the
// concatenation.
dp trivium20(in si : ns(288); // state input
out so : ns(288); // state output
out z : ns(2)) { // crypto bit out
sig so0 : ns(288);
sig z0, z1 : ns(1);
use trivium (si, so0, z0);
use trivium2(so0, so, z1);
always {
z = z0 # z1;
}
}
// trivium21 is declared from trivium20 so that two 2-round stages can be
// instantiated side by side in trivium40.
dp trivium21 : trivium20
// Four chained Trivium rounds built from two 2-round stages.
// z carries four keystream bits, first stage's bits on the left of the
// concatenation.
dp trivium40(in si : ns(288); // state input
out so : ns(288); // state output
out z : ns(4)) { // crypto bit out
sig so0 : ns(288);
sig z0, z1 : ns(2);
use trivium20(si, so0, z0);
use trivium21(so0, so, z1);
always {
z = z0 # z1;
}
}
// trivium41 is declared from trivium40 so that two 4-round stages can be
// instantiated side by side in trivium80.
dp trivium41 : trivium40
// Eight chained Trivium rounds built from two 4-round stages.
// z carries eight keystream bits, first stage's bits on the left of the
// concatenation.
dp trivium80(in si : ns(288); // state input
out so : ns(288); // state output
out z : ns(8)) { // crypto bit out
sig so0 : ns(288);
sig z0, z1 : ns(4);
use trivium40(si, so0, z0);
use trivium41(so0, so, z1);
always {
z = z0 # z1;
}
}
// trivium81 is declared from trivium80 so that two 8-round stages can be
// instantiated side by side in trivium160.
dp trivium81 : trivium80
// Sixteen chained Trivium rounds built from two 8-round stages.
// z carries sixteen keystream bits, first stage's bits on the left of the
// concatenation.
dp trivium160(in si : ns(288); // state input
out so : ns(288); // state output
out z : ns(16)) { // crypto bit out
sig so0 : ns(288);
sig z0, z1 : ns(8);
use trivium80(si, so0, z0);
use trivium81(so0, so, z1);
always {
z = z0 # z1;
}
}
// trivium161 is declared from trivium160 so that two 16-round stages can
// be instantiated side by side in trivium320.
dp trivium161 : trivium160
// Thirty-two chained Trivium rounds built from two 16-round stages.
// z carries 32 keystream bits, first stage's bits on the left of the
// concatenation.
dp trivium320(in si : ns(288); // state input
out so : ns(288); // state output
out z : ns(32)) { // crypto bit out
sig so0 : ns(288);
sig z0, z1 : ns(16);
use trivium160(si, so0, z0);
use trivium161(so0, so, z1);
always {
z = z0 # z1;
}
}
// trivium321 is declared from trivium320 so that two 32-round stages can
// be instantiated side by side in triviumsfu (64 rounds in total).
dp trivium321 : trivium320
// Top-level coprocessor: connects the ARM core and its two custom
// instruction interfaces to a 64-round unrolled Trivium kernel.
//  - armsfu2 (3x1) writes the state registers w1..w9, 64 bits at a time.
//  - armsfu1 (2x2) returns the 64 keystream bits and carries the control
//    bit that advances the state.
dp triviumsfu {
sig o2x2_d1, o2x2_d2, o2x2_q1, o2x2_q2 : ns(32);
sig o3x1_d1, o3x1_d2, o3x1_d3, o3x1_q1 : ns(32);
use armsfu1( o2x2_d1, o2x2_d2, o2x2_q1, o2x2_q2);
use armsfu2( o3x1_d1, o3x1_d2, o3x1_d3, o3x1_q1);
use myarm;
// the 288-bit Trivium state, held as nine 32-bit registers
reg w1, w2 : ns(32);
reg w3, w4 : ns(32);
reg w5, w6 : ns(32);
reg w7, w8 : ns(32);
reg w9 : ns(32);
// NOTE(review): y is declared but never read or written in this block.
reg y : ns(288);
// tick remembers the previous value of the control bit
reg tick : ns(1);
sig adv : ns(1);
sig si0, si1 : ns(288);
sig so0, so1 : ns(288);
sig z0, z1 : ns(32);
// two 32-round stages in series: si0 -> so0 = si1 -> so1
use trivium320(si0, so0, z0);
use trivium321(si1, so1, z1);
always {
// program trivium state
// Each register takes, in priority order: the 64-round-advanced state
// when adv is set; the instruction operand when the selector o3x1_d3
// names its block (1..5); otherwise its current value. Selector 0
// matches no block. Block 5 only uses o3x1_d1 (o3x1_d2 is ignored).
w1 = adv ? so1[ 0: 31] : ((o3x1_d3 == 1) ? o3x1_d1 : w1);
w2 = adv ? so1[ 32: 63] : ((o3x1_d3 == 1) ? o3x1_d2 : w2);
w3 = adv ? so1[ 64: 95] : ((o3x1_d3 == 2) ? o3x1_d1 : w3);
w4 = adv ? so1[ 96:127] : ((o3x1_d3 == 2) ? o3x1_d2 : w4);
w5 = adv ? so1[128:159] : ((o3x1_d3 == 3) ? o3x1_d1 : w5);
w6 = adv ? so1[160:191] : ((o3x1_d3 == 3) ? o3x1_d2 : w6);
w7 = adv ? so1[192:223] : ((o3x1_d3 == 4) ? o3x1_d1 : w7);
w8 = adv ? so1[224:255] : ((o3x1_d3 == 4) ? o3x1_d2 : w8);
w9 = adv ? so1[256:287] : ((o3x1_d3 == 5) ? o3x1_d1 : w9);
// the 3x1 instruction has no meaningful result; always return 0
o3x1_q1 = 0;
// assemble the kernel input from the registers, w1 in the lowest bits
si0 = w9 # w8 # w7 # w6 # w5 # w4 # w3 # w2 # w1;
si1 = so0;
// keystream from the first 32 rounds goes to q1, the next 32 to q2
o2x2_q1 = z0;
o2x2_q2 = z1;
// adv is set while the incoming control bit differs from the value
// stored in tick, i.e. when software toggles the lowest bit of the
// first 2x2 operand
tick = o2x2_d1[0];
adv = (tick != o2x2_d1[0]);
}
}
// Top-level system: consists of the single triviumsfu datapath, which
// itself instantiates the ARM core and both custom-instruction interfaces.
system S {
triviumsfu;
}
