<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <!-- "text/xhtml" is not a media type; an XHTML 1.0 page delivered to HTML
       user agents declares text/html together with its character encoding. -->
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
  <title>Crypto++: salsa.cpp Source File</title>
  <link href="tabs.css" rel="stylesheet" type="text/css"/>
  <link href="doxygen.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<!-- Generated by Doxygen 1.6.1 -->
<div class="navigation" id="top">
  <!-- Primary tabs: top-level sections of the generated documentation -->
  <div class="tabs">
    <ul>
      <li><a href="index.html"><span>Main Page</span></a></li>
      <li><a href="namespaces.html"><span>Namespaces</span></a></li>
      <li><a href="annotated.html"><span>Classes</span></a></li>
      <li class="current"><a href="files.html"><span>Files</span></a></li>
    </ul>
  </div>
  <!-- Secondary tabs: sub-pages of the "Files" section -->
  <div class="tabs">
    <ul>
      <li><a href="files.html"><span>File List</span></a></li>
      <li><a href="globals.html"><span>File Members</span></a></li>
    </ul>
  </div>
<!-- Source listing: whitespace inside <pre> is significant, so the listing text below is left exactly as generated. -->
<h1>salsa.cpp</h1><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">// salsa.cpp - written and placed in the public domain by Wei Dai</span> <a name="l00002"></a>00002 <a name="l00003"></a>00003 <span class="comment">// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM salsa.cpp" to generate MASM code</span> <a name="l00004"></a>00004 <a name="l00005"></a>00005 <span class="preprocessor">#include "pch.h"</span> <a name="l00006"></a>00006 <a name="l00007"></a>00007 <span class="preprocessor">#ifndef CRYPTOPP_GENERATE_X64_MASM</span> <a name="l00008"></a>00008 <span class="preprocessor"></span> <a name="l00009"></a>00009 <span class="preprocessor">#include "salsa.h"</span> <a name="l00010"></a>00010 <span class="preprocessor">#include "misc.h"</span> <a name="l00011"></a>00011 <span class="preprocessor">#include "argnames.h"</span> <a name="l00012"></a>00012 <span class="preprocessor">#include "cpu.h"</span> <a name="l00013"></a>00013 <a name="l00014"></a>00014 NAMESPACE_BEGIN(CryptoPP) <a name="l00015"></a>00015 <a 
name="l00016"></a>00016 void Salsa20_TestInstantiations() <a name="l00017"></a>00017 { <a name="l00018"></a>00018 <a class="code" href="class_symmetric_cipher_final.html" title="_">Salsa20::Encryption</a> x; <a name="l00019"></a>00019 } <a name="l00020"></a>00020 <a name="l00021"></a>00021 <span class="keywordtype">void</span> Salsa20_Policy::CipherSetKey(<span class="keyword">const</span> <a class="code" href="class_name_value_pairs.html" title="interface for retrieving values given their names">NameValuePairs</a> &amp;params, <span class="keyword">const</span> byte *key, <span class="keywordtype">size_t</span> length) <a name="l00022"></a>00022 { <a name="l00023"></a>00023 m_rounds = params.<a class="code" href="class_name_value_pairs.html#ac269314685b737912d3499f4a9399618" title="get a named value with type int, with default">GetIntValueWithDefault</a>(Name::Rounds(), 20); <a name="l00024"></a>00024 <a name="l00025"></a>00025 <span class="keywordflow">if</span> (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20)) <a name="l00026"></a>00026 <span class="keywordflow">throw</span> <a class="code" href="class_invalid_rounds.html" title="_">InvalidRounds</a>(Salsa20::StaticAlgorithmName(), m_rounds); <a name="l00027"></a>00027 <a name="l00028"></a>00028 <span class="comment">// m_state is reordered for SSE2</span> <a name="l00029"></a>00029 <a class="code" href="class_get_block.html">GetBlock&lt;word32, LittleEndian&gt;</a> get1(key); <a name="l00030"></a>00030 get1(m_state[13])(m_state[10])(m_state[7])(m_state[4]); <a name="l00031"></a>00031 <a class="code" href="class_get_block.html">GetBlock&lt;word32, LittleEndian&gt;</a> get2(key + length - 16); <a name="l00032"></a>00032 get2(m_state[15])(m_state[12])(m_state[9])(m_state[6]); <a name="l00033"></a>00033 <a name="l00034"></a>00034 <span class="comment">// "expand 16-byte k" or "expand 32-byte k"</span> <a name="l00035"></a>00035 m_state[0] = 0x61707865; <a name="l00036"></a>00036 m_state[1] = (length == 16) ? 
0x3120646e : 0x3320646e; <a name="l00037"></a>00037 m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32; <a name="l00038"></a>00038 m_state[3] = 0x6b206574; <a name="l00039"></a>00039 } <a name="l00040"></a>00040 <a name="l00041"></a>00041 <span class="keywordtype">void</span> Salsa20_Policy::CipherResynchronize(byte *keystreamBuffer, <span class="keyword">const</span> byte *IV, <span class="keywordtype">size_t</span> length) <a name="l00042"></a>00042 { <a name="l00043"></a>00043 assert(length==8); <a name="l00044"></a>00044 <a class="code" href="class_get_block.html">GetBlock&lt;word32, LittleEndian&gt;</a> <span class="keyword">get</span>(IV); <a name="l00045"></a>00045 <span class="keyword">get</span>(m_state[14])(m_state[11]); <a name="l00046"></a>00046 m_state[8] = m_state[5] = 0; <a name="l00047"></a>00047 } <a name="l00048"></a>00048 <a name="l00049"></a>00049 <span class="keywordtype">void</span> Salsa20_Policy::SeekToIteration(lword iterationCount) <a name="l00050"></a>00050 { <a name="l00051"></a>00051 m_state[8] = (word32)iterationCount; <a name="l00052"></a>00052 m_state[5] = (word32)SafeRightShift&lt;32&gt;(iterationCount); <a name="l00053"></a>00053 } <a name="l00054"></a>00054 <a name="l00055"></a>00055 <span class="preprocessor">#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64</span> <a name="l00056"></a>00056 <span class="preprocessor"></span><span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> Salsa20_Policy::GetAlignment()<span class="keyword"> const</span> <a name="l00057"></a>00057 <span class="keyword"></span>{ <a name="l00058"></a>00058 <span class="preprocessor">#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE</span> <a name="l00059"></a>00059 <span class="preprocessor"></span> <span class="keywordflow">if</span> (HasSSE2()) <a name="l00060"></a>00060 <span class="keywordflow">return</span> 16; <a name="l00061"></a>00061 <span class="keywordflow">else</span> <a name="l00062"></a>00062 <span class="preprocessor">#endif</span> <a 
name="l00063"></a>00063 <span class="preprocessor"></span> <span class="keywordflow">return</span> GetAlignmentOf&lt;word32&gt;(); <a name="l00064"></a>00064 } <a name="l00065"></a>00065 <a name="l00066"></a>00066 <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> Salsa20_Policy::GetOptimalBlockSize()<span class="keyword"> const</span> <a name="l00067"></a>00067 <span class="keyword"></span>{ <a name="l00068"></a>00068 <span class="preprocessor">#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE</span> <a name="l00069"></a>00069 <span class="preprocessor"></span> <span class="keywordflow">if</span> (HasSSE2()) <a name="l00070"></a>00070 <span class="keywordflow">return</span> 4*BYTES_PER_ITERATION; <a name="l00071"></a>00071 <span class="keywordflow">else</span> <a name="l00072"></a>00072 <span class="preprocessor">#endif</span> <a name="l00073"></a>00073 <span class="preprocessor"></span> <span class="keywordflow">return</span> BYTES_PER_ITERATION; <a name="l00074"></a>00074 } <a name="l00075"></a>00075 <span class="preprocessor">#endif</span> <a name="l00076"></a>00076 <span class="preprocessor"></span> <a name="l00077"></a>00077 <span class="preprocessor">#ifdef CRYPTOPP_X64_MASM_AVAILABLE</span> <a name="l00078"></a>00078 <span class="preprocessor"></span><span class="keyword">extern</span> <span class="stringliteral">"C"</span> { <a name="l00079"></a>00079 <span class="keywordtype">void</span> Salsa20_OperateKeystream(byte *output, <span class="keyword">const</span> byte *input, <span class="keywordtype">size_t</span> iterationCount, <span class="keywordtype">int</span> rounds, <span class="keywordtype">void</span> *state); <a name="l00080"></a>00080 } <a name="l00081"></a>00081 <span class="preprocessor">#endif</span> <a name="l00082"></a>00082 <span class="preprocessor"></span> <a name="l00083"></a>00083 <span class="preprocessor">#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code</span> <a 
name="l00084"></a>00084 <span class="preprocessor"></span> <a name="l00085"></a>00085 <span class="keywordtype">void</span> Salsa20_Policy::OperateKeystream(KeystreamOperation operation, byte *output, <span class="keyword">const</span> byte *input, <span class="keywordtype">size_t</span> iterationCount) <a name="l00086"></a>00086 { <a name="l00087"></a>00087 <span class="preprocessor">#endif // #ifdef CRYPTOPP_GENERATE_X64_MASM</span> <a name="l00088"></a>00088 <span class="preprocessor"></span> <a name="l00089"></a>00089 <span class="preprocessor">#ifdef CRYPTOPP_X64_MASM_AVAILABLE</span> <a name="l00090"></a>00090 <span class="preprocessor"></span> Salsa20_OperateKeystream(output, input, iterationCount, m_rounds, m_state.data()); <a name="l00091"></a>00091 <span class="keywordflow">return</span>; <a name="l00092"></a>00092 <span class="preprocessor">#endif</span> <a name="l00093"></a>00093 <span class="preprocessor"></span> <a name="l00094"></a>00094 <span class="preprocessor">#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE</span> <a name="l00095"></a>00095 <span class="preprocessor"></span><span class="preprocessor">#ifdef CRYPTOPP_GENERATE_X64_MASM</span> <a name="l00096"></a>00096 <span class="preprocessor"></span> ALIGN 8 <a name="l00097"></a>00097 Salsa20_OperateKeystream PROC FRAME <a name="l00098"></a>00098 mov r10, [rsp + 5*8] ; state <a name="l00099"></a>00099 alloc_stack(10*16 + 32*16 + 8) <a name="l00100"></a>00100 save_xmm128 xmm6, 0200h <a name="l00101"></a>00101 save_xmm128 xmm7, 0210h <a name="l00102"></a>00102 save_xmm128 xmm8, 0220h <a name="l00103"></a>00103 save_xmm128 xmm9, 0230h <a name="l00104"></a>00104 save_xmm128 xmm10, 0240h <a name="l00105"></a>00105 save_xmm128 xmm11, 0250h <a name="l00106"></a>00106 save_xmm128 xmm12, 0260h <a name="l00107"></a>00107 save_xmm128 xmm13, 0270h <a name="l00108"></a>00108 save_xmm128 xmm14, 0280h <a name="l00109"></a>00109 save_xmm128 xmm15, 0290h <a name="l00110"></a>00110 .endprolog <a name="l00111"></a>00111 <a 
name="l00112"></a>00112 <span class="preprocessor">#define REG_output rcx</span> <a name="l00113"></a>00113 <span class="preprocessor"></span><span class="preprocessor"> #define REG_input rdx</span> <a name="l00114"></a>00114 <span class="preprocessor"></span><span class="preprocessor"> #define REG_iterationCount r8</span> <a name="l00115"></a>00115 <span class="preprocessor"></span><span class="preprocessor"> #define REG_state r10</span> <a name="l00116"></a>00116 <span class="preprocessor"></span><span class="preprocessor"> #define REG_rounds e9d</span> <a name="l00117"></a>00117 <span class="preprocessor"></span><span class="preprocessor"> #define REG_roundsLeft eax</span> <a name="l00118"></a>00118 <span class="preprocessor"></span><span class="preprocessor"> #define REG_temp32 r11d</span> <a name="l00119"></a>00119 <span class="preprocessor"></span><span class="preprocessor"> #define REG_temp r11</span> <a name="l00120"></a>00120 <span class="preprocessor"></span><span class="preprocessor"> #define SSE2_WORKSPACE rsp</span> <a name="l00121"></a>00121 <span class="preprocessor"></span><span class="preprocessor">#else</span> <a name="l00122"></a>00122 <span class="preprocessor"></span> <span class="keywordflow">if</span> (HasSSE2()) <a name="l00123"></a>00123 { <a name="l00124"></a>00124 <span class="preprocessor"> #if CRYPTOPP_BOOL_X64</span> <a name="l00125"></a>00125 <span class="preprocessor"></span><span class="preprocessor"> #define REG_output %4</span> <a name="l00126"></a>00126 <span class="preprocessor"></span><span class="preprocessor"> #define REG_input %1</span> <a name="l00127"></a>00127 <span class="preprocessor"></span><span class="preprocessor"> #define REG_iterationCount %2</span> <a name="l00128"></a>00128 <span class="preprocessor"></span><span class="preprocessor"> #define REG_state %3</span> <a name="l00129"></a>00129 <span class="preprocessor"></span><span class="preprocessor"> #define REG_rounds %0</span> <a name="l00130"></a>00130 <span 
class="preprocessor"></span><span class="preprocessor"> #define REG_roundsLeft eax</span> <a name="l00131"></a>00131 <span class="preprocessor"></span><span class="preprocessor"> #define REG_temp32 edx</span> <a name="l00132"></a>00132 <span class="preprocessor"></span><span class="preprocessor"> #define REG_temp rdx</span> <a name="l00133"></a>00133 <span class="preprocessor"></span><span class="preprocessor"> #define SSE2_WORKSPACE %5</span> <a name="l00134"></a>00134 <span class="preprocessor"></span> <a name="l00135"></a>00135 <a class="code" href="class_fixed_size_aligned_sec_block.html">FixedSizeAlignedSecBlock&lt;byte, 32*16&gt;</a> workspace; <a name="l00136"></a>00136 <span class="preprocessor"> #else</span> <a name="l00137"></a>00137 <span class="preprocessor"></span><span class="preprocessor"> #define REG_output edi</span> <a name="l00138"></a>00138 <span class="preprocessor"></span><span class="preprocessor"> #define REG_input eax</span> <a name="l00139"></a>00139 <span class="preprocessor"></span><span class="preprocessor"> #define REG_iterationCount ecx</span> <a name="l00140"></a>00140 <span class="preprocessor"></span><span class="preprocessor"> #define REG_state esi</span> <a name="l00141"></a>00141 <span class="preprocessor"></span><span class="preprocessor"> #define REG_rounds edx</span> <a name="l00142"></a>00142 <span class="preprocessor"></span><span class="preprocessor"> #define REG_roundsLeft ebx</span> <a name="l00143"></a>00143 <span class="preprocessor"></span><span class="preprocessor"> #define REG_temp32 ebp</span> <a name="l00144"></a>00144 <span class="preprocessor"></span><span class="preprocessor"> #define REG_temp ebp</span> <a name="l00145"></a>00145 <span class="preprocessor"></span><span class="preprocessor"> #define SSE2_WORKSPACE esp + WORD_SZ</span> <a name="l00146"></a>00146 <span class="preprocessor"></span><span class="preprocessor"> #endif</span> <a name="l00147"></a>00147 <span class="preprocessor"></span> <a 
name="l00148"></a>00148 <span class="preprocessor"> #ifdef __GNUC__</span> <a name="l00149"></a>00149 <span class="preprocessor"></span> __asm__ __volatile__ <a name="l00150"></a>00150 ( <a name="l00151"></a>00151 <span class="stringliteral">".intel_syntax noprefix;"</span> <a name="l00152"></a>00152 AS_PUSH_IF86( bx) <a name="l00153"></a>00153 #<span class="keywordflow">else</span> <a name="l00154"></a>00154 <span class="keywordtype">void</span> *s = m_state.data(); <a name="l00155"></a>00155 word32 r = m_rounds; <a name="l00156"></a>00156 <a name="l00157"></a>00157 AS2( mov REG_iterationCount, iterationCount) <a name="l00158"></a>00158 AS2( mov REG_input, input) <a name="l00159"></a>00159 AS2( mov REG_output, output) <a name="l00160"></a>00160 AS2( mov REG_state, s) <a name="l00161"></a>00161 AS2( mov REG_rounds, r) <a name="l00162"></a>00162 <span class="preprocessor">#endif</span> <a name="l00163"></a>00163 <span class="preprocessor"></span><span class="preprocessor">#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM</span> <a name="l00164"></a>00164 <span class="preprocessor"></span> <a name="l00165"></a>00165 AS_PUSH_IF86( bp) <a name="l00166"></a>00166 AS2( cmp REG_iterationCount, 4) <a name="l00167"></a>00167 ASJ( jl, 5, f) <a name="l00168"></a>00168 <a name="l00169"></a>00169 <span class="preprocessor">#if CRYPTOPP_BOOL_X86</span> <a name="l00170"></a>00170 <span class="preprocessor"></span> AS2( mov ebx, esp) <a name="l00171"></a>00171 AS2( and esp, -16) <a name="l00172"></a>00172 AS2( sub esp, 32*16) <a name="l00173"></a>00173 AS1( push ebx) <a name="l00174"></a>00174 <span class="preprocessor">#endif</span> <a name="l00175"></a>00175 <span class="preprocessor"></span> <a name="l00176"></a>00176 <span class="preprocessor">#define SSE2_EXPAND_S(i, j) \</span> <a name="l00177"></a>00177 <span class="preprocessor"> ASS( pshufd xmm4, xmm##i, j, j, j, j) \</span> <a name="l00178"></a>00178 <span class="preprocessor"> AS2( movdqa [SSE2_WORKSPACE + (i*4+j)*16 + 256], 
xmm4)</span> <a name="l00179"></a>00179 <span class="preprocessor"></span> <a name="l00180"></a>00180 AS2( movdqa xmm0, [REG_state + 0*16]) <a name="l00181"></a>00181 AS2( movdqa xmm1, [REG_state + 1*16]) <a name="l00182"></a>00182 AS2( movdqa xmm2, [REG_state + 2*16]) <a name="l00183"></a>00183 AS2( movdqa xmm3, [REG_state + 3*16]) <a name="l00184"></a>00184 SSE2_EXPAND_S(0, 0) <a name="l00185"></a>00185 SSE2_EXPAND_S(0, 1) <a name="l00186"></a>00186 SSE2_EXPAND_S(0, 2) <a name="l00187"></a>00187 SSE2_EXPAND_S(0, 3) <a name="l00188"></a>00188 SSE2_EXPAND_S(1, 0) <a name="l00189"></a>00189 SSE2_EXPAND_S(1, 2) <a name="l00190"></a>00190 SSE2_EXPAND_S(1, 3) <a name="l00191"></a>00191 SSE2_EXPAND_S(2, 1) <a name="l00192"></a>00192 SSE2_EXPAND_S(2, 2) <a name="l00193"></a>00193 SSE2_EXPAND_S(2, 3) <a name="l00194"></a>00194 SSE2_EXPAND_S(3, 0) <a name="l00195"></a>00195 SSE2_EXPAND_S(3, 1) <a name="l00196"></a>00196 SSE2_EXPAND_S(3, 2) <a name="l00197"></a>00197 SSE2_EXPAND_S(3, 3) <a name="l00198"></a>00198 <a name="l00199"></a>00199 <span class="preprocessor">#define SSE2_EXPAND_S85(i) \</span> <a name="l00200"></a>00200 <span class="preprocessor"> AS2( mov dword ptr [SSE2_WORKSPACE + 8*16 + i*4 + 256], REG_roundsLeft) \</span> <a name="l00201"></a>00201 <span class="preprocessor"> AS2( mov dword ptr [SSE2_WORKSPACE + 5*16 + i*4 + 256], REG_temp32) \</span> <a name="l00202"></a>00202 <span class="preprocessor"> AS2( add REG_roundsLeft, 1) \</span> <a name="l00203"></a>00203 <span class="preprocessor"> AS2( adc REG_temp32, 0)</span> <a name="l00204"></a>00204 <span class="preprocessor"></span> <a name="l00205"></a>00205 ASL(1) <a name="l00206"></a>00206 AS2( mov REG_roundsLeft, dword ptr [REG_state + 8*4]) <a name="l00207"></a>00207 AS2( mov REG_temp32, dword ptr [REG_state + 5*4]) <a name="l00208"></a>00208 SSE2_EXPAND_S85(0) <a name="l00209"></a>00209 SSE2_EXPAND_S85(1) <a name="l00210"></a>00210 SSE2_EXPAND_S85(2) <a name="l00211"></a>00211 SSE2_EXPAND_S85(3) <a 
name="l00212"></a>00212 AS2( mov dword ptr [REG_state + 8*4], REG_roundsLeft) <a name="l00213"></a>00213 AS2( mov dword ptr [REG_state + 5*4], REG_temp32) <a name="l00214"></a>00214 <a name="l00215"></a>00215 <span class="preprocessor">#define SSE2_QUARTER_ROUND(a, b, d, i) \</span> <a name="l00216"></a>00216 <span class="preprocessor"> AS2( movdqa xmm4, xmm##d) \</span> <a name="l00217"></a>00217 <span class="preprocessor"> AS2( paddd xmm4, xmm##a) \</span> <a name="l00218"></a>00218 <span class="preprocessor"> AS2( movdqa xmm5, xmm4) \</span> <a name="l00219"></a>00219 <span class="preprocessor"> AS2( pslld xmm4, i) \</span> <a name="l00220"></a>00220 <span class="preprocessor"> AS2( psrld xmm5, 32-i) \</span> <a name="l00221"></a>00221 <span class="preprocessor"> AS2( pxor xmm##b, xmm4) \</span> <a name="l00222"></a>00222 <span class="preprocessor"> AS2( pxor xmm##b, xmm5)</span> <a name="l00223"></a>00223 <span class="preprocessor"></span> <a name="l00224"></a>00224 <span class="preprocessor">#define L01(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) </span><span class="comment">/* y3 */</span> <a name="l00225"></a>00225 <span class="preprocessor">#define L02(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##C, [SSE2_WORKSPACE + a*16 + i*256]) </span><span class="comment">/* y0 */</span> <a name="l00226"></a>00226 <span class="preprocessor">#define L03(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) </span><span class="comment">/* y0+y3 */</span> <a name="l00227"></a>00227 <span class="preprocessor">#define L04(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) </span> <a name="l00228"></a>00228 <span class="preprocessor"></span><span class="preprocessor">#define L05(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 7) </span> <a name="l00229"></a>00229 <span class="preprocessor"></span><span class="preprocessor">#define L06(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-7) </span> <a name="l00230"></a>00230 <span class="preprocessor"></span><span 
class="preprocessor">#define L07(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + b*16 + i*256]) </span> <a name="l00231"></a>00231 <span class="preprocessor"></span><span class="preprocessor">#define L08(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) </span><span class="comment">/* z1 */</span> <a name="l00232"></a>00232 <span class="preprocessor">#define L09(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + b*16], xmm##A) </span> <a name="l00233"></a>00233 <span class="preprocessor"></span><span class="preprocessor">#define L10(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) </span> <a name="l00234"></a>00234 <span class="preprocessor"></span><span class="preprocessor">#define L11(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) </span><span class="comment">/* z1+y0 */</span> <a name="l00235"></a>00235 <span class="preprocessor">#define L12(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) </span> <a name="l00236"></a>00236 <span class="preprocessor"></span><span class="preprocessor">#define L13(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 9) </span> <a name="l00237"></a>00237 <span class="preprocessor"></span><span class="preprocessor">#define L14(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-9) </span> <a name="l00238"></a>00238 <span class="preprocessor"></span><span class="preprocessor">#define L15(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + c*16 + i*256]) </span> <a name="l00239"></a>00239 <span class="preprocessor"></span><span class="preprocessor">#define L16(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) </span><span class="comment">/* z2 */</span> <a name="l00240"></a>00240 <span class="preprocessor">#define L17(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + c*16], xmm##A) </span> <a name="l00241"></a>00241 <span class="preprocessor"></span><span class="preprocessor">#define L18(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) </span> <a name="l00242"></a>00242 <span class="preprocessor"></span><span class="preprocessor">#define L19(A,B,C,D,a,b,c,d,i) AS2( 
paddd xmm##A, xmm##B) </span><span class="comment">/* z2+z1 */</span> <a name="l00243"></a>00243 <span class="preprocessor">#define L20(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) </span> <a name="l00244"></a>00244 <span class="preprocessor"></span><span class="preprocessor">#define L21(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 13) </span> <a name="l00245"></a>00245 <span class="preprocessor"></span><span class="preprocessor">#define L22(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-13) </span> <a name="l00246"></a>00246 <span class="preprocessor"></span><span class="preprocessor">#define L23(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) </span> <a name="l00247"></a>00247 <span class="preprocessor"></span><span class="preprocessor">#define L24(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) </span><span class="comment">/* z3 */</span> <a name="l00248"></a>00248 <span class="preprocessor">#define L25(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + d*16], xmm##A) </span> <a name="l00249"></a>00249 <span class="preprocessor"></span><span class="preprocessor">#define L26(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##D) </span><span class="comment">/* z3+z2 */</span> <a name="l00250"></a>00250 <span class="preprocessor">#define L27(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) </span> <a name="l00251"></a>00251 <span class="preprocessor"></span><span class="preprocessor">#define L28(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 18) </span> <a name="l00252"></a>00252 <span class="preprocessor"></span><span class="preprocessor">#define L29(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-18) </span> <a name="l00253"></a>00253 <span class="preprocessor"></span><span class="preprocessor">#define L30(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##C) </span><span class="comment">/* xor y0 */</span> <a name="l00254"></a>00254 <span class="preprocessor">#define L31(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) </span><span class="comment">/* z0 */</span> <a name="l00255"></a>00255 
<span class="preprocessor">#define L32(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + a*16], xmm##A) </span> <a name="l00256"></a>00256 <span class="preprocessor"></span> <a name="l00257"></a>00257 <span class="preprocessor">#define SSE2_QUARTER_ROUND_X8(i, a, b, c, d, e, f, g, h) \</span> <a name="l00258"></a>00258 <span class="preprocessor"> L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) \</span> <a name="l00259"></a>00259 <span class="preprocessor"> L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) \</span> <a name="l00260"></a>00260 <span class="preprocessor"> L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) \</span> <a name="l00261"></a>00261 <span class="preprocessor"> L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) \</span> <a name="l00262"></a>00262 <span class="preprocessor"> L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) \</span> <a name="l00263"></a>00263 <span class="preprocessor"> L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) \</span> <a name="l00264"></a>00264 <span class="preprocessor"> L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) \</span> <a name="l00265"></a>00265 <span class="preprocessor"> L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) \</span> <a name="l00266"></a>00266 <span class="preprocessor"> L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) \</span> <a name="l00267"></a>00267 <span class="preprocessor"> L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) \</span> <a name="l00268"></a>00268 <span class="preprocessor"> L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) \</span> <a name="l00269"></a>00269 <span class="preprocessor"> L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) \</span> <a name="l00270"></a>00270 <span class="preprocessor"> L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) \</span> <a name="l00271"></a>00271 <span class="preprocessor"> L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) \</span> <a name="l00272"></a>00272 <span class="preprocessor"> L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) 
\</span> <a name="l00273"></a>00273 <span class="preprocessor"> L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) \</span> <a name="l00274"></a>00274 <span class="preprocessor"> L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) \</span> <a name="l00275"></a>00275 <span class="preprocessor"> L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) \</span> <a name="l00276"></a>00276 <span class="preprocessor"> L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) \</span> <a name="l00277"></a>00277 <span class="preprocessor"> L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) \</span> <a name="l00278"></a>00278 <span class="preprocessor"> L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) \</span> <a name="l00279"></a>00279 <span class="preprocessor"> L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) \</span> <a name="l00280"></a>00280 <span class="preprocessor"> L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) \</span> <a name="l00281"></a>00281 <span class="preprocessor"> L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) \</span> <a name="l00282"></a>00282 <span class="preprocessor"> L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) \</span> <a name="l00283"></a>00283 <span class="preprocessor"> L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) \</span> <a name="l00284"></a>00284 <span class="preprocessor"> L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) \</span> <a name="l00285"></a>00285 <span class="preprocessor"> L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) \</span> <a name="l00286"></a>00286 <span class="preprocessor"> L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) \</span> <a name="l00287"></a>00287 <span class="preprocessor"> L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) \</span> <a name="l00288"></a>00288 <span class="preprocessor"> L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) \</span> <a name="l00289"></a>00289 <span class="preprocessor"> L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i)</span> <a name="l00290"></a>00290 <span 
class="preprocessor"></span> <a name="l00291"></a>00291 <span class="preprocessor">#define SSE2_QUARTER_ROUND_X16(i, a, b, c, d, e, f, g, h, A, B, C, D, E, F, G, H) \</span> <a name="l00292"></a>00292 <span class="preprocessor"> L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) L01(8,9,10,11, A,B,C,D, i) L01(12,13,14,15, E,F,G,H, i) \</span> <a name="l00293"></a>00293 <span class="preprocessor"> L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) L02(8,9,10,11, A,B,C,D, i) L02(12,13,14,15, E,F,G,H, i) \</span> <a name="l00294"></a>00294 <span class="preprocessor"> L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) L03(8,9,10,11, A,B,C,D, i) L03(12,13,14,15, E,F,G,H, i) \</span> <a name="l00295"></a>00295 <span class="preprocessor"> L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) L04(8,9,10,11, A,B,C,D, i) L04(12,13,14,15, E,F,G,H, i) \</span> <a name="l00296"></a>00296 <span class="preprocessor"> L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) L05(8,9,10,11, A,B,C,D, i) L05(12,13,14,15, E,F,G,H, i) \</span> <a name="l00297"></a>00297 <span class="preprocessor"> L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) L06(8,9,10,11, A,B,C,D, i) L06(12,13,14,15, E,F,G,H, i) \</span> <a name="l00298"></a>00298 <span class="preprocessor"> L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) L07(8,9,10,11, A,B,C,D, i) L07(12,13,14,15, E,F,G,H, i) \</span> <a name="l00299"></a>00299 <span class="preprocessor"> L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) L08(8,9,10,11, A,B,C,D, i) L08(12,13,14,15, E,F,G,H, i) \</span> <a name="l00300"></a>00300 <span class="preprocessor"> L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) L09(8,9,10,11, A,B,C,D, i) L09(12,13,14,15, E,F,G,H, i) \</span> <a name="l00301"></a>00301 <span class="preprocessor"> L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) L10(8,9,10,11, A,B,C,D, i) L10(12,13,14,15, E,F,G,H, i) \</span> <a name="l00302"></a>00302 <span class="preprocessor"> L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) L11(8,9,10,11, 
A,B,C,D, i) L11(12,13,14,15, E,F,G,H, i) \</span> <a name="l00303"></a>00303 <span class="preprocessor"> L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) L12(8,9,10,11, A,B,C,D, i) L12(12,13,14,15, E,F,G,H, i) \</span> <a name="l00304"></a>00304 <span class="preprocessor"> L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) L13(8,9,10,11, A,B,C,D, i) L13(12,13,14,15, E,F,G,H, i) \</span> <a name="l00305"></a>00305 <span class="preprocessor"> L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) L14(8,9,10,11, A,B,C,D, i) L14(12,13,14,15, E,F,G,H, i) \</span> <a name="l00306"></a>00306 <span class="preprocessor"> L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) L15(8,9,10,11, A,B,C,D, i) L15(12,13,14,15, E,F,G,H, i) \</span> <a name="l00307"></a>00307 <span class="preprocessor"> L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) L16(8,9,10,11, A,B,C,D, i) L16(12,13,14,15, E,F,G,H, i) \</span> <a name="l00308"></a>00308 <span class="preprocessor"> L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) L17(8,9,10,11, A,B,C,D, i) L17(12,13,14,15, E,F,G,H, i) \</span> <a name="l00309"></a>00309 <span class="preprocessor"> L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) L18(8,9,10,11, A,B,C,D, i) L18(12,13,14,15, E,F,G,H, i) \</span> <a name="l00310"></a>00310 <span class="preprocessor"> L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) L19(8,9,10,11, A,B,C,D, i) L19(12,13,14,15, E,F,G,H, i) \</span> <a name="l00311"></a>00311 <span class="preprocessor"> L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) L20(8,9,10,11, A,B,C,D, i) L20(12,13,14,15, E,F,G,H, i) \</span> <a name="l00312"></a>00312 <span class="preprocessor"> L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) L21(8,9,10,11, A,B,C,D, i) L21(12,13,14,15, E,F,G,H, i) \</span> <a name="l00313"></a>00313 <span class="preprocessor"> L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) L22(8,9,10,11, A,B,C,D, i) L22(12,13,14,15, E,F,G,H, i) \</span> <a name="l00314"></a>00314 <span class="preprocessor"> L23(0,1,2,3, a,b,c,d, i) 
L23(4,5,6,7, e,f,g,h, i) L23(8,9,10,11, A,B,C,D, i) L23(12,13,14,15, E,F,G,H, i) \</span> <a name="l00315"></a>00315 <span class="preprocessor"> L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) L24(8,9,10,11, A,B,C,D, i) L24(12,13,14,15, E,F,G,H, i) \</span> <a name="l00316"></a>00316 <span class="preprocessor"> L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) L25(8,9,10,11, A,B,C,D, i) L25(12,13,14,15, E,F,G,H, i) \</span> <a name="l00317"></a>00317 <span class="preprocessor"> L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) L26(8,9,10,11, A,B,C,D, i) L26(12,13,14,15, E,F,G,H, i) \</span> <a name="l00318"></a>00318 <span class="preprocessor"> L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) L27(8,9,10,11, A,B,C,D, i) L27(12,13,14,15, E,F,G,H, i) \</span> <a name="l00319"></a>00319 <span class="preprocessor"> L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) L28(8,9,10,11, A,B,C,D, i) L28(12,13,14,15, E,F,G,H, i) \</span> <a name="l00320"></a>00320 <span class="preprocessor"> L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) L29(8,9,10,11, A,B,C,D, i) L29(12,13,14,15, E,F,G,H, i) \</span> <a name="l00321"></a>00321 <span class="preprocessor"> L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) L30(8,9,10,11, A,B,C,D, i) L30(12,13,14,15, E,F,G,H, i) \</span> <a name="l00322"></a>00322 <span class="preprocessor"> L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) L31(8,9,10,11, A,B,C,D, i) L31(12,13,14,15, E,F,G,H, i) \</span> <a name="l00323"></a>00323 <span class="preprocessor"> L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i) L32(8,9,10,11, A,B,C,D, i) L32(12,13,14,15, E,F,G,H, i)</span> <a name="l00324"></a>00324 <span class="preprocessor"></span> <a name="l00325"></a>00325 <span class="preprocessor">#if CRYPTOPP_BOOL_X64</span> <a name="l00326"></a>00326 <span class="preprocessor"></span> SSE2_QUARTER_ROUND_X16(1, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15) <a name="l00327"></a>00327 <span class="preprocessor">#else</span> <a name="l00328"></a>00328 
<span class="preprocessor"></span> SSE2_QUARTER_ROUND_X8(1, 2, 6, 10, 14, 3, 7, 11, 15) <a name="l00329"></a>00329 SSE2_QUARTER_ROUND_X8(1, 0, 4, 8, 12, 1, 5, 9, 13) <a name="l00330"></a>00330 <span class="preprocessor">#endif</span> <a name="l00331"></a>00331 <span class="preprocessor"></span> AS2( mov REG_roundsLeft, REG_rounds) <a name="l00332"></a>00332 ASJ( jmp, 2, f) <a name="l00333"></a>00333 <a name="l00334"></a>00334 ASL(SSE2_Salsa_Output) <a name="l00335"></a>00335 AS2( movdqa xmm0, xmm4) <a name="l00336"></a>00336 AS2( punpckldq xmm4, xmm5) <a name="l00337"></a>00337 AS2( movdqa xmm1, xmm6) <a name="l00338"></a>00338 AS2( punpckldq xmm6, xmm7) <a name="l00339"></a>00339 AS2( movdqa xmm2, xmm4) <a name="l00340"></a>00340 AS2( punpcklqdq xmm4, xmm6) <span class="comment">// e</span> <a name="l00341"></a>00341 AS2( punpckhqdq xmm2, xmm6) <span class="comment">// f</span> <a name="l00342"></a>00342 AS2( punpckhdq xmm0, xmm5) <a name="l00343"></a>00343 AS2( punpckhdq xmm1, xmm7) <a name="l00344"></a>00344 AS2( movdqa xmm6, xmm0) <a name="l00345"></a>00345 AS2( punpcklqdq xmm0, xmm1) <span class="comment">// g</span> <a name="l00346"></a>00346 AS2( punpckhqdq xmm6, xmm1) <span class="comment">// h</span> <a name="l00347"></a>00347 AS_XMM_OUTPUT4(SSE2_Salsa_Output_A, REG_input, REG_output, 4, 2, 0, 6, 1, 0, 4, 8, 12, 1) <a name="l00348"></a>00348 AS1( ret) <a name="l00349"></a>00349 <a name="l00350"></a>00350 ASL(6) <a name="l00351"></a>00351 <span class="preprocessor">#if CRYPTOPP_BOOL_X64</span> <a name="l00352"></a>00352 <span class="preprocessor"></span> SSE2_QUARTER_ROUND_X16(0, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15) <a name="l00353"></a>00353 ASL(2) <a name="l00354"></a>00354 SSE2_QUARTER_ROUND_X16(0, 0, 13, 10, 7, 1, 14, 11, 4, 2, 15, 8, 5, 3, 12, 9, 6) <a name="l00355"></a>00355 <span class="preprocessor">#else</span> <a name="l00356"></a>00356 <span class="preprocessor"></span> SSE2_QUARTER_ROUND_X8(0, 2, 6, 10, 14, 3, 7, 11, 15) <a 
name="l00357"></a>00357 SSE2_QUARTER_ROUND_X8(0, 0, 4, 8, 12, 1, 5, 9, 13) <a name="l00358"></a>00358 ASL(2) <a name="l00359"></a>00359 SSE2_QUARTER_ROUND_X8(0, 2, 15, 8, 5, 3, 12, 9, 6) <a name="l00360"></a>00360 SSE2_QUARTER_ROUND_X8(0, 0, 13, 10, 7, 1, 14, 11, 4) <a name="l00361"></a>00361 <span class="preprocessor">#endif</span> <a name="l00362"></a>00362 <span class="preprocessor"></span> AS2( sub REG_roundsLeft, 2) <a name="l00363"></a>00363 ASJ( jnz, 6, b) <a name="l00364"></a>00364 <a name="l00365"></a>00365 <span class="preprocessor">#define SSE2_OUTPUT_4(a, b, c, d) \</span> <a name="l00366"></a>00366 <span class="preprocessor"> AS2( movdqa xmm4, [SSE2_WORKSPACE + a*16 + 256])\</span> <a name="l00367"></a>00367 <span class="preprocessor"> AS2( paddd xmm4, [SSE2_WORKSPACE + a*16])\</span> <a name="l00368"></a>00368 <span class="preprocessor"> AS2( movdqa xmm5, [SSE2_WORKSPACE + b*16 + 256])\</span> <a name="l00369"></a>00369 <span class="preprocessor"> AS2( paddd xmm5, [SSE2_WORKSPACE + b*16])\</span> <a name="l00370"></a>00370 <span class="preprocessor"> AS2( movdqa xmm6, [SSE2_WORKSPACE + c*16 + 256])\</span> <a name="l00371"></a>00371 <span class="preprocessor"> AS2( paddd xmm6, [SSE2_WORKSPACE + c*16])\</span> <a name="l00372"></a>00372 <span class="preprocessor"> AS2( movdqa xmm7, [SSE2_WORKSPACE + d*16 + 256])\</span> <a name="l00373"></a>00373 <span class="preprocessor"> AS2( paddd xmm7, [SSE2_WORKSPACE + d*16])\</span> <a name="l00374"></a>00374 <span class="preprocessor"> ASC( call, SSE2_Salsa_Output)</span> <a name="l00375"></a>00375 <span class="preprocessor"></span> <a name="l00376"></a>00376 SSE2_OUTPUT_4(0, 13, 10, 7) <a name="l00377"></a>00377 SSE2_OUTPUT_4(4, 1, 14, 11) <a name="l00378"></a>00378 SSE2_OUTPUT_4(8, 5, 2, 15) <a name="l00379"></a>00379 SSE2_OUTPUT_4(12, 9, 6, 3) <a name="l00380"></a>00380 AS2( test REG_input, REG_input) <a name="l00381"></a>00381 ASJ( jz, 9, f) <a name="l00382"></a>00382 AS2( add REG_input, 12*16) <a 
name="l00383"></a>00383 ASL(9) <a name="l00384"></a>00384 AS2( add REG_output, 12*16) <a name="l00385"></a>00385 AS2( sub REG_iterationCount, 4) <a name="l00386"></a>00386 AS2( cmp REG_iterationCount, 4) <a name="l00387"></a>00387 ASJ( jge, 1, b) <a name="l00388"></a>00388 AS_POP_IF86( sp) <a name="l00389"></a>00389 <a name="l00390"></a>00390 ASL(5) <a name="l00391"></a>00391 AS2( sub REG_iterationCount, 1) <a name="l00392"></a>00392 ASJ( jl, 4, f) <a name="l00393"></a>00393 AS2( movdqa xmm0, [REG_state + 0*16]) <a name="l00394"></a>00394 AS2( movdqa xmm1, [REG_state + 1*16]) <a name="l00395"></a>00395 AS2( movdqa xmm2, [REG_state + 2*16]) <a name="l00396"></a>00396 AS2( movdqa xmm3, [REG_state + 3*16]) <a name="l00397"></a>00397 AS2( mov REG_roundsLeft, REG_rounds) <a name="l00398"></a>00398 <a name="l00399"></a>00399 ASL(0) <a name="l00400"></a>00400 SSE2_QUARTER_ROUND(0, 1, 3, 7) <a name="l00401"></a>00401 SSE2_QUARTER_ROUND(1, 2, 0, 9) <a name="l00402"></a>00402 SSE2_QUARTER_ROUND(2, 3, 1, 13) <a name="l00403"></a>00403 SSE2_QUARTER_ROUND(3, 0, 2, 18) <a name="l00404"></a>00404 ASS( pshufd xmm1, xmm1, 2, 1, 0, 3) <a name="l00405"></a>00405 ASS( pshufd xmm2, xmm2, 1, 0, 3, 2) <a name="l00406"></a>00406 ASS( pshufd xmm3, xmm3, 0, 3, 2, 1) <a name="l00407"></a>00407 SSE2_QUARTER_ROUND(0, 3, 1, 7) <a name="l00408"></a>00408 SSE2_QUARTER_ROUND(3, 2, 0, 9) <a name="l00409"></a>00409 SSE2_QUARTER_ROUND(2, 1, 3, 13) <a name="l00410"></a>00410 SSE2_QUARTER_ROUND(1, 0, 2, 18) <a name="l00411"></a>00411 ASS( pshufd xmm1, xmm1, 0, 3, 2, 1) <a name="l00412"></a>00412 ASS( pshufd xmm2, xmm2, 1, 0, 3, 2) <a name="l00413"></a>00413 ASS( pshufd xmm3, xmm3, 2, 1, 0, 3) <a name="l00414"></a>00414 AS2( sub REG_roundsLeft, 2) <a name="l00415"></a>00415 ASJ( jnz, 0, b) <a name="l00416"></a>00416 <a name="l00417"></a>00417 AS2( paddd xmm0, [REG_state + 0*16]) <a name="l00418"></a>00418 AS2( paddd xmm1, [REG_state + 1*16]) <a name="l00419"></a>00419 AS2( paddd xmm2, [REG_state + 
2*16]) <a name="l00420"></a>00420 AS2( paddd xmm3, [REG_state + 3*16]) <a name="l00421"></a>00421 <a name="l00422"></a>00422 AS2( add dword ptr [REG_state + 8*4], 1) <a name="l00423"></a>00423 AS2( adc dword ptr [REG_state + 5*4], 0) <a name="l00424"></a>00424 <a name="l00425"></a>00425 AS2( pcmpeqb xmm6, xmm6) <span class="comment">// all ones</span> <a name="l00426"></a>00426 AS2( psrlq xmm6, 32) <span class="comment">// lo32 mask</span> <a name="l00427"></a>00427 ASS( pshufd xmm7, xmm6, 0, 1, 2, 3) <span class="comment">// hi32 mask</span> <a name="l00428"></a>00428 AS2( movdqa xmm4, xmm0) <a name="l00429"></a>00429 AS2( movdqa xmm5, xmm3) <a name="l00430"></a>00430 AS2( pand xmm0, xmm7) <a name="l00431"></a>00431 AS2( pand xmm4, xmm6) <a name="l00432"></a>00432 AS2( pand xmm3, xmm6) <a name="l00433"></a>00433 AS2( pand xmm5, xmm7) <a name="l00434"></a>00434 AS2( por xmm4, xmm5) <span class="comment">// 0,13,2,15</span> <a name="l00435"></a>00435 AS2( movdqa xmm5, xmm1) <a name="l00436"></a>00436 AS2( pand xmm1, xmm7) <a name="l00437"></a>00437 AS2( pand xmm5, xmm6) <a name="l00438"></a>00438 AS2( por xmm0, xmm5) <span class="comment">// 4,1,6,3</span> <a name="l00439"></a>00439 AS2( pand xmm6, xmm2) <a name="l00440"></a>00440 AS2( pand xmm2, xmm7) <a name="l00441"></a>00441 AS2( por xmm1, xmm6) <span class="comment">// 8,5,10,7</span> <a name="l00442"></a>00442 AS2( por xmm2, xmm3) <span class="comment">// 12,9,14,11</span> <a name="l00443"></a>00443 <a name="l00444"></a>00444 AS2( movdqa xmm5, xmm4) <a name="l00445"></a>00445 AS2( movdqa xmm6, xmm0) <a name="l00446"></a>00446 AS3( shufpd xmm4, xmm1, 2) <span class="comment">// 0,13,10,7</span> <a name="l00447"></a>00447 AS3( shufpd xmm0, xmm2, 2) <span class="comment">// 4,1,14,11</span> <a name="l00448"></a>00448 AS3( shufpd xmm1, xmm5, 2) <span class="comment">// 8,5,2,15</span> <a name="l00449"></a>00449 AS3( shufpd xmm2, xmm6, 2) <span class="comment">// 12,9,6,3</span> <a name="l00450"></a>00450 <a 
name="l00451"></a>00451 <span class="comment">// output keystream</span> <a name="l00452"></a>00452 AS_XMM_OUTPUT4(SSE2_Salsa_Output_B, REG_input, REG_output, 4, 0, 1, 2, 3, 0, 1, 2, 3, 4) <a name="l00453"></a>00453 ASJ( jmp, 5, b) <a name="l00454"></a>00454 ASL(4) <a name="l00455"></a>00455 <a name="l00456"></a>00456 AS_POP_IF86( bp) <a name="l00457"></a>00457 <span class="preprocessor">#ifdef __GNUC__</span> <a name="l00458"></a>00458 <span class="preprocessor"></span> AS_POP_IF86( bx) <a name="l00459"></a>00459 <span class="stringliteral">".att_syntax prefix;"</span> <a name="l00460"></a>00460 : <a name="l00461"></a>00461 <span class="preprocessor"> #if CRYPTOPP_BOOL_X64</span> <a name="l00462"></a>00462 <span class="preprocessor"></span> : <span class="stringliteral">"r"</span> (m_rounds), <span class="stringliteral">"r"</span> (input), <span class="stringliteral">"r"</span> (iterationCount), <span class="stringliteral">"r"</span> (m_state.data()), <span class="stringliteral">"r"</span> (output), <span class="stringliteral">"r"</span> (workspace.m_ptr) <a name="l00463"></a>00463 : <span class="stringliteral">"%eax"</span>, <span class="stringliteral">"%edx"</span>, <span class="stringliteral">"memory"</span>, <span class="stringliteral">"cc"</span>, <span class="stringliteral">"%xmm0"</span>, <span class="stringliteral">"%xmm1"</span>, <span class="stringliteral">"%xmm2"</span>, <span class="stringliteral">"%xmm3"</span>, <span class="stringliteral">"%xmm4"</span>, <span class="stringliteral">"%xmm5"</span>, <span class="stringliteral">"%xmm6"</span>, <span class="stringliteral">"%xmm7"</span>, <span class="stringliteral">"%xmm8"</span>, <span class="stringliteral">"%xmm9"</span>, <span class="stringliteral">"%xmm10"</span>, <span class="stringliteral">"%xmm11"</span>, <span class="stringliteral">"%xmm12"</span>, <span class="stringliteral">"%xmm13"</span>, <span class="stringliteral">"%xmm14"</span>, <span class="stringliteral">"%xmm15"</span> <a 
name="l00464"></a>00464 #<span class="keywordflow">else</span> <a name="l00465"></a>00465 : <span class="stringliteral">"d"</span> (m_rounds), <span class="stringliteral">"a"</span> (input), <span class="stringliteral">"c"</span> (iterationCount), <span class="stringliteral">"S"</span> (m_state.data()), <span class="stringliteral">"D"</span> (output) <a name="l00466"></a>00466 : <span class="stringliteral">"memory"</span>, <span class="stringliteral">"cc"</span> <a name="l00467"></a>00467 <span class="preprocessor"> #endif</span> <a name="l00468"></a>00468 <span class="preprocessor"></span> ); <a name="l00469"></a>00469 <span class="preprocessor">#endif</span> <a name="l00470"></a>00470 <span class="preprocessor"></span><span class="preprocessor">#ifdef CRYPTOPP_GENERATE_X64_MASM</span> <a name="l00471"></a>00471 <span class="preprocessor"></span> movdqa xmm6, [rsp + 0200h] <a name="l00472"></a>00472 movdqa xmm7, [rsp + 0210h] <a name="l00473"></a>00473 movdqa xmm8, [rsp + 0220h] <a name="l00474"></a>00474 movdqa xmm9, [rsp + 0230h] <a name="l00475"></a>00475 movdqa xmm10, [rsp + 0240h] <a name="l00476"></a>00476 movdqa xmm11, [rsp + 0250h] <a name="l00477"></a>00477 movdqa xmm12, [rsp + 0260h] <a name="l00478"></a>00478 movdqa xmm13, [rsp + 0270h] <a name="l00479"></a>00479 movdqa xmm14, [rsp + 0280h] <a name="l00480"></a>00480 movdqa xmm15, [rsp + 0290h] <a name="l00481"></a>00481 add rsp, 10*16 + 32*16 + 8 <a name="l00482"></a>00482 ret <a name="l00483"></a>00483 Salsa20_OperateKeystream ENDP <a name="l00484"></a>00484 <span class="preprocessor">#else</span> <a name="l00485"></a>00485 <span class="preprocessor"></span> } <a name="l00486"></a>00486 <span class="keywordflow">else</span> <a name="l00487"></a>00487 <span class="preprocessor">#endif</span> <a name="l00488"></a>00488 <span class="preprocessor"></span><span class="preprocessor">#endif</span> <a name="l00489"></a>00489 <span class="preprocessor"></span><span class="preprocessor">#ifndef 
CRYPTOPP_GENERATE_X64_MASM</span> <a name="l00490"></a>00490 <span class="preprocessor"></span> { <a name="l00491"></a>00491 word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; <a name="l00492"></a>00492 <a name="l00493"></a>00493 <span class="keywordflow">while</span> (iterationCount--) <a name="l00494"></a>00494 { <a name="l00495"></a>00495 x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3]; <a name="l00496"></a>00496 x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7]; <a name="l00497"></a>00497 x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11]; <a name="l00498"></a>00498 x12 = m_state[12]; x13 = m_state[13]; x14 = m_state[14]; x15 = m_state[15]; <a name="l00499"></a>00499 <a name="l00500"></a>00500 <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i=m_rounds; i>0; i-=2) <a name="l00501"></a>00501 { <a name="l00502"></a>00502 <span class="preprocessor"> #define QUARTER_ROUND(a, b, c, d) \</span> <a name="l00503"></a>00503 <span class="preprocessor"> b = b ^ rotlFixed(a + d, 7); \</span> <a name="l00504"></a>00504 <span class="preprocessor"> c = c ^ rotlFixed(b + a, 9); \</span> <a name="l00505"></a>00505 <span class="preprocessor"> d = d ^ rotlFixed(c + b, 13); \</span> <a name="l00506"></a>00506 <span class="preprocessor"> a = a ^ rotlFixed(d + c, 18);</span> <a name="l00507"></a>00507 <span class="preprocessor"></span> <a name="l00508"></a>00508 QUARTER_ROUND(x0, x4, x8, x12) <a name="l00509"></a>00509 QUARTER_ROUND(x1, x5, x9, x13) <a name="l00510"></a>00510 QUARTER_ROUND(x2, x6, x10, x14) <a name="l00511"></a>00511 QUARTER_ROUND(x3, x7, x11, x15) <a name="l00512"></a>00512 <a name="l00513"></a>00513 QUARTER_ROUND(x0, x13, x10, x7) <a name="l00514"></a>00514 QUARTER_ROUND(x1, x14, x11, x4) <a name="l00515"></a>00515 QUARTER_ROUND(x2, x15, x8, x5) <a name="l00516"></a>00516 QUARTER_ROUND(x3, x12, x9, x6) <a name="l00517"></a>00517 } <a name="l00518"></a>00518 
<a name="l00519"></a>00519 <span class="preprocessor">#define SALSA_OUTPUT(x) {\</span> <a name="l00520"></a>00520 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\</span> <a name="l00521"></a>00521 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x13 + m_state[13]);\</span> <a name="l00522"></a>00522 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x10 + m_state[10]);\</span> <a name="l00523"></a>00523 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x7 + m_state[7]);\</span> <a name="l00524"></a>00524 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\</span> <a name="l00525"></a>00525 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x1 + m_state[1]);\</span> <a name="l00526"></a>00526 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x14 + m_state[14]);\</span> <a name="l00527"></a>00527 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x11 + m_state[11]);\</span> <a name="l00528"></a>00528 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\</span> <a name="l00529"></a>00529 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x5 + m_state[5]);\</span> <a name="l00530"></a>00530 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x2 + m_state[2]);\</span> <a name="l00531"></a>00531 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x15 + m_state[15]);\</span> <a name="l00532"></a>00532 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\</span> <a name="l00533"></a>00533 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, 
LITTLE_ENDIAN_ORDER, 13, x9 + m_state[9]);\</span> <a name="l00534"></a>00534 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x6 + m_state[6]);\</span> <a name="l00535"></a>00535 <span class="preprocessor"> CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x3 + m_state[3]);}</span> <a name="l00536"></a>00536 <span class="preprocessor"></span> <a name="l00537"></a>00537 <span class="preprocessor">#ifndef CRYPTOPP_DOXYGEN_PROCESSING</span> <a name="l00538"></a>00538 <span class="preprocessor"></span> CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SALSA_OUTPUT, BYTES_PER_ITERATION); <a name="l00539"></a>00539 <span class="preprocessor">#endif</span> <a name="l00540"></a>00540 <span class="preprocessor"></span> <a name="l00541"></a>00541 <span class="keywordflow">if</span> (++m_state[8] == 0) <a name="l00542"></a>00542 ++m_state[5]; <a name="l00543"></a>00543 } <a name="l00544"></a>00544 } <a name="l00545"></a>00545 } <span class="comment">// see comment above if an internal compiler error occurs here</span> <a name="l00546"></a>00546 <a name="l00547"></a>00547 <span class="keywordtype">void</span> XSalsa20_Policy::CipherSetKey(<span class="keyword">const</span> <a class="code" href="class_name_value_pairs.html" title="interface for retrieving values given their names">NameValuePairs</a> &amp;params, <span class="keyword">const</span> byte *key, <span class="keywordtype">size_t</span> length) <a name="l00548"></a>00548 { <a name="l00549"></a>00549 m_rounds = params.<a class="code" href="class_name_value_pairs.html#ac269314685b737912d3499f4a9399618" title="get a named value with type int, with default">GetIntValueWithDefault</a>(Name::Rounds(), 20); <a name="l00550"></a>00550 <a name="l00551"></a>00551 <span class="keywordflow">if</span> (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20)) <a name="l00552"></a>00552 <span class="keywordflow">throw</span> <a class="code" href="class_invalid_rounds.html" 
title="_">InvalidRounds</a>(XSalsa20::StaticAlgorithmName(), m_rounds); <a name="l00553"></a>00553 <a name="l00554"></a>00554 GetUserKey(LITTLE_ENDIAN_ORDER, m_key.begin(), m_key.size(), key, length); <a name="l00555"></a>00555 <span class="keywordflow">if</span> (length == 16) <a name="l00556"></a>00556 memcpy(m_key.begin()+4, m_key.begin(), 16); <a name="l00557"></a>00557 <a name="l00558"></a>00558 <span class="comment">// "expand 32-byte k"</span> <a name="l00559"></a>00559 m_state[0] = 0x61707865; <a name="l00560"></a>00560 m_state[1] = 0x3320646e; <a name="l00561"></a>00561 m_state[2] = 0x79622d32; <a name="l00562"></a>00562 m_state[3] = 0x6b206574; <a name="l00563"></a>00563 } <a name="l00564"></a>00564 <a name="l00565"></a>00565 <span class="keywordtype">void</span> XSalsa20_Policy::CipherResynchronize(byte *keystreamBuffer, <span class="keyword">const</span> byte *IV, <span class="keywordtype">size_t</span> length) <a name="l00566"></a>00566 { <a name="l00567"></a>00567 assert(length==24); <a name="l00568"></a>00568 <a name="l00569"></a>00569 word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; <a name="l00570"></a>00570 <a name="l00571"></a>00571 <a class="code" href="class_get_block.html">GetBlock&lt;word32, LittleEndian&gt;</a> <span class="keyword">get</span>(IV); <a name="l00572"></a>00572 <span class="keyword">get</span>(x14)(x11)(x8)(x5)(m_state[14])(m_state[11]); <a name="l00573"></a>00573 <a name="l00574"></a>00574 x13 = m_key[0]; x10 = m_key[1]; x7 = m_key[2]; x4 = m_key[3]; <a name="l00575"></a>00575 x15 = m_key[4]; x12 = m_key[5]; x9 = m_key[6]; x6 = m_key[7]; <a name="l00576"></a>00576 x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3]; <a name="l00577"></a>00577 <a name="l00578"></a>00578 <span class="keywordflow">for</span> (<span class="keywordtype">int</span> i=m_rounds; i&gt;0; i-=2) <a name="l00579"></a>00579 { <a name="l00580"></a>00580 QUARTER_ROUND(x0, x4, x8, x12) <a name="l00581"></a>00581 
QUARTER_ROUND(x1, x5, x9, x13) <a name="l00582"></a>00582 QUARTER_ROUND(x2, x6, x10, x14) <a name="l00583"></a>00583 QUARTER_ROUND(x3, x7, x11, x15) <a name="l00584"></a>00584 <a name="l00585"></a>00585 QUARTER_ROUND(x0, x13, x10, x7) <a name="l00586"></a>00586 QUARTER_ROUND(x1, x14, x11, x4) <a name="l00587"></a>00587 QUARTER_ROUND(x2, x15, x8, x5) <a name="l00588"></a>00588 QUARTER_ROUND(x3, x12, x9, x6) <a name="l00589"></a>00589 } <a name="l00590"></a>00590 <a name="l00591"></a>00591 m_state[13] = x0; m_state[10] = x1; m_state[7] = x2; m_state[4] = x3; <a name="l00592"></a>00592 m_state[15] = x14; m_state[12] = x11; m_state[9] = x8; m_state[6] = x5; <a name="l00593"></a>00593 m_state[8] = m_state[5] = 0; <a name="l00594"></a>00594 } <a name="l00595"></a>00595 <a name="l00596"></a>00596 NAMESPACE_END <a name="l00597"></a>00597 <a name="l00598"></a>00598 <span class="preprocessor">#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM</span> </pre></div></div> <hr size="1"/><address style="text-align: right;"><small>Generated on 9 Dec 2009 for Crypto++ by <a href="http://www.doxygen.org/index.html"> <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.6.1 </small></address> </body> </html>