Support for native key transformation based on the method of Keepassdroid (performance improvement)

2025-02-11 20:40:10 -05:00 · 2013-03-30 23:05:49 +01:00 · 2013-03-30 23:05:49 +01:00 · 5ca110fd66
commit 5ca110fd66
parent de0dd676ef
54 changed files with 10932 additions and 15 deletions
--- a/src/KeePass.sln
+++ b/src/KeePass.sln
@ -5,6 +5,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "KeePassLib2Android", "KeePa
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keepass2android", "keepass2android\keepass2android.csproj", "{A6CF8A86-37C1-4197-80FE-519DE2C842F5}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kp2akeytransform", "kp2akeytransform\kp2akeytransform.csproj", "{A57B3ACE-5634-469A-88C4-858BB409F356}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@ -36,6 +38,24 @@ Global
 		{545B4A6B-8BBA-4FBE-92FC-4AC060122A54}.Release|x64.Build.0 = Release|Any CPU
 		{545B4A6B-8BBA-4FBE-92FC-4AC060122A54}.ReleaseNoNet|Any CPU.ActiveCfg = ReleaseNoNet|Any CPU
 		{545B4A6B-8BBA-4FBE-92FC-4AC060122A54}.ReleaseNoNet|Any CPU.Build.0 = ReleaseNoNet|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Debug|Win32.ActiveCfg = Debug|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Debug|Win32.Build.0 = Debug|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Debug|x64.ActiveCfg = Debug|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Debug|x64.Build.0 = Debug|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Release|Any CPU.Build.0 = Release|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Release|Win32.ActiveCfg = Release|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Release|Win32.Build.0 = Release|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Release|x64.ActiveCfg = Release|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.Release|x64.Build.0 = Release|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.ReleaseNoNet|Any CPU.ActiveCfg = Debug|Any CPU
+		{A57B3ACE-5634-469A-88C4-858BB409F356}.ReleaseNoNet|Any CPU.Build.0 = Debug|Any CPU
 		{A6CF8A86-37C1-4197-80FE-519DE2C842F5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{A6CF8A86-37C1-4197-80FE-519DE2C842F5}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{A6CF8A86-37C1-4197-80FE-519DE2C842F5}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
--- a/src/KeePassLib2Android/KeePassLib2Android.csproj
+++ b/src/KeePassLib2Android/KeePassLib2Android.csproj
@ -149,4 +149,10 @@
    <AndroidResource Include="Resources\values\Strings.xml" />
  </ItemGroup>
  <Import Project="$(MSBuildExtensionsPath)\Novell\Novell.MonoDroid.CSharp.targets" />
+  <ItemGroup>
+    <ProjectReference Include="..\kp2akeytransform\kp2akeytransform.csproj">
+      <Project>{A57B3ACE-5634-469A-88C4-858BB409F356}</Project>
+      <Name>kp2akeytransform</Name>
+    </ProjectReference>
+  </ItemGroup>
 </Project>
--- a/src/KeePassLib2Android/Keys/CompositeKey.cs
+++ b/src/KeePassLib2Android/Keys/CompositeKey.cs
@ -246,22 +246,35 @@ namespace KeePassLib.Keys
 			ulong uNumRounds)
 		{
 			Debug.Assert((pbOriginalKey32 != null) && (pbOriginalKey32.Length == 32));
-			if(pbOriginalKey32 == null) throw new ArgumentNullException("pbOriginalKey32");
-			if(pbOriginalKey32.Length != 32) throw new ArgumentException();
+			if (pbOriginalKey32 == null)
+				throw new ArgumentNullException("pbOriginalKey32");
+			if (pbOriginalKey32.Length != 32)
+				throw new ArgumentException();

 			Debug.Assert((pbKeySeed32 != null) && (pbKeySeed32.Length == 32));
-			if(pbKeySeed32 == null) throw new ArgumentNullException("pbKeySeed32");
-			if(pbKeySeed32.Length != 32) throw new ArgumentException();
+			if (pbKeySeed32 == null)
+				throw new ArgumentNullException("pbKeySeed32");
+			if (pbKeySeed32.Length != 32)
+				throw new ArgumentException();

 			byte[] pbNewKey = new byte[32];
 			Array.Copy(pbOriginalKey32, pbNewKey, pbNewKey.Length);

 			// Try to use the native library first
-			if(NativeLib.TransformKey256(pbNewKey, pbKeySeed32, uNumRounds))
-				return (new SHA256Managed()).ComputeHash(pbNewKey);
+			Stopwatch sw = new Stopwatch();
+			sw.Start();
+			if (NativeLib.TransformKey256(pbNewKey, pbKeySeed32, uNumRounds))
+			{
+				sw.Stop();
+				Android.Util.Log.Debug("DEBUG", "Native transform:" +sw.ElapsedMilliseconds+"ms");
+				return pbNewKey;
+			}

+			sw.Restart();
 			if(TransformKeyManaged(pbNewKey, pbKeySeed32, uNumRounds) == false)
 				return null;
+			sw.Stop();
+			Android.Util.Log.Debug("DEBUG", "Managed transform:" +sw.ElapsedMilliseconds+"ms");

 			SHA256Managed sha256 = new SHA256Managed();
 			return sha256.ComputeHash(pbNewKey);
--- a/src/KeePassLib2Android/Native/NativeLib.cs
+++ b/src/KeePassLib2Android/Native/NativeLib.cs
@ -162,19 +162,20 @@ namespace KeePassLib.Native
 		{
 			if(m_bAllowNative == false) return false;

-			KeyValuePair<IntPtr, IntPtr> kvp = PrepareArrays256(pBuf256, pKey256);
-			bool bResult = false;
-
 			try
 			{
-				bResult = NativeMethods.TransformKey(kvp.Key, kvp.Value, uRounds);
+				//Android.Util.Log.Debug("DEBUG", "4+1"+new Kp2atest.TestClass().Add1(4));
+				Com.Keepassdroid.Crypto.Finalkey.NativeFinalKey key = new Com.Keepassdroid.Crypto.Finalkey.NativeFinalKey();
+
+				byte[] newKey = key.TransformMasterKey(pKey256, pBuf256, (int)uRounds);
+				Array.Copy(newKey, pBuf256, newKey.Length);
+			}
+			catch(Exception e) 
+			{ 
+				return false; 
 			}
-			catch(Exception) { bResult = false; }

-			if(bResult) GetBuffers256(kvp, pBuf256, pKey256);
-
-			NativeLib.FreeArrays(kvp);
-			return bResult;
+			return true;
 		}

 		/// <summary>
--- a/src/java/kp2akeytransform/.classpath
+++ b/src/java/kp2akeytransform/.classpath
@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry kind="con" path="com.android.ide.eclipse.adt.ANDROID_FRAMEWORK"/>
+	<classpathentry exported="true" kind="con" path="com.android.ide.eclipse.adt.LIBRARIES"/>
+	<classpathentry kind="src" path="src"/>
+	<classpathentry kind="src" path="gen"/>
+	<classpathentry kind="output" path="bin/classes"/>
+</classpath>
--- a/src/java/kp2akeytransform/.gitignore
+++ b/src/java/kp2akeytransform/.gitignore
@ -0,0 +1,5 @@
+build.properties
+local.properties
+bin
+gen
+obj
--- a/src/java/kp2akeytransform/.project
+++ b/src/java/kp2akeytransform/.project
@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>kp2akeytransform</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>com.android.ide.eclipse.adt.ResourceManagerBuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>com.android.ide.eclipse.adt.PreCompilerBuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>com.android.ide.eclipse.adt.ApkBuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>com.android.ide.eclipse.adt.AndroidNature</nature>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+	</natures>
+</projectDescription>
--- a/src/java/kp2akeytransform/AndroidManifest.xml
+++ b/src/java/kp2akeytransform/AndroidManifest.xml
@ -0,0 +1,14 @@
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+    package="keepass2android.kp2akeytransform"
+    android:versionCode="1"
+    android:versionName="1.0" >
+
+    <uses-sdk
+        android:minSdkVersion="8"
+        android:targetSdkVersion="17" />
+
+    <application
+        android:allowBackup="true">
+    </application>
+
+</manifest>
--- a/src/java/kp2akeytransform/COPYING.gpl-2.0
+++ b/src/java/kp2akeytransform/COPYING.gpl-2.0
@ -0,0 +1,339 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
--- a/src/java/kp2akeytransform/jni/.gitignore
+++ b/src/java/kp2akeytransform/jni/.gitignore
@ -0,0 +1,3 @@
+openssl-0.9.8l
+aes-src-29-04-09.zip
+sha2-07-01-07.zip
--- a/src/java/kp2akeytransform/jni/Android.mk
+++ b/src/java/kp2akeytransform/jni/Android.mk
@ -0,0 +1,3 @@
+# Recursively sources all Android.mk files in subdirs:
+include $(call all-subdir-makefiles)
+
--- a/src/java/kp2akeytransform/jni/Application.mk
+++ b/src/java/kp2akeytransform/jni/Application.mk
@ -0,0 +1,3 @@
+APP_MODULES      := aes sha final-key
+APP_OPTIM        := release
+APP_ABI          := armeabi armeabi-v7a x86 mips
--- a/src/java/kp2akeytransform/jni/aes/Android.mk
+++ b/src/java/kp2akeytransform/jni/aes/Android.mk
@ -0,0 +1,13 @@
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := aes
+
+LOCAL_SRC_FILES := \
+	aescrypt.c \
+	aeskey.c \
+	aes_modes.c \
+	aestab.c
+
+include $(BUILD_STATIC_LIBRARY)
--- a/src/java/kp2akeytransform/jni/aes/aes.h
+++ b/src/java/kp2akeytransform/jni/aes/aes.h
@ -0,0 +1,205 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+  1. source code distributions include the above copyright notice, this
+     list of conditions and the following disclaimer;
+
+  2. binary distributions include the above copyright notice, this list
+     of conditions and the following disclaimer in their documentation;
+
+  3. the name of the copyright holder is not used to endorse products
+     built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+
+ This file contains the definitions required to use AES in C. See aesopt.h
+ for optimisation details.
+*/
+
+#ifndef _AES_H
+#define _AES_H
+
+#include <stdlib.h>
+
+/*  This include is used to find 8 & 32 bit unsigned integer types  */
+#include "brg_types.h"
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#define AES_128     /* if a fast 128 bit key scheduler is needed    */
+#define AES_192     /* if a fast 192 bit key scheduler is needed    */
+#define AES_256     /* if a fast 256 bit key scheduler is needed    */
+#define AES_VAR     /* if variable key size scheduler is needed     */
+#define AES_MODES   /* if support is needed for modes               */
+
+/* The following must also be set in assembler files if being used  */
+
+#define AES_ENCRYPT /* if support for encryption is needed          */
+#define AES_DECRYPT /* if support for decryption is needed          */
+#define AES_REV_DKS /* define to reverse decryption key schedule    */
+
+#define AES_BLOCK_SIZE  16  /* the AES block size in bytes          */
+#define N_COLS           4  /* the number of columns in the state   */
+
+/* The key schedule length is 11, 13 or 15 16-byte blocks for 128,  */
+/* 192 or 256-bit keys respectively. That is 176, 208 or 240 bytes  */
+/* or 44, 52 or 60 32-bit words.                                    */
+
+#if defined( AES_VAR ) || defined( AES_256 )
+#define KS_LENGTH       60
+#elif defined( AES_192 )
+#define KS_LENGTH       52
+#else
+#define KS_LENGTH       44
+#endif
+
+#define AES_RETURN INT_RETURN
+
+/* the character array 'inf' in the following structures is used    */
+/* to hold AES context information. This AES code uses cx->inf.b[0] */
+/* to hold the number of rounds multiplied by 16. The other three   */
+/* elements can be used by code that implements additional modes    */
+
+typedef union
+{   uint_32t l;
+    uint_8t b[4];
+} aes_inf;
+
+typedef struct
+{   uint_32t ks[KS_LENGTH];
+    aes_inf inf;
+} aes_encrypt_ctx;
+
+typedef struct
+{   uint_32t ks[KS_LENGTH];
+    aes_inf inf;
+} aes_decrypt_ctx;
+
+/* This routine must be called before first use if non-static       */
+/* tables are being used                                            */
+
+AES_RETURN aes_init(void);
+
+/* Key lengths in the range 16 <= key_len <= 32 are given in bytes, */
+/* those in the range 128 <= key_len <= 256 are given in bits       */
+
+#if defined( AES_ENCRYPT )
+
+#if defined( AES_128 ) || defined( AES_VAR)
+AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
+#endif
+
+#if defined( AES_192 ) || defined( AES_VAR)
+AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
+#endif
+
+#if defined( AES_256 ) || defined( AES_VAR)
+AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
+#endif
+
+#if defined( AES_VAR )
+AES_RETURN aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]);
+#endif
+
+AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]);
+
+#endif
+
+#if defined( AES_DECRYPT )
+
+#if defined( AES_128 ) || defined( AES_VAR)
+AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
+#endif
+
+#if defined( AES_192 ) || defined( AES_VAR)
+AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
+#endif
+
+#if defined( AES_256 ) || defined( AES_VAR)
+AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
+#endif
+
+#if defined( AES_VAR )
+AES_RETURN aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]);
+#endif
+
+AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]);
+
+#endif
+
+#if defined( AES_MODES )
+
+/* Multiple calls to the following subroutines for multiple block   */
+/* ECB, CBC, CFB, OFB and CTR mode encryption can be used to handle */
+/* long messages incrementally provided that the context AND the iv */
+/* are preserved between all such calls.  For the ECB and CBC modes */
+/* each individual call within a series of incremental calls must   */
+/* process only full blocks (i.e. len must be a multiple of 16) but */
+/* the CFB, OFB and CTR mode calls can handle multiple incremental  */
+/* calls of any length. Each mode is reset when a new AES key is    */
+/* set but ECB and CBC operations can be reset without setting a    */
+/* new key by setting a new IV value.  To reset CFB, OFB and CTR    */
+/* without setting the key, aes_mode_reset() must be called and the */
+/* IV must be set.  NOTE: All these calls update the IV on exit so  */
+/* this has to be reset if a new operation with the same IV as the  */
+/* previous one is required (or decryption follows encryption with  */
+/* the same IV array).                                              */
+
+AES_RETURN aes_test_alignment_detection(unsigned int n);
+
+AES_RETURN aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, const aes_encrypt_ctx cx[1]);
+
+AES_RETURN aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, const aes_decrypt_ctx cx[1]);
+
+AES_RETURN aes_cbc_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, const aes_encrypt_ctx cx[1]);
+
+AES_RETURN aes_cbc_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, const aes_decrypt_ctx cx[1]);
+
+AES_RETURN aes_mode_reset(aes_encrypt_ctx cx[1]);
+
+AES_RETURN aes_cfb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx cx[1]);
+
+AES_RETURN aes_cfb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx cx[1]);
+
+#define aes_ofb_encrypt aes_ofb_crypt
+#define aes_ofb_decrypt aes_ofb_crypt
+
+AES_RETURN aes_ofb_crypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx cx[1]);
+
+typedef void cbuf_inc(unsigned char *cbuf);
+
+#define aes_ctr_encrypt aes_ctr_crypt
+#define aes_ctr_decrypt aes_ctr_crypt
+
+AES_RETURN aes_ctr_crypt(const unsigned char *ibuf, unsigned char *obuf,
+            int len, unsigned char *cbuf, cbuf_inc ctr_inc, aes_encrypt_ctx cx[1]);
+
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
--- a/src/java/kp2akeytransform/jni/aes/aes.txt
+++ b/src/java/kp2akeytransform/jni/aes/aes.txt
@ -0,0 +1,556 @@
+
+An AES (Rijndael) Implementation in C/C++ (as specified in FIPS-197)
+====================================================================
+
+Changes in this Version (16/04/2007)
+====================================
+
+These changes remove errors in the VC++ build files and add some 
+improvements in file naming consistency and portability. There are
+no changes to overcome reported bugs in the code.
+
+1. gen_tabs() has been renamed to aes_init() to better describe its
+   function to those not familiar with AES internals.
+
+2. via_ace.h has been renamed to aes_via_ace.h.
+
+3. Minor changes have been made to aestab.h and aestab.c to enable
+   all the code to be compiled in either C or C++.
+   
+4. The code for detecting memory alignment in aes_modes.c has been
+   simplified and a new routine has been added:
+   
+       aes_test_alignment_detection()
+   
+   to check that the alignment test is likely to be correct.
+
+5. The addition of support for Structured Exception Handling (SEH) 
+   to YASM (well done Peter and Michael!) has allowed the AMD64 
+   x64 assembler code to be changed to comply with SEH requirements.
+       
+6. Corrections to build files (for win32 debug build).
+
+Overview
+========
+
+This code implements AES for both 32 and 64 bit systems with optional
+assembler support for x86 and AMD64/EM64T (but optimised for AMD64).
+
+The basic AES source code files are as follows:
+
+aes.h           the header file needed to use AES in C
+aescpp.h        the header file required to use AES in C++
+aesopt.h        the header file for setting options (and some common code)
+aestab.h        the header file for the AES table declaration
+aescrypt.c      the main C source code file for encryption and decryption
+aeskey.c        the main C source code file for the key schedule
+aestab.c        the main file for the AES tables
+brg_types.h     a header defining some standard types and DLL defines
+brg_endian.h    a header containing code to detect or define endianness
+aes_x86_v1.asm  x86 assembler (YASM) alternative to aescrypt.c using
+                large tables
+aes_x86_v2.asm  x86 assembler (YASM) alternative to aescrypt.c using
+                compressed tables
+aes_amd64.asm   AMD64 assembler (YASM) alternative to aescrypt.c using
+                compressed tables
+
+In addition AES modes are implemented in the files:
+
+aes_modes.c     AES modes with optional support for VIA ACE detection and use
+aes_via_ace.h   the header file for VIA ACE support
+
+Other associated files for testing and support are:
+
+aesaux.h        header for auxiliary routines for testing
+aesaux.c        auxiliary routines for testing
+aestst.h        header file for setting the testing environment
+rdtsc.h         a header file that provides access to the Time Stamp Counter
+aestst.c        a simple test program for quick tests of the AES code
+aesgav.c        a program to generate and verify the test vector files
+aesrav.c        a program to verify output against the test vector files
+aestmr.c        a program to time the code on x86 systems
+modetest.c      a program to test the AES modes support
+vbxam.doc       a demonstration of AES DLL use from Visual Basic in Microsoft Word
+vb.txt          Visual Basic code from the above example (win32 only)
+aesxam.c        an example of AES use
+tablegen.c      a program to generate a simplified 'aestab.c' file for
+                use with compilers that find aestab.c too complex
+yasm.rules      the YASM build rules file for Microsoft Visual Studio 2005
+via_ace.txt     describes support for the VIA ACE cryptography engine
+aes.txt         this file
+
+Building The AES Libraries
+--------------------------
+
+A. Versions
+-----------
+
+The code can be used to build static and dynamic libraries, each in five
+versions:
+
+    C           uses C source code only
+    ASM_X86_V1C large table x86 assembler code for encrypt/decrypt
+    ASM_X86_V2  compressed table x86 assembler for encrypt/decrypt and keying
+    ASM_X86_V2C compressed table x86 assembler code for encrypt/decrypt
+    ASM_AMD64   compressed table x86 assembler code for encrypt/decrypt
+
+The C version can be compiled for Win32 or x64, the x86 assembler versions
+are for Win32 only and the AMD64 version for x64 only.
+
+B. Types
+--------
+
+The code makes use of types defined as uint_<nn>t where <nn> is the length
+of the type, for example, the unsigned 32-bit type is 'uint_32t'.  These are
+NOT the same as the fixed width integer types in C99, inttypes.h and stdint.h
+since several attempts to use these types have shown that support for them is
+still highly variable.  But a regular expression search and replace in VC++
+with search on 'uint_{:z}t' and a replace with 'uint\1_t' will convert these
+types to C99 types (there should be similar search/replace facilities on other
+systems).
+
+C. YASM
+-------
+
+If you wish to use the x86 assembler files you will also need the YASM open
+source x86 assembler (r1331 or later) for Windows which can be obtained from:
+
+  http://www.tortall.net/projects/yasm/
+
+This assembler should be placed in the bin directory used by VC++, which, for
+Visual Studio 2005, is typically:
+
+ C:\Program Files (x86)\Microsoft Visual Studio 8\VC\bin
+
+You will also need to move the yasm.rules file from this distribution into
+the directory where Visual Studio 2005 expects to find it, which is typically:
+
+ C:\Program Files (x86)\Microsoft Visual Studio 8\VC\VCProjectDefaults
+
+Alternatively you can configure the path for rules files within Visual Studio.
+
+D. Configuration
+----------------
+
+The following configurations are available as projects for Visual Studio 2005
+but the following descriptions should allow them to be built in other x86
+environments:
+
+    lib_generic_c       Win32 and x64
+        headers:        aes.h, aesopt.h, aestab.h, brg_endian.h, tdefs.h
+        C source:       aescrypt.c, aeskey.c, aestab.c, aes_modes.c
+        defines
+    dll_generic_c       Win32 and x64
+        headers:        aes.h, aesopt.h, aestab.h, brg_endian.h, tdefs.h
+        C source:       aescrypt.c, aeskey.c, aestab.c, aes_modes.c
+        defines         DLL_EXPORT
+
+    lib_asm_x86_v1c     Win32
+        headers:        aes.h, aesopt.h, aestab.h, brg_endian.h, tdefs.h
+        C source:       aeskey.c, aestab.c, aes_modes.c
+        x86 assembler:  aes_x86_v1.asm
+        defines         ASM_X86_V1C (set for C and assembler files)
+    dll_asm_x86_v1c     Win32
+        headers:        aes.h, aesopt.h, aestab.h, brg_endian.h, tdefs.h
+        C source:       aeskey.c, aestab.c, aes_modes.c
+        x86 assembler:  aes_x86_v1.asm
+        defines         DLL_EXPORT, ASM_X86_V1C (set for C and assembler files)
+
+    lib_asm_x86_v2c     Win32
+        headers:        aes.h, aesopt.h, aestab.h, brg_endian.h, tdefs.h
+        C source:       aeskey.c, aestab.c, aes_modes.c
+        x86 assembler:  aes_x86_v2.asm
+        defines         ASM_X86_V2C (set for C and assembler files)
+    dll_asm_x86_v2c     Win32
+        headers:        aes.h, aesopt.h, aestab.h, brg_endian.h, tdefs.h
+        C source:       aeskey.c, aestab.c, aes_modes.c
+        x86 assembler:  aes_x86_v2.asm
+        defines         DLL_EXPORT, ASM_X86_V2C (set for C and assembler files)
+
+    lib_asm_x86_v2      Win32
+        headers:        aes.h, aesopt.h, aestab.h, brg_endian.h, tdefs.h
+        C source:       aes_modes.c
+        x86 assembler:  aes_x86_v2.asm
+        defines         ASM_X86_V2 (set for C and assembler files)
+    dll_asm_x86_v2      Win32
+        headers:        aes.h, aesopt.h, aestab.h, brg_endian.h, tdefs.h
+        C source:       aes_modes.c
+        x86 assembler:  aes_x86_v2.asm
+        defines         DLL_EXPORT, ASM_X86_V2 (set for C and assembler files)
+
+    lib_asm_amd64_c     x64
+        headers:        aes.h, aesopt.h, aestab.h, brg_endian.h, tdefs.h
+        C source:       aes_modes.c
+        x86 assembler:  aes_amd64.asm
+        defines         ASM_AMD64_C (set for C and assembler files)
+    dll_asm_amd64_c     x64
+        headers:        aes.h, aesopt.h, aestab.h, brg_endian.h, tdefs.h
+        C source:       aes_modes.c
+        x86 assembler:  aes_amd64.asm
+        defines         DLL_EXPORT, ASM_AMD64_C (set for C and assembler files)
+
+Notes:
+
+ASM_X86_V1C is defined if using the version 1 assembler code (aescrypt1.asm).
+            The defines in the assembler file must match those in aes.h and
+            aesopt.h.  Also remember to include/exclude the right assembler
+            and C files in the build to avoid undefined or multiply defined
+            symbols - include aescrypt1.asm and exclude aescrypt.c and
+            aescrypt2.asm.
+
+ASM_X86_V2  is defined if using the version 2 assembler code (aescrypt2.asm).
+            This version provides a full, self contained assembler version
+            and does not use any C source code files except for the multiple
+            block encryption modes that are provided by aes_modes.c. The define
+            ASM_X86_V2 must be set on the YASM command line (or in aescrypt2.asm)
+            to use this version and all C files except aes_modes.c and, for the
+            DLL build, aestab.c must be excluded from the build.
+
+ASM_X86_V2C is defined when using the version 2 assembler code (aescrypt2.asm)
+            with faster key scheduling provided by the C code (the options in
+            the assembler file must match those in aes.h and aesopt.h).  In this
+            case aeskey.c and aestab.c are needed with aescrypt2.asm and the
+            define ASM_X86_V2C must be set for both the C files and for
+            aescrypt2.asm command lines (or in aesopt.h and aescrypt2.asm).
+            Include aescrypt2.asm aeskey.c and aestab.c, exclude aescrypt.c for
+            this option.
+
+ASM_AMD64_C is defined when using the AMD64 assembly code because the C key
+            scheduling is used in this case.
+
+DLL_EXPORT  must be defined to generate the DLL version of the code and
+            to run tests on it
+
+DLL_IMPORT  must be defined to use the DLL version of the code in an
+            application program
+
+Directories the paths for the various directories for test vector input and
+            output have to be set in aestst.h
+
+VIA ACE     see the via_ace.txt for this item
+
+Static      The static libraries are named:
+Libraries
+                aes_lib_generic_c.lib
+                aes_lib_asm_x86_v1c.lib
+                aes_lib_asm_x86_v2.lib
+                aes_lib_asm_x86_v2c.lib
+                aes_lib_asm_amd64_c.lib
+
+            and placed in one of the directories:
+
+                lib\win32\release\
+                lib\win32\debug\
+                lib\x64\release\
+                lib\x64\debug\
+
+            in the aes root directory depending on the platform(win32 or
+            x64) and the build (release or debug). After any of these is
+            built it is then copied into aes.lib, which is the library
+            that is subsequently used for testing. Hence testing is for
+            the last static library built.
+
+Dynamic     The dynamic libraries are named:
+Libraries
+                aes_lib_generic_c.dll
+                aes_lib_asm_x86_v1c.dll
+                aes_lib_asm_x86_v2.dll
+                aes_lib_asm_x86_v2c.dll
+                aes_lib_asm_amd64_c.dll
+
+            and placed in one of the directories:
+
+                dll\win32\release\
+                dll\win32\debug\
+                dll\x64\release\
+                dll\x64\debug\
+
+            in the aes root directory depending on the platform(win32 or
+            x64) and the build (release or debug).  Each DLL library:
+
+                aes_<ext>.dll
+
+            has three associated files:
+
+                aes_dll_<ext>.lib   the library file for implicit linking
+                aes_dll_<ext>.exp   the exports file
+                aes_dll_<ext>.pdb   the symbol file
+
+            After any DLL is built it and its three related files are then
+            copied into aes.dll, aes.lib, aes.exp and aes.pdb, which are
+            the libraries used for testing.  Hence testing is for the last
+            static library or DLL built.
+
+E. Testing
+----------
+
+These tests require that the test vector files are placed in the 'testvals' 
+subdirectory. If the AES Algorithm Validation Suite tests will be used then
+the *.fax files need to be put in the 'testvals\fax' subdirectory.  This is
+covered in more detail below.
+
+The projects test_dll and time_dll are used to test and time the last DLL
+built.  These use the files:
+
+    test_dll:       Win32 (x64 for the C and AMD64 versions)
+        headers:    aes.h, aescpp.h, brg_types.h, aesaux.h and aestst.h
+        C source:   aesaux.c, aesrav.c
+        defines:    DLL_IMPORT
+
+    time_dll:       Win32 (x64 for the C and AMD64 versions)
+        headers:    aes.h, aescpp.h, brg_types.h, aesaux.h aestst.h and rdtsc.h
+        C source:   aesaux.c, aestmr.c
+        defines:    DLL_IMPORT
+
+and link to the DLL using explicit linking. However, if the lib file associated
+with the DLL is linked into this project and the symbol DYNAMIC_LINK in aestst.h
+is left undefined, then implicit linking will be used.
+
+The projects test_lib and time_lib are used to test and time the last static LIB
+built. They use the files:
+
+    test_lib:       Win32 (x64 for the C and AMD64 versions)
+        headers:    aes.h, aescpp.h, brg_types.h, aesaux.h and aestst.h
+        C source:   aesaux.c, aesrav.c
+        defines:
+
+    time_lib:       Win32 (x64 for the C and AMD64 versions)
+        headers:    aes.h, aescpp.h, brg_types.h, aesaux.h, aestst.h and rdtsc.h
+        C source:   aesaux.c, aestmr.c
+        defines:
+
+and link to the last static library built.
+
+The above tests take command line arguments that determine which tests are run
+as follows:
+
+    test_lib /t:[knec] /k:[468]
+    test_dll /t:[knec] /k:[468]
+
+where the symbols in square brackets can be used in any combination (without
+the brackets) and have the following meanings:
+
+        /t:[knec]   selects which tests are used
+        /k:[468]    selects the key lengths used
+        /c          compares output with reference (see later)
+
+        k: generate ECB Known Answer Test files
+        n: generate ECB Known Answer Test files (new)
+        e: generate ECB Monte Carlo Test files
+        c: generate CBC Monte Carlo Test files
+
+and the characters giving the lengths are digits representing the lengths in
+32-bit units.
+
+The project test_modes tests the AES modes.  It uses the files:
+
+    test_modes:     Win32 or x64
+        headers:    aes.h, aescpp.h, brg_types.h, aesaux.h and aestst.h
+        C source:   aesaux.c, modetest.c
+        defines:    none for static library test, DLL_IMPORT for DLL test
+
+which again links to the last library built.
+
+F. Other Applications
+---------------------
+
+These are:
+
+    gen_tests       builds the test_vector files. The command line is
+                        gen_tests /t:knec /k:468 /c
+                    as described earlier
+                    
+    test_aes_avs    run the AES Algorithm Validation Suite tests for
+                    ECB, CBC, CFB and OFB modes
+
+    gen_tables      builds a simple version of aes_tab.c (in aestab2.c)
+                    for compilers that cannot handle the normal version
+    aes_example     provides an example of AES use
+
+These applications are linked to the last static library built or, if
+DLL_IMPORT is defined during compilation, to the last DLL built.
+
+G. Use of the VIA ACE Cryptography Engine
+-----------------------------------------
+
+The use of the code with the VIA ACE cryptography engine is described in the
+file via_ace.txt. In outline aes_modes.c is used and USE_VIA_ACE_IF_PRESENT
+is defined either in section 2 of aesopt.h or as a compilation option in Visual
+Studio. If in addition ASSUME_VIA_ACE_PRESENT is also defined then all normal
+AES code will be removed if not needed to support VIA ACE use.  If VIA ACE
+support is needed and AES assembler is being used only the ASM_X86_V1C and
+ASM_X86_V2C versions should be used since ASM_X86_V2 and ASM_AMD64 do not
+support the VIA ACE engine.
+
+H. The AES Test Vector Files
+----------------------------
+
+These files fall in the following groups (where <nn> is a two digit
+number):
+
+1. ecbvk<nn>.txt  ECB vectors with variable key
+2. ecbvt<nn>.txt  ECB vectors with variable text
+3. ecbnk<nn>.txt  new ECB vectors with variable key
+4. ecbnt<nn>.txt  new ECB vectors with variable text
+5. ecbme<nn>.txt  ECB monte carlo encryption test vectors
+6. ecbmd<nn>.txt  ECB monte carlo decryption test vectors
+7. cbcme<nn>.txt  CBC monte carlo encryption test vectors
+8. cbcmd<nn>.txt  CBC monte carlo decryption test vectors
+
+The first digit of the numeric suffix on the filename gives the block size
+in 32 bit units and the second numeric digit gives the key size. For example,
+the file ecbvk44.txt provides the test vectors for ECB encryption with a 128
+bit block size and a 128 bit key size. The test routines expect to find these
+files in the 'testvals' subdirectory within the aes root directory. The
+'outvals' subdirectory is used for outputs that are compared with the files
+in 'testvals'. Note that the monte carlo test vectors are the result of
+applying AES iteratively 10000 times, not just once.
+
+The AES Algorithm Validation Suite tests can be run for ECB, CBC, CFB and 
+OFB modes (CFB1 and CFB8 are not implemented).  The test routine uses the 
+*.fax test files, which should be placed in the 'testvals\fax' subdirectory.
+
+I. The Basic AES Calling Interface
+----------------------------------
+
+The basic AES code keeps its state in a context, there being different 
+contexts for encryption and decryption:
+
+    aes_encrypt_ctx
+    aes_decrypt_ctx
+    
+The AES code is initialised with the call
+
+	aes_init(void)
+	
+although this is only essential if the option to generate the AES tables at 
+run-time has been set in the options (i.e.fixed tables are not being used).
+    
+The AES encryption key is set by one of the calls:
+ 
+    aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1])
+    aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1])
+    aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1])
+
+or by:
+
+    aes_encrypt_key(const unsigned char *key, int key_len, 
+                                                aes_encrypt_ctx cx[1])
+
+where the key length is set by 'key_len', which can be the length in bits 
+or bytes.  
+
+Similarly, the AES decryption key is set by one of:
+
+    aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1])
+    aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1])
+    aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1])
+
+or by:
+
+    aes_decrypt_key(const unsigned char *key, int key_len, 
+                                                aes_decrypt_ctx cx[1])
+ 
+Encryption and decryption for a single 16 byte block is then achieved using:
+
+    aes_encrypt(const unsigned char *in, unsigned char *out, 
+                                            const aes_encrypt_ctx cx[1])
+    aes_decrypt(const unsigned char *in, unsigned char *out, 
+                                            const aes_decrypt_ctx cx[1])
+                                            
+The above subroutines return a value of EXIT_SUCCESS or EXIT_FAILURE 
+depending on whether the operation succeeded or failed.
+ 
+J. The Calling Interface for the AES Modes
+------------------------------------------
+
+The subroutines for the AES modes, ECB, CBC, CFB, OFB and CTR, each process
+blocks of variable length and can also be called several times to complete 
+single mode operations incrementally on long messages (or those messages,
+not all of which are available at the same time).  The calls:
+
+    aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, const aes_encrypt_ctx cx[1])
+
+    aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, const aes_decrypt_ctx cx[1])
+
+for ECB operations and those for CBC:
+
+    aes_cbc_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, const aes_encrypt_ctx cx[1])
+
+    aes_cbc_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, const aes_decrypt_ctx cx[1])
+ 
+can only process blocks whose lengths are multiples of 16 bytes but the calls 
+for CFB, OFB and CTR mode operations:
+
+    aes_cfb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx cx[1])
+
+    aes_cfb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx cx[1])
+
+    aes_ofb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx cx[1])
+
+    aes_ofb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx cx[1])
+
+    aes_ctr_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+            int len, unsigned char *cbuf, cbuf_inc ctr_inc, aes_encrypt_ctx cx[1])
+
+    aes_ctr_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+            int len, unsigned char *cbuf, cbuf_inc ctr_inc, aes_encrypt_ctx cx[1])
+
+can process blocks of any length.  Note also that CFB, OFB and CTR mode calls only
+use AES encryption contexts even during decryption operations.
+
+The calls for CTR mode operations use a buffer (cbuf) which holds the counter value
+together with a function parameter:
+
+    void cbuf_inc(unsigned char *cbuf);
+
+that is used to update the counter value after each 16 byte AES operation. The 
+counter buffer is updated appropriately to allow for incremental operations.
+
+Please note the following IMPORTANT points about the AES mode subroutines:
+
+    1. All modes are reset when a new AES key is set.
+    
+    2. Incremental calls to the different modes cannot 
+       be mixed. If a change of mode is needed a new 
+       key must be set or a reset must be issued (see 
+       below).
+       
+    3. For modes with IVs, the IV value is an input AND
+       an output since it is updated after each call to 
+       the value needed for any subsequent incremental
+       call(s). If the mode is reset, the IV hence has
+       to be set (or reset) as well.
+       
+    4. ECB operations must be multiples of 16 bytes
+       but do not need to be reset for new operations.
+       
+    5. CBC operations must also be multiples of 16 
+       bytes and are reset for a new operation by 
+       setting the IV.
+       
+    6. CFB, OFB and CTR mode must be reset by setting 
+       a new IV value AND by calling:
+       
+           aes_mode_reset(aes_encrypt_ctx cx[1])
+           
+       For CTR mode the cbuf value also has to be reset.
+       
+    7. CFB, OFB and CTR modes only use AES encryption 
+       operations and contexts and do not need AES
+       decryption operations.
+       
+    8. AES keys remain valid across resets and changes
+       of mode (but encryption and decryption keys must 
+       both be set if they are needed).  
+       
+   Brian Gladman  22/07/2008
+   
--- a/src/java/kp2akeytransform/jni/aes/aes_amd64.asm
+++ b/src/java/kp2akeytransform/jni/aes/aes_amd64.asm
@ -0,0 +1,905 @@
+
+; ---------------------------------------------------------------------------
+; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
+; 
+; LICENSE TERMS
+; 
+; The free distribution and use of this software is allowed (with or without
+; changes) provided that:
+; 
+;  1. source code distributions include the above copyright notice, this
+;     list of conditions and the following disclaimer;
+; 
+;  2. binary distributions include the above copyright notice, this list
+;     of conditions and the following disclaimer in their documentation;
+; 
+;  3. the name of the copyright holder is not used to endorse products
+;     built using this software without specific written permission.
+; 
+; DISCLAIMER
+; 
+; This software is provided 'as is' with no explicit or implied warranties
+; in respect of its properties, including, but not limited to, correctness
+; and/or fitness for purpose.
+; ---------------------------------------------------------------------------
+; Issue 20/12/2007
+;
+; I am grateful to Dag Arne Osvik for many discussions of the techniques that
+; can be used to optimise AES assembler code on AMD64/EM64T architectures.
+; Some of the techniques used in this implementation are the result of
+; suggestions made by him for which I am most grateful.
+
+; An AES implementation for AMD64 processors using the YASM assembler.  This
+; implementation provides only encryption and decryption and hence requires key
+; scheduling support in C. It uses 8k bytes of tables but its encryption and
+; decryption performance is very close to that obtained using large tables.
+; It can use either Windows or Gnu/Linux calling conventions, which are as
+; follows:
+;               windows  gnu/linux
+;
+;   in_blk          rcx     rdi
+;   out_blk         rdx     rsi
+;   context (cx)     r8     rdx
+;
+;   preserved       rsi      -    + rbx, rbp, rsp, r12, r13, r14 & r15
+;   registers       rdi      -      on both
+;
+;   destroyed        -      rsi   + rax, rcx, rdx, r8, r9, r10 & r11
+;   registers        -      rdi     on both
+;
+; The default convention is that for windows, the gnu/linux convention being
+; used if __GNUC__ is defined.
+;
+; Define _SEH_ to include support for Win64 structured exception handling
+; (this requires YASM version 0.6 or later).
+;
+; This code provides the standard AES block size (128 bits, 16 bytes) and the
+; three standard AES key sizes (128, 192 and 256 bits). It has the same call
+; interface as my C implementation.  It uses the Microsoft C AMD64 calling
+; conventions in which the three parameters are placed in  rcx, rdx and r8
+; respectively.  The rbx, rsi, rdi, rbp and r12..r15 registers are preserved.
+;
+;     AES_RETURN aes_encrypt(const unsigned char in_blk[],
+;                   unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
+;
+;     AES_RETURN aes_decrypt(const unsigned char in_blk[],
+;                   unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
+;
+;     AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
+;                                            const aes_encrypt_ctx cx[1]);
+;
+;     AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
+;                                            const aes_decrypt_ctx cx[1]);
+;
+;     AES_RETURN aes_encrypt_key(const unsigned char key[],
+;                           unsigned int len, const aes_encrypt_ctx cx[1]);
+;
+;     AES_RETURN aes_decrypt_key(const unsigned char key[],
+;                           unsigned int len, const aes_decrypt_ctx cx[1]);
+;
+; where <NNN> is 128, 192 or 256.  In the last two calls the length can be in
+; either bits or bytes.
+;
+; Comment in/out the following lines to obtain the desired subroutines. These
+; selections MUST match those in the C header file aes.h
+
+; Build-time feature switches: each symbol is tested with %ifdef later in
+; this file, so commenting a line out removes that feature from the build.
+; Note that the %ifdef AES_VAR block following the user defines re-enables
+; AES_128, AES_192 and AES_256 whenever AES_VAR is left defined.
+%define AES_128                 ; define if AES with 128 bit keys is needed
+%define AES_192                 ; define if AES with 192 bit keys is needed
+%define AES_256                 ; define if AES with 256 bit keys is needed
+%define AES_VAR                 ; define if a variable key size is needed
+%define ENCRYPTION              ; define if encryption is needed
+%define DECRYPTION              ; define if decryption is needed
+%define AES_REV_DKS             ; define if key decryption schedule is reversed
+
+%define LAST_ROUND_TABLES       ; define for the faster version using extra tables
+
+; The encryption key schedule has the following in memory layout where N is the
+; number of rounds (10, 12 or 14):
+;
+; lo: | input key (round 0)  |  ; each round is four 32-bit words
+;     | encryption round 1   |
+;     | encryption round 2   |
+;     ....
+;     | encryption round N-1 |
+; hi: | encryption round N   |
+;
+; The decryption key schedule is normally set up so that it has the same
+; layout as above by actually reversing the order of the encryption key
+; schedule in memory (this happens when AES_REV_DKS is set):
+;
+; lo: | decryption round 0   | =              | encryption round N   |
+;     | decryption round 1   | = INV_MIX_COL[ | encryption round N-1 | ]
+;     | decryption round 2   | = INV_MIX_COL[ | encryption round N-2 | ]
+;     ....                       ....
+;     | decryption round N-1 | = INV_MIX_COL[ | encryption round 1   | ]
+; hi: | decryption round N   | =              | input key (round 0)  |
+;
+; with rounds except the first and last modified using inv_mix_column()
+; But if AES_REV_DKS is NOT set the order of keys is left as it is for
+; encryption so that it has to be accessed in reverse when used for
+; decryption (although the inverse mix column modifications are done)
+;
+; lo: | decryption round 0   | =              | input key (round 0)  |
+;     | decryption round 1   | = INV_MIX_COL[ | encryption round 1   | ]
+;     | decryption round 2   | = INV_MIX_COL[ | encryption round 2   | ]
+;     ....                       ....
+;     | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
+; hi: | decryption round N   | =              | encryption round N   |
+;
+; This layout is faster when the assembler key scheduling provided here
+; is used.
+;
+; The DLL interface must use the _stdcall convention in which the number
+; of bytes of parameter space is added after an @ to the subroutine's name.
+; We must also remove our parameters from the stack before return (see
+; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.
+
+;%define DLL_EXPORT
+
+; End of user defines
+
+; AES_VAR implies all three fixed key sizes: define any of AES_128,
+; AES_192 and AES_256 that the user left undefined.
+%ifdef AES_VAR
+%ifndef AES_128
+%define AES_128
+%endif
+%ifndef AES_192
+%define AES_192
+%endif
+%ifndef AES_256
+%define AES_256
+%endif
+%endif
+
+; KS_LENGTH follows the largest enabled key size: 60 for AES_VAR or AES_256,
+; 52 for AES_192, otherwise 44.  These equal 4 * (N + 1) for N = 14, 12 and
+; 10 rounds, i.e. rounds 0..N at four 32-bit words each.
+; NOTE(review): presumably the key schedule length in 32-bit words and must
+; agree with the C header - confirm against aes.h.
+%ifdef AES_VAR
+%define KS_LENGTH       60
+%elifdef AES_256
+%define KS_LENGTH       60
+%elifdef AES_192
+%define KS_LENGTH       52
+%else
+%define KS_LENGTH       44
+%endif
+
+; Numbered aliases r0..r7 for the eight legacy 64-bit general registers.
+; Note the order: r1 is rdx and r2 is rcx (not the hardware encoding order).
+%define     r0  rax
+%define     r1  rdx
+%define     r2  rcx
+%define     r3  rbx
+%define     r4  rsi
+%define     r5  rdi
+%define     r6  rbp
+%define     r7  rsp
+
+; <64-bit name>d -> the matching 32-bit register name.
+%define     raxd    eax
+%define     rdxd    edx
+%define     rcxd    ecx
+%define     rbxd    ebx
+%define     rsid    esi
+%define     rdid    edi
+%define     rbpd    ebp
+%define     rspd    esp
+
+; <64-bit name>b -> the matching low 8-bit register name.
+%define     raxb    al
+%define     rdxb    dl
+%define     rcxb    cl
+%define     rbxb    bl
+%define     rsib    sil
+%define     rdib    dil
+%define     rbpb    bpl
+%define     rspb    spl
+
+; High-byte (bits 8..15) registers; these exist only for r0..r3.
+%define     r0h ah
+%define     r1h dh
+%define     r2h ch
+%define     r3h bh
+
+; 32-bit forms of the numbered aliases r0..r3.
+%define     r0d eax
+%define     r1d edx
+%define     r2d ecx
+%define     r3d ebx
+
+; finite field multiplies by {02}, {04} and {08}
+; Each macro shifts x left by 1, 2 or 3 bits and, for every bit that the
+; shift pushes past bit 7, XORs in a correspondingly shifted copy of 0x11b
+; (x^8 + x^4 + x^3 + x + 1) to reduce the result.
+
+%define f2(x)   ((x<<1)^(((x>>7)&1)*0x11b))
+%define f4(x)   ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
+%define f8(x)   ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
+
+; finite field multiplies required in table generation
+; Built by XORing the multiples above with x itself:
+;   {03} = {02}^{01}        {09} = {08}^{01}
+;   {0b} = {08}^{02}^{01}   {0d} = {08}^{04}^{01}
+;   {0e} = {08}^{04}^{02}
+
+%define f3(x)   (f2(x) ^ x)
+%define f9(x)   (f8(x) ^ x)
+%define fb(x)   (f8(x) ^ f2(x) ^ x)
+%define fd(x)   (f8(x) ^ f4(x) ^ x)
+%define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
+
+; macro for expanding S-box data
+;
+; enc_vals takes the name of a one-argument macro (%1) and applies it to
+; each of the 256 forward S-box values in index order (32 rows of 8),
+; emitting whatever that macro expands to as 'db' data.  It is invoked
+; below with u8 and, when LAST_ROUND_TABLES is defined, with w8; both of
+; those expand to 8 bytes per value.
+
+%macro enc_vals 1
+    db  %1(0x63),%1(0x7c),%1(0x77),%1(0x7b),%1(0xf2),%1(0x6b),%1(0x6f),%1(0xc5)
+    db  %1(0x30),%1(0x01),%1(0x67),%1(0x2b),%1(0xfe),%1(0xd7),%1(0xab),%1(0x76)
+    db  %1(0xca),%1(0x82),%1(0xc9),%1(0x7d),%1(0xfa),%1(0x59),%1(0x47),%1(0xf0)
+    db  %1(0xad),%1(0xd4),%1(0xa2),%1(0xaf),%1(0x9c),%1(0xa4),%1(0x72),%1(0xc0)
+    db  %1(0xb7),%1(0xfd),%1(0x93),%1(0x26),%1(0x36),%1(0x3f),%1(0xf7),%1(0xcc)
+    db  %1(0x34),%1(0xa5),%1(0xe5),%1(0xf1),%1(0x71),%1(0xd8),%1(0x31),%1(0x15)
+    db  %1(0x04),%1(0xc7),%1(0x23),%1(0xc3),%1(0x18),%1(0x96),%1(0x05),%1(0x9a)
+    db  %1(0x07),%1(0x12),%1(0x80),%1(0xe2),%1(0xeb),%1(0x27),%1(0xb2),%1(0x75)
+    db  %1(0x09),%1(0x83),%1(0x2c),%1(0x1a),%1(0x1b),%1(0x6e),%1(0x5a),%1(0xa0)
+    db  %1(0x52),%1(0x3b),%1(0xd6),%1(0xb3),%1(0x29),%1(0xe3),%1(0x2f),%1(0x84)
+    db  %1(0x53),%1(0xd1),%1(0x00),%1(0xed),%1(0x20),%1(0xfc),%1(0xb1),%1(0x5b)
+    db  %1(0x6a),%1(0xcb),%1(0xbe),%1(0x39),%1(0x4a),%1(0x4c),%1(0x58),%1(0xcf)
+    db  %1(0xd0),%1(0xef),%1(0xaa),%1(0xfb),%1(0x43),%1(0x4d),%1(0x33),%1(0x85)
+    db  %1(0x45),%1(0xf9),%1(0x02),%1(0x7f),%1(0x50),%1(0x3c),%1(0x9f),%1(0xa8)
+    db  %1(0x51),%1(0xa3),%1(0x40),%1(0x8f),%1(0x92),%1(0x9d),%1(0x38),%1(0xf5)
+    db  %1(0xbc),%1(0xb6),%1(0xda),%1(0x21),%1(0x10),%1(0xff),%1(0xf3),%1(0xd2)
+    db  %1(0xcd),%1(0x0c),%1(0x13),%1(0xec),%1(0x5f),%1(0x97),%1(0x44),%1(0x17)
+    db  %1(0xc4),%1(0xa7),%1(0x7e),%1(0x3d),%1(0x64),%1(0x5d),%1(0x19),%1(0x73)
+    db  %1(0x60),%1(0x81),%1(0x4f),%1(0xdc),%1(0x22),%1(0x2a),%1(0x90),%1(0x88)
+    db  %1(0x46),%1(0xee),%1(0xb8),%1(0x14),%1(0xde),%1(0x5e),%1(0x0b),%1(0xdb)
+    db  %1(0xe0),%1(0x32),%1(0x3a),%1(0x0a),%1(0x49),%1(0x06),%1(0x24),%1(0x5c)
+    db  %1(0xc2),%1(0xd3),%1(0xac),%1(0x62),%1(0x91),%1(0x95),%1(0xe4),%1(0x79)
+    db  %1(0xe7),%1(0xc8),%1(0x37),%1(0x6d),%1(0x8d),%1(0xd5),%1(0x4e),%1(0xa9)
+    db  %1(0x6c),%1(0x56),%1(0xf4),%1(0xea),%1(0x65),%1(0x7a),%1(0xae),%1(0x08)
+    db  %1(0xba),%1(0x78),%1(0x25),%1(0x2e),%1(0x1c),%1(0xa6),%1(0xb4),%1(0xc6)
+    db  %1(0xe8),%1(0xdd),%1(0x74),%1(0x1f),%1(0x4b),%1(0xbd),%1(0x8b),%1(0x8a)
+    db  %1(0x70),%1(0x3e),%1(0xb5),%1(0x66),%1(0x48),%1(0x03),%1(0xf6),%1(0x0e)
+    db  %1(0x61),%1(0x35),%1(0x57),%1(0xb9),%1(0x86),%1(0xc1),%1(0x1d),%1(0x9e)
+    db  %1(0xe1),%1(0xf8),%1(0x98),%1(0x11),%1(0x69),%1(0xd9),%1(0x8e),%1(0x94)
+    db  %1(0x9b),%1(0x1e),%1(0x87),%1(0xe9),%1(0xce),%1(0x55),%1(0x28),%1(0xdf)
+    db  %1(0x8c),%1(0xa1),%1(0x89),%1(0x0d),%1(0xbf),%1(0xe6),%1(0x42),%1(0x68)
+    db  %1(0x41),%1(0x99),%1(0x2d),%1(0x0f),%1(0xb0),%1(0x54),%1(0xbb),%1(0x16)
+%endmacro
+
+; dec_vals is the decryption counterpart of enc_vals: it applies the
+; one-argument macro named by %1 to each of the 256 inverse S-box values in
+; index order (32 rows of 8) and emits the expansions as 'db' data.  It is
+; invoked below with v8 and, when LAST_ROUND_TABLES is defined, with w8.
+
+%macro dec_vals 1
+    db  %1(0x52),%1(0x09),%1(0x6a),%1(0xd5),%1(0x30),%1(0x36),%1(0xa5),%1(0x38)
+    db  %1(0xbf),%1(0x40),%1(0xa3),%1(0x9e),%1(0x81),%1(0xf3),%1(0xd7),%1(0xfb)
+    db  %1(0x7c),%1(0xe3),%1(0x39),%1(0x82),%1(0x9b),%1(0x2f),%1(0xff),%1(0x87)
+    db  %1(0x34),%1(0x8e),%1(0x43),%1(0x44),%1(0xc4),%1(0xde),%1(0xe9),%1(0xcb)
+    db  %1(0x54),%1(0x7b),%1(0x94),%1(0x32),%1(0xa6),%1(0xc2),%1(0x23),%1(0x3d)
+    db  %1(0xee),%1(0x4c),%1(0x95),%1(0x0b),%1(0x42),%1(0xfa),%1(0xc3),%1(0x4e)
+    db  %1(0x08),%1(0x2e),%1(0xa1),%1(0x66),%1(0x28),%1(0xd9),%1(0x24),%1(0xb2)
+    db  %1(0x76),%1(0x5b),%1(0xa2),%1(0x49),%1(0x6d),%1(0x8b),%1(0xd1),%1(0x25)
+    db  %1(0x72),%1(0xf8),%1(0xf6),%1(0x64),%1(0x86),%1(0x68),%1(0x98),%1(0x16)
+    db  %1(0xd4),%1(0xa4),%1(0x5c),%1(0xcc),%1(0x5d),%1(0x65),%1(0xb6),%1(0x92)
+    db  %1(0x6c),%1(0x70),%1(0x48),%1(0x50),%1(0xfd),%1(0xed),%1(0xb9),%1(0xda)
+    db  %1(0x5e),%1(0x15),%1(0x46),%1(0x57),%1(0xa7),%1(0x8d),%1(0x9d),%1(0x84)
+    db  %1(0x90),%1(0xd8),%1(0xab),%1(0x00),%1(0x8c),%1(0xbc),%1(0xd3),%1(0x0a)
+    db  %1(0xf7),%1(0xe4),%1(0x58),%1(0x05),%1(0xb8),%1(0xb3),%1(0x45),%1(0x06)
+    db  %1(0xd0),%1(0x2c),%1(0x1e),%1(0x8f),%1(0xca),%1(0x3f),%1(0x0f),%1(0x02)
+    db  %1(0xc1),%1(0xaf),%1(0xbd),%1(0x03),%1(0x01),%1(0x13),%1(0x8a),%1(0x6b)
+    db  %1(0x3a),%1(0x91),%1(0x11),%1(0x41),%1(0x4f),%1(0x67),%1(0xdc),%1(0xea)
+    db  %1(0x97),%1(0xf2),%1(0xcf),%1(0xce),%1(0xf0),%1(0xb4),%1(0xe6),%1(0x73)
+    db  %1(0x96),%1(0xac),%1(0x74),%1(0x22),%1(0xe7),%1(0xad),%1(0x35),%1(0x85)
+    db  %1(0xe2),%1(0xf9),%1(0x37),%1(0xe8),%1(0x1c),%1(0x75),%1(0xdf),%1(0x6e)
+    db  %1(0x47),%1(0xf1),%1(0x1a),%1(0x71),%1(0x1d),%1(0x29),%1(0xc5),%1(0x89)
+    db  %1(0x6f),%1(0xb7),%1(0x62),%1(0x0e),%1(0xaa),%1(0x18),%1(0xbe),%1(0x1b)
+    db  %1(0xfc),%1(0x56),%1(0x3e),%1(0x4b),%1(0xc6),%1(0xd2),%1(0x79),%1(0x20)
+    db  %1(0x9a),%1(0xdb),%1(0xc0),%1(0xfe),%1(0x78),%1(0xcd),%1(0x5a),%1(0xf4)
+    db  %1(0x1f),%1(0xdd),%1(0xa8),%1(0x33),%1(0x88),%1(0x07),%1(0xc7),%1(0x31)
+    db  %1(0xb1),%1(0x12),%1(0x10),%1(0x59),%1(0x27),%1(0x80),%1(0xec),%1(0x5f)
+    db  %1(0x60),%1(0x51),%1(0x7f),%1(0xa9),%1(0x19),%1(0xb5),%1(0x4a),%1(0x0d)
+    db  %1(0x2d),%1(0xe5),%1(0x7a),%1(0x9f),%1(0x93),%1(0xc9),%1(0x9c),%1(0xef)
+    db  %1(0xa0),%1(0xe0),%1(0x3b),%1(0x4d),%1(0xae),%1(0x2a),%1(0xf5),%1(0xb0)
+    db  %1(0xc8),%1(0xeb),%1(0xbb),%1(0x3c),%1(0x83),%1(0x53),%1(0x99),%1(0x61)
+    db  %1(0x17),%1(0x2b),%1(0x04),%1(0x7e),%1(0xba),%1(0x77),%1(0xd6),%1(0x26)
+    db  %1(0xe1),%1(0x69),%1(0x14),%1(0x63),%1(0x55),%1(0x21),%1(0x0c),%1(0x7d)
+%endmacro
+
+; Table entry generators: each expands one S-box value x into 8 bytes.
+;   u8 - forward round entry: the 4-byte column (f2,x,x,f3) stored twice
+;   v8 - inverse round entry: (fe,f9,fd,fb) followed by (fe,f9,fd) and then
+;        the plain value x in the last byte (read by tab_i below)
+;   w8 - last round entry: x followed by three zero bytes, stored twice
+; Storing the pattern twice lets a 32-bit load at byte offset 0, 1, 2 or 3
+; within an entry return the column in four different byte rotations, so a
+; single table serves all four byte positions.
+
+%define u8(x)   f2(x), x, x, f3(x), f2(x), x, x, f3(x)
+%define v8(x)   fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x
+%define w8(x)   x, 0, 0, 0, x, 0, 0, 0
+
+; fk_ref(x,y) / ik_ref(x,y) address 32-bit word y of the round key selected
+; by x for the forward and inverse cipher respectively.  kptr is biased by
+; fofs / rofs in the routines below and the same bias is undone here.  With
+; AES_REV_DKS the inverse key reference steps downwards through memory as x
+; grows (like fk_ref); without it, it steps upwards.
+
+%define tptr    rbp     ; table pointer
+%define kptr    r8      ; key schedule pointer
+%define fofs    128     ; adjust offset in key schedule to keep |disp| < 128
+%define fk_ref(x,y) [kptr-16*x+fofs+4*y]
+%ifdef  AES_REV_DKS
+%define rofs    128
+%define ik_ref(x,y) [kptr-16*x+rofs+4*y]
+%else
+%define rofs    -128
+%define ik_ref(x,y) [kptr+16*x+rofs+4*y]
+%endif
+
+; Table accessors; x is a register holding the byte index, scaled by the
+; 8-byte entry size.  tab_0..tab_3 differ only in the byte offset inside
+; the entry (0, 3, 2, 1).  tab_f reads the single byte at offset 1, which
+; is the plain value x in a u8 entry; tab_i reads the byte at offset 7,
+; which is the plain value x in a v8 entry.  t_ref(n,r) pastes n onto
+; 'tab_' to pick one of these by name.
+
+%define tab_0(x)   [tptr+8*x]
+%define tab_1(x)   [tptr+8*x+3]
+%define tab_2(x)   [tptr+8*x+2]
+%define tab_3(x)   [tptr+8*x+1]
+%define tab_f(x)   byte [tptr+8*x+1]
+%define tab_i(x)   byte [tptr+8*x+7]
+%define t_ref(x,r) tab_ %+ x(r)
+
+; ff_rnd: one normal (non-final) forward round.
+;   %1..%4  64-bit register names; their 32-bit forms (%1d..%4d) are used
+;           as the four accumulator words for the new state
+;   %5      round key selector passed to fk_ref
+; Input state is in eax, ebx, ecx, edx and is destroyed by the shifts.
+; esi/edi (via rsi/rdi) are scratch table indices.  On exit the new state
+; has been copied back into eax, ebx, ecx, edx.
+
+%macro ff_rnd 5                 ; normal forward round
+    ; start each output word from its round key word
+    mov     %1d, fk_ref(%5,0)
+    mov     %2d, fk_ref(%5,1)
+    mov     %3d, fk_ref(%5,2)
+    mov     %4d, fk_ref(%5,3)
+
+    ; bytes 0..3 of eax feed %1, %4, %3, %2 through tables 0, 1, 2, 3
+    movzx   esi, al
+    movzx   edi, ah
+    shr     eax, 16
+    xor     %1d, t_ref(0,rsi)
+    xor     %4d, t_ref(1,rdi)
+    movzx   esi, al
+    movzx   edi, ah
+    xor     %3d, t_ref(2,rsi)
+    xor     %2d, t_ref(3,rdi)
+
+    ; bytes 0..3 of ebx feed %2, %1, %4, %3
+    movzx   esi, bl
+    movzx   edi, bh
+    shr     ebx, 16
+    xor     %2d, t_ref(0,rsi)
+    xor     %1d, t_ref(1,rdi)
+    movzx   esi, bl
+    movzx   edi, bh
+    xor     %4d, t_ref(2,rsi)
+    xor     %3d, t_ref(3,rdi)
+
+    ; bytes 0..3 of ecx feed %3, %2, %1, %4
+    movzx   esi, cl
+    movzx   edi, ch
+    shr     ecx, 16
+    xor     %3d, t_ref(0,rsi)
+    xor     %2d, t_ref(1,rdi)
+    movzx   esi, cl
+    movzx   edi, ch
+    xor     %1d, t_ref(2,rsi)
+    xor     %4d, t_ref(3,rdi)
+
+    ; bytes 0..3 of edx feed %4, %3, %2, %1
+    movzx   esi, dl
+    movzx   edi, dh
+    shr     edx, 16
+    xor     %4d, t_ref(0,rsi)
+    xor     %3d, t_ref(1,rdi)
+    movzx   esi, dl
+    movzx   edi, dh
+    xor     %2d, t_ref(2,rsi)
+    xor     %1d, t_ref(3,rdi)
+
+    ; move the new state back into the working registers for the next round
+    mov     eax,%1d
+    mov     ebx,%2d
+    mov     ecx,%3d
+    mov     edx,%4d
+%endmacro
+
+%ifdef LAST_ROUND_TABLES
+
+; fl_rnd (LAST_ROUND_TABLES variant): final forward round using a second
+; table.  Arguments are as for ff_rnd.  tptr is first advanced by 2048
+; bytes, i.e. past one 256-entry table of 8-byte entries, so that the same
+; t_ref accessors read the table that follows it.  Unlike ff_rnd the result
+; is left in %1d..%4d and is not copied back to eax..edx.  tptr is left
+; modified.
+
+%macro fl_rnd 5                 ; last forward round
+    add     tptr, 2048
+    mov     %1d, fk_ref(%5,0)
+    mov     %2d, fk_ref(%5,1)
+    mov     %3d, fk_ref(%5,2)
+    mov     %4d, fk_ref(%5,3)
+
+    ; bytes 0..3 of eax feed %1, %4, %3, %2
+    movzx   esi, al
+    movzx   edi, ah
+    shr     eax, 16
+    xor     %1d, t_ref(0,rsi)
+    xor     %4d, t_ref(1,rdi)
+    movzx   esi, al
+    movzx   edi, ah
+    xor     %3d, t_ref(2,rsi)
+    xor     %2d, t_ref(3,rdi)
+
+    ; bytes 0..3 of ebx feed %2, %1, %4, %3
+    movzx   esi, bl
+    movzx   edi, bh
+    shr     ebx, 16
+    xor     %2d, t_ref(0,rsi)
+    xor     %1d, t_ref(1,rdi)
+    movzx   esi, bl
+    movzx   edi, bh
+    xor     %4d, t_ref(2,rsi)
+    xor     %3d, t_ref(3,rdi)
+
+    ; bytes 0..3 of ecx feed %3, %2, %1, %4
+    movzx   esi, cl
+    movzx   edi, ch
+    shr     ecx, 16
+    xor     %3d, t_ref(0,rsi)
+    xor     %2d, t_ref(1,rdi)
+    movzx   esi, cl
+    movzx   edi, ch
+    xor     %1d, t_ref(2,rsi)
+    xor     %4d, t_ref(3,rdi)
+
+    ; bytes 0..3 of edx feed %4, %3, %2, %1
+    movzx   esi, dl
+    movzx   edi, dh
+    shr     edx, 16
+    xor     %4d, t_ref(0,rsi)
+    xor     %3d, t_ref(1,rdi)
+    movzx   esi, dl
+    movzx   edi, dh
+    xor     %2d, t_ref(2,rsi)
+    xor     %1d, t_ref(3,rdi)
+%endmacro
+
+%else
+
+; fl_rnd (variant without LAST_ROUND_TABLES): final forward round using
+; single-byte lookups.  Arguments are as for ff_rnd.  Each state byte is
+; replaced by the byte read through tab_f (offset 1 of the 8-byte entry),
+; zero-extended, rotated left by 0, 8, 16 or 24 bits into its output byte
+; lane and XORed into the destination word.  The result is left in
+; %1d..%4d and is not copied back to eax..edx.
+
+%macro fl_rnd 5                 ; last forward round
+    mov     %1d, fk_ref(%5,0)
+    mov     %2d, fk_ref(%5,1)
+    mov     %3d, fk_ref(%5,2)
+    mov     %4d, fk_ref(%5,3)
+
+    ; bytes 0..3 of eax go to lanes 0, 1, 2, 3 of %1, %4, %3, %2
+    movzx   esi, al
+    movzx   edi, ah
+    shr     eax, 16
+    movzx   esi, t_ref(f,rsi)
+    movzx   edi, t_ref(f,rdi)
+    xor     %1d, esi
+    rol     edi, 8
+    xor     %4d, edi
+    movzx   esi, al
+    movzx   edi, ah
+    movzx   esi, t_ref(f,rsi)
+    movzx   edi, t_ref(f,rdi)
+    rol     esi, 16
+    rol     edi, 24
+    xor     %3d, esi
+    xor     %2d, edi
+
+    ; bytes 0..3 of ebx go to lanes 0, 1, 2, 3 of %2, %1, %4, %3
+    movzx   esi, bl
+    movzx   edi, bh
+    shr     ebx, 16
+    movzx   esi, t_ref(f,rsi)
+    movzx   edi, t_ref(f,rdi)
+    xor     %2d, esi
+    rol     edi, 8
+    xor     %1d, edi
+    movzx   esi, bl
+    movzx   edi, bh
+    movzx   esi, t_ref(f,rsi)
+    movzx   edi, t_ref(f,rdi)
+    rol     esi, 16
+    rol     edi, 24
+    xor     %4d, esi
+    xor     %3d, edi
+
+    ; bytes 0..3 of ecx go to lanes 0, 1, 2, 3 of %3, %2, %1, %4
+    movzx   esi, cl
+    movzx   edi, ch
+    movzx   esi, t_ref(f,rsi)
+    movzx   edi, t_ref(f,rdi)
+    shr     ecx, 16
+    xor     %3d, esi
+    rol     edi, 8
+    xor     %2d, edi
+    movzx   esi, cl
+    movzx   edi, ch
+    movzx   esi, t_ref(f,rsi)
+    movzx   edi, t_ref(f,rdi)
+    rol     esi, 16
+    rol     edi, 24
+    xor     %1d, esi
+    xor     %4d, edi
+
+    ; bytes 0..3 of edx go to lanes 0, 1, 2, 3 of %4, %3, %2, %1
+    movzx   esi, dl
+    movzx   edi, dh
+    movzx   esi, t_ref(f,rsi)
+    movzx   edi, t_ref(f,rdi)
+    shr     edx, 16
+    xor     %4d, esi
+    rol     edi, 8
+    xor     %3d, edi
+    movzx   esi, dl
+    movzx   edi, dh
+    movzx   esi, t_ref(f,rsi)
+    movzx   edi, t_ref(f,rdi)
+    rol     esi, 16
+    rol     edi, 24
+    xor     %2d, esi
+    xor     %1d, edi
+%endmacro
+
+%endif
+
+; ii_rnd: one normal (non-final) inverse round.
+;   %1..%4  64-bit register names; their 32-bit forms (%1d..%4d) are used
+;           as the four accumulator words for the new state
+;   %5      round key selector passed to ik_ref
+; Input state is in eax, ebx, ecx, edx and is destroyed by the shifts.
+; esi/edi (via rsi/rdi) are scratch table indices.  The byte-to-word
+; routing runs in the opposite direction to ff_rnd.  On exit the new state
+; has been copied back into eax, ebx, ecx, edx.
+
+%macro ii_rnd 5                 ; normal inverse round
+    ; start each output word from its round key word
+    mov     %1d, ik_ref(%5,0)
+    mov     %2d, ik_ref(%5,1)
+    mov     %3d, ik_ref(%5,2)
+    mov     %4d, ik_ref(%5,3)
+
+    ; bytes 0..3 of eax feed %1, %2, %3, %4 through tables 0, 1, 2, 3
+    movzx   esi, al
+    movzx   edi, ah
+    shr     eax, 16
+    xor     %1d, t_ref(0,rsi)
+    xor     %2d, t_ref(1,rdi)
+    movzx   esi, al
+    movzx   edi, ah
+    xor     %3d, t_ref(2,rsi)
+    xor     %4d, t_ref(3,rdi)
+
+    ; bytes 0..3 of ebx feed %2, %3, %4, %1
+    movzx   esi, bl
+    movzx   edi, bh
+    shr     ebx, 16
+    xor     %2d, t_ref(0,rsi)
+    xor     %3d, t_ref(1,rdi)
+    movzx   esi, bl
+    movzx   edi, bh
+    xor     %4d, t_ref(2,rsi)
+    xor     %1d, t_ref(3,rdi)
+
+    ; bytes 0..3 of ecx feed %3, %4, %1, %2
+    movzx   esi, cl
+    movzx   edi, ch
+    shr     ecx, 16
+    xor     %3d, t_ref(0,rsi)
+    xor     %4d, t_ref(1,rdi)
+    movzx   esi, cl
+    movzx   edi, ch
+    xor     %1d, t_ref(2,rsi)
+    xor     %2d, t_ref(3,rdi)
+
+    ; bytes 0..3 of edx feed %4, %1, %2, %3
+    movzx   esi, dl
+    movzx   edi, dh
+    shr     edx, 16
+    xor     %4d, t_ref(0,rsi)
+    xor     %1d, t_ref(1,rdi)
+    movzx   esi, dl
+    movzx   edi, dh
+    xor     %2d, t_ref(2,rsi)
+    xor     %3d, t_ref(3,rdi)
+
+    ; move the new state back into the working registers for the next round
+    mov     eax,%1d
+    mov     ebx,%2d
+    mov     ecx,%3d
+    mov     edx,%4d
+%endmacro
+
+%ifdef LAST_ROUND_TABLES
+
+; il_rnd (LAST_ROUND_TABLES variant): final inverse round using a second
+; table.  Arguments are as for ii_rnd.  tptr is first advanced by 2048
+; bytes, i.e. past one 256-entry table of 8-byte entries, so that the same
+; t_ref accessors read the table that follows it.  The result is left in
+; %1d..%4d and is not copied back to eax..edx.  tptr is left modified.
+
+%macro il_rnd 5                 ; last inverse round
+    add     tptr, 2048
+    mov     %1d, ik_ref(%5,0)
+    mov     %2d, ik_ref(%5,1)
+    mov     %3d, ik_ref(%5,2)
+    mov     %4d, ik_ref(%5,3)
+
+    ; bytes 0..3 of eax feed %1, %2, %3, %4
+    movzx   esi, al
+    movzx   edi, ah
+    shr     eax, 16
+    xor     %1d, t_ref(0,rsi)
+    xor     %2d, t_ref(1,rdi)
+    movzx   esi, al
+    movzx   edi, ah
+    xor     %3d, t_ref(2,rsi)
+    xor     %4d, t_ref(3,rdi)
+
+    ; bytes 0..3 of ebx feed %2, %3, %4, %1
+    movzx   esi, bl
+    movzx   edi, bh
+    shr     ebx, 16
+    xor     %2d, t_ref(0,rsi)
+    xor     %3d, t_ref(1,rdi)
+    movzx   esi, bl
+    movzx   edi, bh
+    xor     %4d, t_ref(2,rsi)
+    xor     %1d, t_ref(3,rdi)
+
+    ; bytes 0..3 of ecx feed %3, %4, %1, %2
+    movzx   esi, cl
+    movzx   edi, ch
+    shr     ecx, 16
+    xor     %3d, t_ref(0,rsi)
+    xor     %4d, t_ref(1,rdi)
+    movzx   esi, cl
+    movzx   edi, ch
+    xor     %1d, t_ref(2,rsi)
+    xor     %2d, t_ref(3,rdi)
+
+    ; bytes 0..3 of edx feed %4, %1, %2, %3
+    movzx   esi, dl
+    movzx   edi, dh
+    shr     edx, 16
+    xor     %4d, t_ref(0,rsi)
+    xor     %1d, t_ref(1,rdi)
+    movzx   esi, dl
+    movzx   edi, dh
+    xor     %2d, t_ref(2,rsi)
+    xor     %3d, t_ref(3,rdi)
+%endmacro
+
+%else
+
+; il_rnd (variant without LAST_ROUND_TABLES): final inverse round using
+; single-byte lookups.  Arguments are as for ii_rnd.  Each state byte is
+; replaced by the byte read through tab_i (offset 7 of the 8-byte entry),
+; zero-extended, rotated left by 0, 8, 16 or 24 bits into its output byte
+; lane and XORed into the destination word.  The result is left in
+; %1d..%4d and is not copied back to eax..edx.
+
+%macro il_rnd 5                 ; last inverse round
+    mov     %1d, ik_ref(%5,0)
+    mov     %2d, ik_ref(%5,1)
+    mov     %3d, ik_ref(%5,2)
+    mov     %4d, ik_ref(%5,3)
+
+    ; bytes 0..3 of eax go to lanes 0, 1, 2, 3 of %1, %2, %3, %4
+    movzx   esi, al
+    movzx   edi, ah
+    movzx   esi, t_ref(i,rsi)
+    movzx   edi, t_ref(i,rdi)
+    shr     eax, 16
+    xor     %1d, esi
+    rol     edi, 8
+    xor     %2d, edi
+    movzx   esi, al
+    movzx   edi, ah
+    movzx   esi, t_ref(i,rsi)
+    movzx   edi, t_ref(i,rdi)
+    rol     esi, 16
+    rol     edi, 24
+    xor     %3d, esi
+    xor     %4d, edi
+
+    ; bytes 0..3 of ebx go to lanes 0, 1, 2, 3 of %2, %3, %4, %1
+    movzx   esi, bl
+    movzx   edi, bh
+    movzx   esi, t_ref(i,rsi)
+    movzx   edi, t_ref(i,rdi)
+    shr     ebx, 16
+    xor     %2d, esi
+    rol     edi, 8
+    xor     %3d, edi
+    movzx   esi, bl
+    movzx   edi, bh
+    movzx   esi, t_ref(i,rsi)
+    movzx   edi, t_ref(i,rdi)
+    rol     esi, 16
+    rol     edi, 24
+    xor     %4d, esi
+    xor     %1d, edi
+
+    ; bytes 0..3 of ecx go to lanes 0, 1, 2, 3 of %3, %4, %1, %2
+    movzx   esi, cl
+    movzx   edi, ch
+    movzx   esi, t_ref(i,rsi)
+    movzx   edi, t_ref(i,rdi)
+    shr     ecx, 16
+    xor     %3d, esi
+    rol     edi, 8
+    xor     %4d, edi
+    movzx   esi, cl
+    movzx   edi, ch
+    movzx   esi, t_ref(i,rsi)
+    movzx   edi, t_ref(i,rdi)
+    rol     esi, 16
+    rol     edi, 24
+    xor     %1d, esi
+    xor     %2d, edi
+
+    ; bytes 0..3 of edx go to lanes 0, 1, 2, 3 of %4, %1, %2, %3
+    movzx   esi, dl
+    movzx   edi, dh
+    movzx   esi, t_ref(i,rsi)
+    movzx   edi, t_ref(i,rdi)
+    shr     edx, 16
+    xor     %4d, esi
+    rol     edi, 8
+    xor     %1d, edi
+    movzx   esi, dl
+    movzx   edi, dh
+    movzx   esi, t_ref(i,rsi)
+    movzx   edi, t_ref(i,rdi)
+    rol     esi, 16
+    rol     edi, 24
+    xor     %2d, esi
+    xor     %3d, edi
+%endmacro
+
+%endif
+
+%ifdef ENCRYPTION
+
+; aes_encrypt: encrypt a single 16-byte block.
+;   arguments (in order): input pointer, output pointer, context pointer
+;   returns rax = 0 on success, rax = -1 if the length byte read from the
+;   context is not 10*16, 12*16 or 14*16
+; After the prologue, whichever calling convention is selected, the input
+; pointer is in rdi, the output pointer is saved at [rsp] and the context
+; pointer is in r8 (kptr).  rbx, rbp and r12 are saved and restored; rsi
+; and rdi are also saved and restored in the non-__GNUC__ builds.
+
+    global  aes_encrypt
+%ifdef DLL_EXPORT
+    export  aes_encrypt
+%endif
+
+; enc_tab: 256 entries of 8 bytes (2048 bytes) generated by u8, followed
+; when LAST_ROUND_TABLES is defined by a second 2048-byte table generated
+; by w8 (fl_rnd steps tptr forward by 2048 to reach it).
+
+    section .data align=64
+    align   64
+enc_tab:
+    enc_vals u8
+%ifdef LAST_ROUND_TABLES
+    enc_vals w8
+%endif
+
+    section .text align=16
+    align   16
+
+%ifdef _SEH_
+proc_frame aes_encrypt
+	alloc_stack	7*8			; 7 to align stack to 16 bytes
+	save_reg	rsi,4*8
+	save_reg	rdi,5*8
+	save_reg	rbx,1*8
+	save_reg	rbp,2*8
+	save_reg	r12,3*8
+end_prologue
+    mov     rdi, rcx        ; input pointer
+    mov     [rsp+0*8], rdx  ; output pointer
+%else
+	aes_encrypt:
+	%ifdef __GNUC__
+		sub     rsp, 4*8        ; gnu/linux binary interface
+		mov     [rsp+0*8], rsi  ; output pointer
+		mov     r8, rdx         ; context
+	%else
+		sub     rsp, 6*8        ; windows binary interface
+		mov     [rsp+4*8], rsi
+		mov     [rsp+5*8], rdi
+		mov     rdi, rcx        ; input pointer
+		mov     [rsp+0*8], rdx  ; output pointer
+	%endif
+		mov     [rsp+1*8], rbx  ; input pointer in rdi
+		mov     [rsp+2*8], rbp  ; output pointer in [rsp]
+		mov     [rsp+3*8], r12  ; context in r8
+%endif
+
+    ; esi = byte stored just after the KS_LENGTH-word key schedule; it is
+    ; compared with 10*16, 12*16 and 14*16 below and is also the byte
+    ; offset of the final round key (presumably ctx->inf.b[0] - confirm
+    ; against the context structure)
+    movzx   esi, byte [kptr+4*KS_LENGTH]
+    lea     tptr,[enc_tab wrt rip]
+    sub     kptr, fofs      ; bias kptr; fk_ref adds fofs back
+
+    ; load the input block into the working state
+    mov     eax, [rdi+0*4]
+    mov     ebx, [rdi+1*4]
+    mov     ecx, [rdi+2*4]
+    mov     edx, [rdi+3*4]
+
+    ; initial round key addition with the first 16 bytes of the schedule
+    xor     eax, [kptr+fofs]
+    xor     ebx, [kptr+fofs+4]
+    xor     ecx, [kptr+fofs+8]
+    xor     edx, [kptr+fofs+12]
+
+    ; point kptr at the final round key so that fk_ref(n,..) addresses the
+    ; key n rounds before the end, then enter the round chain at the point
+    ; matching the key length
+    lea     kptr,[kptr+rsi]
+    cmp     esi, 10*16
+    je      .3
+    cmp     esi, 12*16
+    je      .2
+    cmp     esi, 14*16
+    je      .1
+    mov     rax, -1         ; unrecognised length byte: fail
+    jmp     .4
+
+.1: ff_rnd  r9, r10, r11, r12, 13
+    ff_rnd  r9, r10, r11, r12, 12
+.2: ff_rnd  r9, r10, r11, r12, 11
+    ff_rnd  r9, r10, r11, r12, 10
+.3: ff_rnd  r9, r10, r11, r12, 9
+    ff_rnd  r9, r10, r11, r12, 8
+    ff_rnd  r9, r10, r11, r12, 7
+    ff_rnd  r9, r10, r11, r12, 6
+    ff_rnd  r9, r10, r11, r12, 5
+    ff_rnd  r9, r10, r11, r12, 4
+    ff_rnd  r9, r10, r11, r12, 3
+    ff_rnd  r9, r10, r11, r12, 2
+    ff_rnd  r9, r10, r11, r12, 1
+    fl_rnd  r9, r10, r11, r12, 0
+
+    ; fl_rnd leaves the result in r9d..r12d; store it via the saved
+    ; output pointer and return 0
+    mov     rbx, [rsp]
+    mov     [rbx], r9d
+    mov     [rbx+4], r10d
+    mov     [rbx+8], r11d
+    mov     [rbx+12], r12d
+    xor     rax, rax
+.4:
+    ; common exit: restore saved registers and release the stack frame
+    mov     rbx, [rsp+1*8]
+    mov     rbp, [rsp+2*8]
+    mov     r12, [rsp+3*8]
+%ifdef __GNUC__
+    add     rsp, 4*8
+    ret
+%else
+	mov     rsi, [rsp+4*8]
+	mov     rdi, [rsp+5*8]
+	%ifdef _SEH_
+		add     rsp, 7*8
+		ret
+	endproc_frame
+	%else
+		add     rsp, 6*8
+		ret
+	%endif
+%endif
+
+%endif
+
+%ifdef DECRYPTION
+
+; aes_decrypt: decrypt a single 16-byte block.
+;   arguments (in order): input pointer, output pointer, context pointer
+;   returns rax = 0 on success, rax = -1 if the length byte read from the
+;   context is not 10*16, 12*16 or 14*16
+; After the prologue, whichever calling convention is selected, the input
+; pointer is in rdi, the output pointer is saved at [rsp] and the context
+; pointer is in r8 (kptr).  rbx, rbp and r12 are saved and restored; rsi
+; and rdi are also saved and restored in the non-__GNUC__ builds.
+
+    global  aes_decrypt
+%ifdef DLL_EXPORT
+    export  aes_decrypt
+%endif
+
+; dec_tab: 256 entries of 8 bytes (2048 bytes) generated by v8, followed
+; when LAST_ROUND_TABLES is defined by a second 2048-byte table generated
+; by w8 (il_rnd steps tptr forward by 2048 to reach it).
+
+    section .data
+    align   64
+dec_tab:
+    dec_vals v8
+%ifdef LAST_ROUND_TABLES
+    dec_vals w8
+%endif
+
+    section .text
+    align   16
+
+%ifdef _SEH_
+proc_frame aes_decrypt
+	alloc_stack	7*8			; 7 to align stack to 16 bytes
+	save_reg	rsi,4*8
+	save_reg	rdi,5*8
+	save_reg	rbx,1*8
+	save_reg	rbp,2*8
+	save_reg	r12,3*8
+end_prologue
+    mov     rdi, rcx        ; input pointer
+    mov     [rsp+0*8], rdx  ; output pointer
+%else
+	aes_decrypt:
+	%ifdef __GNUC__
+		sub     rsp, 4*8        ; gnu/linux binary interface
+		mov     [rsp+0*8], rsi  ; output pointer
+		mov     r8, rdx         ; context
+	%else
+		sub     rsp, 6*8        ; windows binary interface
+		mov     [rsp+4*8], rsi
+		mov     [rsp+5*8], rdi
+		mov     rdi, rcx        ; input pointer
+		mov     [rsp+0*8], rdx  ; output pointer
+	%endif
+		mov     [rsp+1*8], rbx  ; input pointer in rdi
+		mov     [rsp+2*8], rbp  ; output pointer in [rsp]
+		mov     [rsp+3*8], r12  ; context in r8
+%endif
+
+    ; esi = byte stored just after the KS_LENGTH-word key schedule; it is
+    ; compared with 10*16, 12*16 and 14*16 below (presumably
+    ; ctx->inf.b[0] - confirm against the context structure)
+    movzx   esi,byte[kptr+4*KS_LENGTH]
+    lea     tptr,[dec_tab wrt rip]
+    sub     kptr, rofs      ; bias kptr; ik_ref adds rofs back
+
+    ; load the input block into the working state (rdi is reused below)
+    mov     eax, [rdi+0*4]
+    mov     ebx, [rdi+1*4]
+    mov     ecx, [rdi+2*4]
+    mov     edx, [rdi+3*4]
+
+    ; rdi = biased address of the first round key to apply: the start of
+    ; the schedule with AES_REV_DKS (kptr then moves to the far end), or
+    ; the start plus esi bytes otherwise (kptr stays at the start)
+%ifdef      AES_REV_DKS
+    mov     rdi, kptr
+    lea     kptr,[kptr+rsi]
+%else
+    lea     rdi,[kptr+rsi]
+%endif
+
+    ; initial round key addition
+    xor     eax, [rdi+rofs]
+    xor     ebx, [rdi+rofs+4]
+    xor     ecx, [rdi+rofs+8]
+    xor     edx, [rdi+rofs+12]
+
+    ; enter the round chain at the point matching the key length
+    cmp     esi, 10*16
+    je      .3
+    cmp     esi, 12*16
+    je      .2
+    cmp     esi, 14*16
+    je      .1
+    mov     rax, -1         ; unrecognised length byte: fail
+    jmp     .4
+
+.1: ii_rnd  r9, r10, r11, r12, 13
+    ii_rnd  r9, r10, r11, r12, 12
+.2: ii_rnd  r9, r10, r11, r12, 11
+    ii_rnd  r9, r10, r11, r12, 10
+.3: ii_rnd  r9, r10, r11, r12, 9
+    ii_rnd  r9, r10, r11, r12, 8
+    ii_rnd  r9, r10, r11, r12, 7
+    ii_rnd  r9, r10, r11, r12, 6
+    ii_rnd  r9, r10, r11, r12, 5
+    ii_rnd  r9, r10, r11, r12, 4
+    ii_rnd  r9, r10, r11, r12, 3
+    ii_rnd  r9, r10, r11, r12, 2
+    ii_rnd  r9, r10, r11, r12, 1
+    il_rnd  r9, r10, r11, r12, 0
+
+    ; il_rnd leaves the result in r9d..r12d; store it via the saved
+    ; output pointer and return 0
+    mov     rbx, [rsp]
+    mov     [rbx], r9d
+    mov     [rbx+4], r10d
+    mov     [rbx+8], r11d
+    mov     [rbx+12], r12d
+    xor     rax, rax
+    ; common exit: restore saved registers and release the stack frame
+.4: mov     rbx, [rsp+1*8]
+    mov     rbp, [rsp+2*8]
+    mov     r12, [rsp+3*8]
+%ifdef __GNUC__
+    add     rsp, 4*8
+    ret
+%else
+	mov     rsi, [rsp+4*8]
+	mov     rdi, [rsp+5*8]
+	%ifdef _SEH_
+		add     rsp, 7*8
+		ret
+	endproc_frame
+	%else
+		add     rsp, 6*8
+		ret
+	%endif
+%endif
+
+%endif
+
+    end
--- a/src/java/kp2akeytransform/jni/aes/aes_modes.c
+++ b/src/java/kp2akeytransform/jni/aes/aes_modes.c
@ -0,0 +1,945 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+  1. source code distributions include the above copyright notice, this
+     list of conditions and the following disclaimer;
+
+  2. binary distributions include the above copyright notice, this list
+     of conditions and the following disclaimer in their documentation;
+
+  3. the name of the copyright holder is not used to endorse products
+     built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+
+ These subroutines implement multiple block AES modes for ECB, CBC, CFB,
+ OFB and CTR encryption.  The code provides support for the VIA Advanced
+ Cryptography Engine (ACE).
+
+ NOTE: In the following subroutines, the AES contexts (ctx) must be
+ 16 byte aligned if VIA ACE is being used
+*/
+
+#include <string.h>
+#include <assert.h>
+
+#include "aesopt.h"
+
+#if defined( AES_MODES )
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#if defined( _MSC_VER ) && ( _MSC_VER > 800 )
+#pragma intrinsic(memcpy)
+#endif
+
+/* Number of AES blocks held by the local bounce buffer that the VIA ACE */
+/* code paths below use when the caller's buffers are not 16 byte aligned */
+
+#define BFR_BLOCKS      8
+
+/* These values are used to detect long word alignment in order to */
+/* speed up some buffer operations. This facility may not work on  */
+/* some machines so this define can be commented out if necessary  */
+
+#define FAST_BUFFER_OPERATIONS
+
+/* View a buffer as an array of 32-bit words; the mode functions only  */
+/* use it after checking the pointers involved with ALIGN_OFFSET(p, 4) */
+
+#define lp32(x)         ((uint_32t*)(x))
+
+/* VIA ACE support: control word tables and the address of the key data */
+/* for decryption. Without USE_VIA_ACE_IF_PRESENT only the plain array  */
+/* fallbacks for aligned_array / aligned_auto at the end are defined.   */
+
+#if defined( USE_VIA_ACE_IF_PRESENT )
+
+#include "aes_via_ace.h"
+
+#pragma pack(16)
+
+/* One 12 element table per direction (enc/dec) and key type            */
+/* (gen/load/hybrid), initialised from the NEH_* data macros; these are */
+/* the tables indexed by the _MSC_VER > 1200 form of via_cwd            */
+
+aligned_array(unsigned long,    enc_gen_table, 12, 16) =    NEH_ENC_GEN_DATA;
+aligned_array(unsigned long,   enc_load_table, 12, 16) =   NEH_ENC_LOAD_DATA;
+aligned_array(unsigned long, enc_hybrid_table, 12, 16) = NEH_ENC_HYBRID_DATA;
+aligned_array(unsigned long,    dec_gen_table, 12, 16) =    NEH_DEC_GEN_DATA;
+aligned_array(unsigned long,   dec_load_table, 12, 16) =   NEH_DEC_LOAD_DATA;
+aligned_array(unsigned long, dec_hybrid_table, 12, 16) = NEH_DEC_HYBRID_DATA;
+
+/* NOTE: These control word macros must only be used after  */
+/* a key has been set up because they depend on key size    */
+
+/* kd_adr(c): key data address used for VIA ACE decryption. Depending   */
+/* on NEH_KEY_TYPE this is the start of c->ks, c->ks offset by the byte */
+/* c->inf.b[0], or (otherwise) c->ks offset by 160 only when            */
+/* c->inf.b[0] is 160                                                   */
+
+#if NEH_KEY_TYPE == NEH_LOAD
+#define kd_adr(c)   ((uint_8t*)(c)->ks)
+#elif NEH_KEY_TYPE == NEH_GENERATE
+#define kd_adr(c)   ((uint_8t*)(c)->ks + (c)->inf.b[0])
+#else
+#define kd_adr(c)   ((uint_8t*)(c)->ks + ((c)->inf.b[0] == 160 ? 160 : 0))
+#endif
+
+#else
+
+/* No VIA ACE: declare plain arrays; the stride argument is ignored */
+
+#define aligned_array(type, name, no, stride) type name[no]
+#define aligned_auto(type, name, no, stride)  type name[no]
+
+#endif
+
+/* via_cwd(cwd, ty, dir, len) declares a local named cwd for the VIA ACE  */
+/* operations. ty is gen, load or hybrid and dir is enc or dec.           */
+/* For _MSC_VER > 1200 cwd is a pointer into the <dir>_<ty>_table array   */
+/* at index (len - 128) >> 4. Otherwise cwd is a local 4 element array    */
+/* whose first element is neh_<dir>_<ty>_key(len) and whose other         */
+/* elements are zero. Both forms expand to a declaration, so the macro    */
+/* has to be used where a declaration is allowed.                         */
+
+#if defined( _MSC_VER ) && _MSC_VER > 1200
+
+#define via_cwd(cwd, ty, dir, len) \
+    unsigned long* cwd = (dir##_##ty##_table + ((len - 128) >> 4))
+
+#else
+
+#define via_cwd(cwd, ty, dir, len)              \
+    aligned_auto(unsigned long, cwd, 4, 16);    \
+    cwd[1] = cwd[2] = cwd[3] = 0;               \
+    cwd[0] = neh_##dir##_##ty##_key(len)
+
+#endif
+
+/* Self test for the ALIGN_FLOOR / ALIGN_CEIL pointer alignment macros.   */
+/* For each of the n consecutive addresses at the start of a local buffer */
+/* the floor and ceiling must either coincide or be exactly n apart.      */
+/* Succeeds only when they coincide for exactly one of the n addresses    */
+/* and are n apart for the other n - 1. Fails if n is outside 4..16.      */
+
+AES_RETURN aes_test_alignment_detection(unsigned int n)	/* 4 <= n <= 16 */
+{
+	uint_8t	buf[16];
+	uint_32t ofs = 0, on_boundary = 0, off_boundary = 0;
+
+	if(n < 4 || n > 16)
+		return EXIT_FAILURE;
+
+	while(ofs < n)
+	{
+		uint_8t *lo = ALIGN_FLOOR(buf + ofs, n);
+		uint_8t *hi =  ALIGN_CEIL(buf + ofs, n);
+
+		if(hi == lo)
+			++on_boundary;      /* address is already aligned */
+		else if(hi == lo + n)
+			++off_boundary;     /* address lies between two boundaries */
+		else
+			return EXIT_FAILURE;
+
+		++ofs;
+	}
+
+	if(on_boundary == 1 && off_boundary == n - 1)
+		return EXIT_SUCCESS;
+
+	return EXIT_FAILURE;
+}
+
+/* Reset the mode state byte inf.b[2] of the context to zero. The CFB    */
+/* code in this file reads that byte as its position within a partially */
+/* processed block. Always returns EXIT_SUCCESS.                        */
+
+AES_RETURN aes_mode_reset(aes_encrypt_ctx ctx[1])
+{
+    ctx[0].inf.b[2] = 0;
+
+    return EXIT_SUCCESS;
+}
+
+/* ECB encrypt len bytes from ibuf into obuf, one AES block at a time.    */
+/*   ibuf  input data                                                    */
+/*   obuf  output buffer, at least len bytes                             */
+/*   len   byte count; must be non-negative and a multiple of            */
+/*         AES_BLOCK_SIZE (the block count is computed as len >> 4)      */
+/*   ctx   encryption context with a key already set                     */
+/* Returns EXIT_SUCCESS, or EXIT_FAILURE if len is invalid, if the VIA   */
+/* ACE path is taken with a context that is not 16 byte aligned, or if   */
+/* aes_encrypt fails on a block (earlier blocks have then been written). */
+
+AES_RETURN aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, const aes_encrypt_ctx ctx[1])
+{   int nb = len >> 4;
+
+    /* a negative len that is a multiple of 16 would pass the alignment  */
+    /* test alone and leave nb negative, so the block loops below would  */
+    /* not stop at the end of the buffers; reject it here                */
+    if(len < 0 || (len & (AES_BLOCK_SIZE - 1)))
+        return EXIT_FAILURE;
+
+#if defined( USE_VIA_ACE_IF_PRESENT )
+
+    /* inf.b[1] == 0xff selects the VIA ACE path for this context */
+    if(ctx->inf.b[1] == 0xff)
+    {   uint_8t *ksp = (uint_8t*)(ctx->ks);
+        via_cwd(cwd, hybrid, enc, 2 * ctx->inf.b[0] - 192);
+
+        if(ALIGN_OFFSET( ctx, 16 ))
+            return EXIT_FAILURE;
+
+        if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ))
+        {
+            via_ecb_op5(ksp, cwd, ibuf, obuf, nb);
+        }
+        else
+        {   aligned_auto(uint_8t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
+            uint_8t *ip, *op;
+
+            /* work in chunks of up to BFR_BLOCKS blocks, going through */
+            /* the local buffer for whichever side is unaligned         */
+            while(nb)
+            {
+                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);
+
+                ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);
+                op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
+
+                if(ip != ibuf)
+                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
+
+                via_ecb_op5(ksp, cwd, ip, op, m);
+
+                if(op != obuf)
+                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
+
+                ibuf += m * AES_BLOCK_SIZE;
+                obuf += m * AES_BLOCK_SIZE;
+                nb -= m;
+            }
+        }
+
+        return EXIT_SUCCESS;
+    }
+
+#endif
+
+#if !defined( ASSUME_VIA_ACE_PRESENT )
+    /* software path: encrypt each block independently */
+    while(nb--)
+    {
+        if(aes_encrypt(ibuf, obuf, ctx) != EXIT_SUCCESS)
+			return EXIT_FAILURE;
+        ibuf += AES_BLOCK_SIZE;
+        obuf += AES_BLOCK_SIZE;
+    }
+#endif
+    return EXIT_SUCCESS;
+}
+
+/* ECB decrypt len bytes from ibuf into obuf, one AES block at a time.    */
+/*   ibuf  input data                                                    */
+/*   obuf  output buffer, at least len bytes                             */
+/*   len   byte count; must be non-negative and a multiple of            */
+/*         AES_BLOCK_SIZE (the block count is computed as len >> 4)      */
+/*   ctx   decryption context with a key already set                     */
+/* Returns EXIT_SUCCESS, or EXIT_FAILURE if len is invalid, if the VIA   */
+/* ACE path is taken with a context that is not 16 byte aligned, or if   */
+/* aes_decrypt fails on a block (earlier blocks have then been written). */
+
+AES_RETURN aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, const aes_decrypt_ctx ctx[1])
+{   int nb = len >> 4;
+
+    /* a negative len that is a multiple of 16 would pass the alignment  */
+    /* test alone and leave nb negative, so the block loops below would  */
+    /* not stop at the end of the buffers; reject it here                */
+    if(len < 0 || (len & (AES_BLOCK_SIZE - 1)))
+        return EXIT_FAILURE;
+
+#if defined( USE_VIA_ACE_IF_PRESENT )
+
+    /* inf.b[1] == 0xff selects the VIA ACE path for this context */
+    if(ctx->inf.b[1] == 0xff)
+    {   uint_8t *ksp = kd_adr(ctx);
+        via_cwd(cwd, hybrid, dec, 2 * ctx->inf.b[0] - 192);
+
+        if(ALIGN_OFFSET( ctx, 16 ))
+            return EXIT_FAILURE;
+
+        if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ))
+        {
+            via_ecb_op5(ksp, cwd, ibuf, obuf, nb);
+        }
+        else
+        {   aligned_auto(uint_8t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
+            uint_8t *ip, *op;
+
+            /* work in chunks of up to BFR_BLOCKS blocks, going through */
+            /* the local buffer for whichever side is unaligned         */
+            while(nb)
+            {
+                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);
+
+                ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);
+                op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
+
+                if(ip != ibuf)
+                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
+
+                via_ecb_op5(ksp, cwd, ip, op, m);
+
+                if(op != obuf)
+                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
+
+                ibuf += m * AES_BLOCK_SIZE;
+                obuf += m * AES_BLOCK_SIZE;
+                nb -= m;
+            }
+        }
+
+        return EXIT_SUCCESS;
+    }
+
+#endif
+
+#if !defined( ASSUME_VIA_ACE_PRESENT )
+    /* software path: decrypt each block independently */
+    while(nb--)
+    {
+        if(aes_decrypt(ibuf, obuf, ctx) != EXIT_SUCCESS)
+			return EXIT_FAILURE;
+        ibuf += AES_BLOCK_SIZE;
+        obuf += AES_BLOCK_SIZE;
+    }
+#endif
+    return EXIT_SUCCESS;
+}
+
+/* CBC encrypt len bytes from ibuf into obuf.                             */
+/*   ibuf  input data                                                    */
+/*   obuf  output buffer, at least len bytes                             */
+/*   len   byte count; must be non-negative and a multiple of            */
+/*         AES_BLOCK_SIZE (the block count is computed as len >> 4)      */
+/*   iv    AES_BLOCK_SIZE byte chaining value, updated in place: in the  */
+/*         software path it holds the last block written to obuf         */
+/*   ctx   encryption context with a key already set                     */
+/* Returns EXIT_SUCCESS, or EXIT_FAILURE if len is invalid, if the VIA   */
+/* ACE path is taken with a context that is not 16 byte aligned, or if   */
+/* aes_encrypt fails on a block.                                         */
+
+AES_RETURN aes_cbc_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, const aes_encrypt_ctx ctx[1])
+{   int nb = len >> 4;
+
+    /* a negative len that is a multiple of 16 would pass the alignment  */
+    /* test alone and leave nb negative, so the block loops below would  */
+    /* not stop at the end of the buffers; reject it here                */
+    if(len < 0 || (len & (AES_BLOCK_SIZE - 1)))
+        return EXIT_FAILURE;
+
+#if defined( USE_VIA_ACE_IF_PRESENT )
+
+    /* inf.b[1] == 0xff selects the VIA ACE path for this context */
+    if(ctx->inf.b[1] == 0xff)
+    {   uint_8t *ksp = (uint_8t*)(ctx->ks), *ivp = iv;
+        aligned_auto(uint_8t, liv, AES_BLOCK_SIZE, 16);
+        via_cwd(cwd, hybrid, enc, 2 * ctx->inf.b[0] - 192);
+
+        if(ALIGN_OFFSET( ctx, 16 ))
+            return EXIT_FAILURE;
+
+        if(ALIGN_OFFSET( iv, 16 ))   /* ensure an aligned iv */
+        {
+            ivp = liv;
+            memcpy(liv, iv, AES_BLOCK_SIZE);
+        }
+
+        if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ) && !ALIGN_OFFSET( iv, 16 ))
+        {
+            via_cbc_op7(ksp, cwd, ibuf, obuf, nb, ivp, ivp);
+        }
+        else
+        {   aligned_auto(uint_8t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
+            uint_8t *ip, *op;
+
+            /* work in chunks of up to BFR_BLOCKS blocks, going through */
+            /* the local buffer for whichever side is unaligned         */
+            while(nb)
+            {
+                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);
+
+                ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);
+                op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
+
+                if(ip != ibuf)
+                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
+
+                via_cbc_op7(ksp, cwd, ip, op, m, ivp, ivp);
+
+                if(op != obuf)
+                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
+
+                ibuf += m * AES_BLOCK_SIZE;
+                obuf += m * AES_BLOCK_SIZE;
+                nb -= m;
+            }
+        }
+
+        /* copy the chaining value back if a local aligned copy was used */
+        if(iv != ivp)
+            memcpy(iv, ivp, AES_BLOCK_SIZE);
+
+        return EXIT_SUCCESS;
+    }
+
+#endif
+
+#if !defined( ASSUME_VIA_ACE_PRESENT )
+# ifdef FAST_BUFFER_OPERATIONS
+    /* word-wise XOR when both ibuf and iv are 4 byte aligned */
+    if(!ALIGN_OFFSET( ibuf, 4 ) && !ALIGN_OFFSET( iv, 4 ))
+        while(nb--)
+        {
+            lp32(iv)[0] ^= lp32(ibuf)[0];
+            lp32(iv)[1] ^= lp32(ibuf)[1];
+            lp32(iv)[2] ^= lp32(ibuf)[2];
+            lp32(iv)[3] ^= lp32(ibuf)[3];
+            /* encrypt iv in place; it becomes both the output block */
+            /* and the chaining value for the next block             */
+            if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+				return EXIT_FAILURE;
+            memcpy(obuf, iv, AES_BLOCK_SIZE);
+            ibuf += AES_BLOCK_SIZE;
+            obuf += AES_BLOCK_SIZE;
+        }
+    else
+# endif
+        /* byte-wise XOR otherwise */
+        while(nb--)
+        {
+            iv[ 0] ^= ibuf[ 0]; iv[ 1] ^= ibuf[ 1];
+            iv[ 2] ^= ibuf[ 2]; iv[ 3] ^= ibuf[ 3];
+            iv[ 4] ^= ibuf[ 4]; iv[ 5] ^= ibuf[ 5];
+            iv[ 6] ^= ibuf[ 6]; iv[ 7] ^= ibuf[ 7];
+            iv[ 8] ^= ibuf[ 8]; iv[ 9] ^= ibuf[ 9];
+            iv[10] ^= ibuf[10]; iv[11] ^= ibuf[11];
+            iv[12] ^= ibuf[12]; iv[13] ^= ibuf[13];
+            iv[14] ^= ibuf[14]; iv[15] ^= ibuf[15];
+            if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+				return EXIT_FAILURE;
+            memcpy(obuf, iv, AES_BLOCK_SIZE);
+            ibuf += AES_BLOCK_SIZE;
+            obuf += AES_BLOCK_SIZE;
+        }
+#endif
+    return EXIT_SUCCESS;
+}
+
+/* CBC decrypt len bytes from ibuf into obuf.                             */
+/*   ibuf  input data                                                    */
+/*   obuf  output buffer, at least len bytes                             */
+/*   len   byte count; must be non-negative and a multiple of            */
+/*         AES_BLOCK_SIZE (the block count is computed as len >> 4)      */
+/*   iv    AES_BLOCK_SIZE byte chaining value, updated in place: in the  */
+/*         software path it holds the last input block processed         */
+/*   ctx   decryption context with a key already set                     */
+/* Returns EXIT_SUCCESS, or EXIT_FAILURE if len is invalid, if the VIA   */
+/* ACE path is taken with a context that is not 16 byte aligned, or if   */
+/* aes_decrypt fails on a block.                                         */
+
+AES_RETURN aes_cbc_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, const aes_decrypt_ctx ctx[1])
+{   unsigned char tmp[AES_BLOCK_SIZE];
+    int nb = len >> 4;
+
+    /* a negative len that is a multiple of 16 would pass the alignment  */
+    /* test alone and leave nb negative, so the block loops below would  */
+    /* not stop at the end of the buffers; reject it here                */
+    if(len < 0 || (len & (AES_BLOCK_SIZE - 1)))
+        return EXIT_FAILURE;
+
+#if defined( USE_VIA_ACE_IF_PRESENT )
+
+    /* inf.b[1] == 0xff selects the VIA ACE path for this context */
+    if(ctx->inf.b[1] == 0xff)
+    {   uint_8t *ksp = kd_adr(ctx), *ivp = iv;
+        aligned_auto(uint_8t, liv, AES_BLOCK_SIZE, 16);
+        via_cwd(cwd, hybrid, dec, 2 * ctx->inf.b[0] - 192);
+
+        if(ALIGN_OFFSET( ctx, 16 ))
+            return EXIT_FAILURE;
+
+        if(ALIGN_OFFSET( iv, 16 ))   /* ensure an aligned iv */
+        {
+            ivp = liv;
+            memcpy(liv, iv, AES_BLOCK_SIZE);
+        }
+
+        if(!ALIGN_OFFSET( ibuf, 16 ) && !ALIGN_OFFSET( obuf, 16 ) && !ALIGN_OFFSET( iv, 16 ))
+        {
+            via_cbc_op6(ksp, cwd, ibuf, obuf, nb, ivp);
+        }
+        else
+        {   aligned_auto(uint_8t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
+            uint_8t *ip, *op;
+
+            /* work in chunks of up to BFR_BLOCKS blocks, going through */
+            /* the local buffer for whichever side is unaligned         */
+            while(nb)
+            {
+                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);
+
+                ip = (ALIGN_OFFSET( ibuf, 16 ) ? buf : ibuf);
+                op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
+
+                if(ip != ibuf)
+                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
+
+                via_cbc_op6(ksp, cwd, ip, op, m, ivp);
+
+                if(op != obuf)
+                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
+
+                ibuf += m * AES_BLOCK_SIZE;
+                obuf += m * AES_BLOCK_SIZE;
+                nb -= m;
+            }
+        }
+
+        /* copy the chaining value back if a local aligned copy was used */
+        if(iv != ivp)
+            memcpy(iv, ivp, AES_BLOCK_SIZE);
+
+        return EXIT_SUCCESS;
+    }
+#endif
+
+#if !defined( ASSUME_VIA_ACE_PRESENT )
+# ifdef FAST_BUFFER_OPERATIONS
+    /* word-wise XOR when both obuf and iv are 4 byte aligned */
+    if(!ALIGN_OFFSET( obuf, 4 ) && !ALIGN_OFFSET( iv, 4 ))
+        while(nb--)
+        {
+            /* keep a copy of the input block before obuf is written: */
+            /* it becomes the next chaining value even if obuf and    */
+            /* ibuf refer to the same memory                          */
+            memcpy(tmp, ibuf, AES_BLOCK_SIZE);
+            if(aes_decrypt(ibuf, obuf, ctx) != EXIT_SUCCESS)
+				return EXIT_FAILURE;
+            lp32(obuf)[0] ^= lp32(iv)[0];
+            lp32(obuf)[1] ^= lp32(iv)[1];
+            lp32(obuf)[2] ^= lp32(iv)[2];
+            lp32(obuf)[3] ^= lp32(iv)[3];
+            memcpy(iv, tmp, AES_BLOCK_SIZE);
+            ibuf += AES_BLOCK_SIZE;
+            obuf += AES_BLOCK_SIZE;
+        }
+    else
+# endif
+        /* byte-wise XOR otherwise */
+        while(nb--)
+        {
+            /* saved input block becomes the next chaining value */
+            memcpy(tmp, ibuf, AES_BLOCK_SIZE);
+            if(aes_decrypt(ibuf, obuf, ctx) != EXIT_SUCCESS)
+				return EXIT_FAILURE;
+            obuf[ 0] ^= iv[ 0]; obuf[ 1] ^= iv[ 1];
+            obuf[ 2] ^= iv[ 2]; obuf[ 3] ^= iv[ 3];
+            obuf[ 4] ^= iv[ 4]; obuf[ 5] ^= iv[ 5];
+            obuf[ 6] ^= iv[ 6]; obuf[ 7] ^= iv[ 7];
+            obuf[ 8] ^= iv[ 8]; obuf[ 9] ^= iv[ 9];
+            obuf[10] ^= iv[10]; obuf[11] ^= iv[11];
+            obuf[12] ^= iv[12]; obuf[13] ^= iv[13];
+            obuf[14] ^= iv[14]; obuf[15] ^= iv[15];
+            memcpy(iv, tmp, AES_BLOCK_SIZE);
+            ibuf += AES_BLOCK_SIZE;
+            obuf += AES_BLOCK_SIZE;
+        }
+#endif
+    return EXIT_SUCCESS;
+}
+
+/* CFB mode encryption of len bytes from ibuf into obuf.
+ *
+ * iv holds the feedback block and is updated in place.  The offset reached
+ * inside that block is read from ctx->inf.b[2] on entry and written back on
+ * exit, so a stream can be fed in pieces of any length.
+ *
+ * When USE_VIA_ACE_IF_PRESENT is defined and ctx->inf.b[1] == 0xff, whole
+ * blocks go through via_cfb_op7(); in that build the software whole-block
+ * loops are compiled out, so for any other ctx->inf.b[1] value the data is
+ * handled by the byte-wise loop at the end.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if aes_encrypt() fails or (VIA path)
+ * ctx is not 16 byte aligned.
+ */
+AES_RETURN aes_cfb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx ctx[1])
+{
+    int done = 0;                       /* bytes handled so far            */
+    int pos  = (int)ctx->inf.b[2];      /* offset inside the current block */
+    int nb;
+
+    if(pos)             /* finish a block left incomplete by an earlier call */
+    {
+        for( ; pos < AES_BLOCK_SIZE && done < len; ++done)
+            *obuf++ = (iv[pos++] ^= *ibuf++);
+
+        if(pos == AES_BLOCK_SIZE)
+            pos = 0;
+    }
+
+    nb = (len - done) >> 4;
+    if(nb != 0)         /* at least one whole block remains */
+    {
+#if defined( USE_VIA_ACE_IF_PRESENT )
+
+        if(ctx->inf.b[1] == 0xff)
+        {   int m;
+            uint_8t *ksp = (uint_8t*)(ctx->ks), *ivp = iv;
+            aligned_auto(uint_8t, liv, AES_BLOCK_SIZE, 16);
+            via_cwd(cwd, hybrid, enc, 2 * ctx->inf.b[0] - 192);
+
+            if(ALIGN_OFFSET( ctx, 16 ))
+                return EXIT_FAILURE;
+
+            if(ALIGN_OFFSET( iv, 16 ))  /* work on an aligned copy of the iv */
+            {
+                memcpy(liv, iv, AES_BLOCK_SIZE);
+                ivp = liv;
+            }
+
+            if(ALIGN_OFFSET( ibuf, 16 ) || ALIGN_OFFSET( obuf, 16 ))
+            {   /* bounce whichever side is unaligned through buf */
+                aligned_auto(uint_8t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
+                uint_8t *ip, *op;
+
+                while(nb)
+                {
+                    m = (nb < BFR_BLOCKS ? nb : BFR_BLOCKS);
+                    nb -= m;
+
+                    if(ALIGN_OFFSET( ibuf, 16 ))
+                    {
+                        memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
+                        ip = buf;
+                    }
+                    else
+                        ip = (uint_8t*)ibuf;
+
+                    op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
+
+                    via_cfb_op7(ksp, cwd, ip, op, m, ivp, ivp);
+
+                    if(op != obuf)
+                        memcpy(obuf, buf, m * AES_BLOCK_SIZE);
+
+                    ibuf += m * AES_BLOCK_SIZE;
+                    obuf += m * AES_BLOCK_SIZE;
+                    done += m * AES_BLOCK_SIZE;
+                }
+            }
+            else    /* both buffers aligned: one call does all blocks */
+            {
+                via_cfb_op7(ksp, cwd, ibuf, obuf, nb, ivp, ivp);
+                ibuf += nb * AES_BLOCK_SIZE;
+                obuf += nb * AES_BLOCK_SIZE;
+                done += nb * AES_BLOCK_SIZE;
+            }
+
+            if(ivp != iv)               /* hand the updated iv back */
+                memcpy(iv, ivp, AES_BLOCK_SIZE);
+        }
+#else
+# ifdef FAST_BUFFER_OPERATIONS
+        if(!ALIGN_OFFSET( ibuf, 4 ) && !ALIGN_OFFSET( obuf, 4 ) && !ALIGN_OFFSET( iv, 4 ))
+            while(done + AES_BLOCK_SIZE <= len)
+            {   int w;
+
+                assert(pos == 0);
+                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+                    return EXIT_FAILURE;
+                for(w = 0; w < 4; ++w)  /* four 32-bit words per block */
+                    lp32(obuf)[w] = lp32(iv)[w] ^= lp32(ibuf)[w];
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+                done += AES_BLOCK_SIZE;
+            }
+        else
+# endif
+            while(done + AES_BLOCK_SIZE <= len)
+            {   int j;
+
+                assert(pos == 0);
+                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+                    return EXIT_FAILURE;
+                for(j = 0; j < AES_BLOCK_SIZE; ++j)
+                    obuf[j] = iv[j] ^= ibuf[j];
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+                done += AES_BLOCK_SIZE;
+            }
+#endif
+    }
+
+    while(done < len)   /* whatever is left, one byte at a time */
+    {
+        if(pos == 0)
+        {
+            if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+                return EXIT_FAILURE;
+        }
+
+        for( ; done < len && pos < AES_BLOCK_SIZE; ++done)
+            *obuf++ = (iv[pos++] ^= *ibuf++);
+
+        if(pos == AES_BLOCK_SIZE)
+            pos = 0;
+    }
+
+    ctx->inf.b[2] = (uint_8t)pos;
+    return EXIT_SUCCESS;
+}
+
+/* CFB mode decryption of len bytes from ibuf into obuf.
+ *
+ * iv holds the feedback block and is updated in place (each ciphertext byte
+ * replaces the iv byte it was combined with).  The offset reached inside the
+ * block is read from ctx->inf.b[2] on entry and written back on exit.
+ *
+ * When USE_VIA_ACE_IF_PRESENT is defined and ctx->inf.b[1] == 0xff, whole
+ * blocks go through via_cfb_op6(); in that build the software whole-block
+ * loops are compiled out, so for any other ctx->inf.b[1] value the data is
+ * handled by the byte-wise loop at the end.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if aes_encrypt() fails or (VIA path)
+ * ctx is not 16 byte aligned.
+ */
+AES_RETURN aes_cfb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx ctx[1])
+{
+    int done = 0;                       /* bytes handled so far            */
+    int pos  = (int)ctx->inf.b[2];      /* offset inside the current block */
+    int nb;
+
+    if(pos)             /* finish a block left incomplete by an earlier call */
+    {   uint_8t c;
+
+        for( ; pos < AES_BLOCK_SIZE && done < len; ++done)
+        {
+            c = *ibuf++;
+            *obuf++ = c ^ iv[pos];
+            iv[pos++] = c;
+        }
+
+        if(pos == AES_BLOCK_SIZE)
+            pos = 0;
+    }
+
+    nb = (len - done) >> 4;
+    if(nb != 0)         /* at least one whole block remains */
+    {
+#if defined( USE_VIA_ACE_IF_PRESENT )
+
+        if(ctx->inf.b[1] == 0xff)
+        {   int m;
+            uint_8t *ksp = (uint_8t*)(ctx->ks), *ivp = iv;
+            aligned_auto(uint_8t, liv, AES_BLOCK_SIZE, 16);
+            via_cwd(cwd, hybrid, dec, 2 * ctx->inf.b[0] - 192);
+
+            if(ALIGN_OFFSET( ctx, 16 ))
+                return EXIT_FAILURE;
+
+            if(ALIGN_OFFSET( iv, 16 ))  /* work on an aligned copy of the iv */
+            {
+                memcpy(liv, iv, AES_BLOCK_SIZE);
+                ivp = liv;
+            }
+
+            if(ALIGN_OFFSET( ibuf, 16 ) || ALIGN_OFFSET( obuf, 16 ))
+            {   /* bounce whichever side is unaligned through buf */
+                aligned_auto(uint_8t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
+                uint_8t *ip, *op;
+
+                while(nb)
+                {
+                    m = (nb < BFR_BLOCKS ? nb : BFR_BLOCKS);
+                    nb -= m;
+
+                    if(ALIGN_OFFSET( ibuf, 16 ))    /* input is not aligned */
+                    {
+                        memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
+                        ip = buf;
+                    }
+                    else
+                        ip = (uint_8t*)ibuf;
+
+                    op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
+
+                    via_cfb_op6(ksp, cwd, ip, op, m, ivp);
+
+                    if(op != obuf)                  /* output is not aligned */
+                        memcpy(obuf, buf, m * AES_BLOCK_SIZE);
+
+                    ibuf += m * AES_BLOCK_SIZE;
+                    obuf += m * AES_BLOCK_SIZE;
+                    done += m * AES_BLOCK_SIZE;
+                }
+            }
+            else    /* both buffers aligned: one call does all blocks */
+            {
+                via_cfb_op6(ksp, cwd, ibuf, obuf, nb, ivp);
+                ibuf += nb * AES_BLOCK_SIZE;
+                obuf += nb * AES_BLOCK_SIZE;
+                done += nb * AES_BLOCK_SIZE;
+            }
+
+            if(ivp != iv)               /* hand the updated iv back */
+                memcpy(iv, ivp, AES_BLOCK_SIZE);
+        }
+#else
+# ifdef FAST_BUFFER_OPERATIONS
+        if(!ALIGN_OFFSET( ibuf, 4 ) && !ALIGN_OFFSET( obuf, 4 ) &&!ALIGN_OFFSET( iv, 4 ))
+            while(done + AES_BLOCK_SIZE <= len)
+            {   uint_32t cw;
+                int w;
+
+                assert(pos == 0);
+                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+                    return EXIT_FAILURE;
+                for(w = 0; w < 4; ++w)  /* four 32-bit words per block */
+                {
+                    cw = lp32(ibuf)[w];
+                    lp32(obuf)[w] = cw ^ lp32(iv)[w];
+                    lp32(iv)[w] = cw;
+                }
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+                done += AES_BLOCK_SIZE;
+            }
+        else
+# endif
+            while(done + AES_BLOCK_SIZE <= len)
+            {   uint_8t c;
+                int j;
+
+                assert(pos == 0);
+                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+                    return EXIT_FAILURE;
+                for(j = 0; j < AES_BLOCK_SIZE; ++j)
+                {
+                    c = ibuf[j];
+                    obuf[j] = c ^ iv[j];
+                    iv[j] = c;
+                }
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+                done += AES_BLOCK_SIZE;
+            }
+#endif
+    }
+
+    while(done < len)   /* whatever is left, one byte at a time */
+    {   uint_8t c;
+
+        if(pos == 0)
+        {
+            if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+                return EXIT_FAILURE;
+        }
+
+        for( ; done < len && pos < AES_BLOCK_SIZE; ++done)
+        {
+            c = *ibuf++;
+            *obuf++ = c ^ iv[pos];
+            iv[pos++] = c;
+        }
+
+        if(pos == AES_BLOCK_SIZE)
+            pos = 0;
+    }
+
+    ctx->inf.b[2] = (uint_8t)pos;
+    return EXIT_SUCCESS;
+}
+
+/* OFB mode: XOR len bytes of ibuf with the keystream and store them in obuf.
+ * The same routine serves for encryption and decryption.
+ *
+ * iv holds the current keystream block and is updated in place by
+ * aes_encrypt(iv, iv, ...).  The offset reached inside the block is read from
+ * ctx->inf.b[2] on entry and written back on exit.
+ *
+ * When USE_VIA_ACE_IF_PRESENT is defined and ctx->inf.b[1] == 0xff, whole
+ * blocks go through via_ofb_op6(); in that build the software whole-block
+ * loops are compiled out, so for any other ctx->inf.b[1] value the data is
+ * handled by the byte-wise loop at the end.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if aes_encrypt() fails or (VIA path)
+ * ctx is not 16 byte aligned.
+ */
+AES_RETURN aes_ofb_crypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx ctx[1])
+{
+    int done = 0;                       /* bytes handled so far            */
+    int pos  = (int)ctx->inf.b[2];      /* offset inside the current block */
+    int nb;
+
+    if(pos)             /* use up keystream left over from an earlier call */
+    {
+        for( ; pos < AES_BLOCK_SIZE && done < len; ++done)
+            *obuf++ = iv[pos++] ^ *ibuf++;
+
+        if(pos == AES_BLOCK_SIZE)
+            pos = 0;
+    }
+
+    nb = (len - done) >> 4;
+    if(nb != 0)         /* at least one whole block remains */
+    {
+#if defined( USE_VIA_ACE_IF_PRESENT )
+
+        if(ctx->inf.b[1] == 0xff)
+        {   int m;
+            uint_8t *ksp = (uint_8t*)(ctx->ks), *ivp = iv;
+            aligned_auto(uint_8t, liv, AES_BLOCK_SIZE, 16);
+            via_cwd(cwd, hybrid, enc, 2 * ctx->inf.b[0] - 192);
+
+            if(ALIGN_OFFSET( ctx, 16 ))
+                return EXIT_FAILURE;
+
+            if(ALIGN_OFFSET( iv, 16 ))  /* work on an aligned copy of the iv */
+            {
+                memcpy(liv, iv, AES_BLOCK_SIZE);
+                ivp = liv;
+            }
+
+            if(ALIGN_OFFSET( ibuf, 16 ) || ALIGN_OFFSET( obuf, 16 ))
+            {   /* bounce whichever side is unaligned through buf */
+                aligned_auto(uint_8t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
+                uint_8t *ip, *op;
+
+                while(nb)
+                {
+                    m = (nb < BFR_BLOCKS ? nb : BFR_BLOCKS);
+                    nb -= m;
+
+                    if(ALIGN_OFFSET( ibuf, 16 ))
+                    {
+                        memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
+                        ip = buf;
+                    }
+                    else
+                        ip = (uint_8t*)ibuf;
+
+                    op = (ALIGN_OFFSET( obuf, 16 ) ? buf : obuf);
+
+                    via_ofb_op6(ksp, cwd, ip, op, m, ivp);
+
+                    if(op != obuf)
+                        memcpy(obuf, buf, m * AES_BLOCK_SIZE);
+
+                    ibuf += m * AES_BLOCK_SIZE;
+                    obuf += m * AES_BLOCK_SIZE;
+                    done += m * AES_BLOCK_SIZE;
+                }
+            }
+            else    /* both buffers aligned: one call does all blocks */
+            {
+                via_ofb_op6(ksp, cwd, ibuf, obuf, nb, ivp);
+                ibuf += nb * AES_BLOCK_SIZE;
+                obuf += nb * AES_BLOCK_SIZE;
+                done += nb * AES_BLOCK_SIZE;
+            }
+
+            if(ivp != iv)               /* hand the updated iv back */
+                memcpy(iv, ivp, AES_BLOCK_SIZE);
+        }
+#else
+# ifdef FAST_BUFFER_OPERATIONS
+        if(!ALIGN_OFFSET( ibuf, 4 ) && !ALIGN_OFFSET( obuf, 4 ) && !ALIGN_OFFSET( iv, 4 ))
+            while(done + AES_BLOCK_SIZE <= len)
+            {   int w;
+
+                assert(pos == 0);
+                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+                    return EXIT_FAILURE;
+                for(w = 0; w < 4; ++w)  /* four 32-bit words per block */
+                    lp32(obuf)[w] = lp32(iv)[w] ^ lp32(ibuf)[w];
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+                done += AES_BLOCK_SIZE;
+            }
+        else
+# endif
+            while(done + AES_BLOCK_SIZE <= len)
+            {   int j;
+
+                assert(pos == 0);
+                if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+                    return EXIT_FAILURE;
+                for(j = 0; j < AES_BLOCK_SIZE; ++j)
+                    obuf[j] = iv[j] ^ ibuf[j];
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+                done += AES_BLOCK_SIZE;
+            }
+#endif
+    }
+
+    while(done < len)   /* whatever is left, one byte at a time */
+    {
+        if(pos == 0)
+        {
+            if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
+                return EXIT_FAILURE;
+        }
+
+        for( ; done < len && pos < AES_BLOCK_SIZE; ++done)
+            *obuf++ = iv[pos++] ^ *ibuf++;
+
+        if(pos == AES_BLOCK_SIZE)
+            pos = 0;
+    }
+
+    ctx->inf.b[2] = (uint_8t)pos;
+    return EXIT_SUCCESS;
+}
+
+/* size in bytes of the local keystream buffer used by aes_ctr_crypt() */
+#define BFR_LENGTH  (BFR_BLOCKS * AES_BLOCK_SIZE)
+
+/* CTR mode: XOR len bytes of ibuf with the encrypted counter stream and store
+ * them in obuf.
+ *
+ * cbuf is the counter block; ctr_inc(cbuf) is called to advance it after each
+ * complete block has been consumed.  The offset reached inside a partly used
+ * keystream block is read from ctx->inf.b[2] on entry and written back on
+ * exit; a partly used block is regenerated from cbuf on the next call.
+ *
+ * Counter blocks are staged in a local buffer of BFR_LENGTH bytes, encrypted
+ * in one go (via_ecb_op5() when USE_VIA_ACE_IF_PRESENT is defined and
+ * ctx->inf.b[1] == 0xff, aes_ecb_encrypt() otherwise) and then XORed with the
+ * input.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if aes_ecb_encrypt() fails or (VIA
+ * build, ctx->inf.b[1] == 0xff) ctx is not 16 byte aligned.
+ */
+AES_RETURN aes_ctr_crypt(const unsigned char *ibuf, unsigned char *obuf,
+            int len, unsigned char *cbuf, cbuf_inc ctr_inc, aes_encrypt_ctx ctx[1])
+{
+    unsigned char   *kp;                /* walks the keystream held in buf */
+    int             k, chunk, pos = (int)(ctx->inf.b[2]);
+
+#if defined( USE_VIA_ACE_IF_PRESENT )
+    aligned_auto(uint_8t, buf, BFR_LENGTH, 16);
+    if(ctx->inf.b[1] == 0xff && ALIGN_OFFSET( ctx, 16 ))
+        return EXIT_FAILURE;
+#else
+    uint_8t buf[BFR_LENGTH];
+#endif
+
+    if(pos)             /* resume inside a partly used keystream block */
+    {
+        memcpy(buf, cbuf, AES_BLOCK_SIZE);
+        if(aes_ecb_encrypt(buf, buf, AES_BLOCK_SIZE, ctx) != EXIT_SUCCESS)
+            return EXIT_FAILURE;
+
+        for( ; pos < AES_BLOCK_SIZE && len; --len)
+            *obuf++ = *ibuf++ ^ buf[pos++];
+
+        if(len)         /* block used up and more data follows */
+        {
+            ctr_inc(cbuf);
+            pos = 0;
+        }
+    }
+
+    while(len)
+    {
+        chunk = len;
+        if(chunk > BFR_LENGTH)
+            chunk = BFR_LENGTH;
+        len -= chunk;
+
+        /* stage one counter value per whole block of this chunk */
+        kp = buf;
+        for(k = 0; k < (chunk >> 4); ++k, kp += AES_BLOCK_SIZE)
+        {
+            memcpy(kp, cbuf, AES_BLOCK_SIZE);
+            ctr_inc(cbuf);
+        }
+
+        /* a trailing partial block uses the counter without advancing it */
+        if(chunk & (AES_BLOCK_SIZE - 1))
+        {
+            memcpy(kp, cbuf, AES_BLOCK_SIZE);
+            k++;
+        }
+
+#if defined( USE_VIA_ACE_IF_PRESENT )
+        if(ctx->inf.b[1] == 0xff)
+        {
+            via_cwd(cwd, hybrid, enc, 2 * ctx->inf.b[0] - 192);
+            via_ecb_op5((ctx->ks), cwd, buf, buf, k);
+        }
+        else
+#endif
+        if(aes_ecb_encrypt(buf, buf, k * AES_BLOCK_SIZE, ctx) != EXIT_SUCCESS)
+            return EXIT_FAILURE;
+
+        k = 0;
+        kp = buf;
+# ifdef FAST_BUFFER_OPERATIONS
+        if(!ALIGN_OFFSET( ibuf, 4 ) && !ALIGN_OFFSET( obuf, 4 ) && !ALIGN_OFFSET( kp, 4 ))
+            while(k + AES_BLOCK_SIZE <= chunk)
+            {   int w;
+
+                for(w = 0; w < 4; ++w)  /* four 32-bit words per block */
+                    lp32(obuf)[w] = lp32(ibuf)[w] ^ lp32(kp)[w];
+                k += AES_BLOCK_SIZE;
+                kp += AES_BLOCK_SIZE;
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+            }
+        else
+#endif
+            while(k + AES_BLOCK_SIZE <= chunk)
+            {   int j;
+
+                for(j = 0; j < AES_BLOCK_SIZE; ++j)
+                    obuf[j] = ibuf[j] ^ kp[j];
+                k += AES_BLOCK_SIZE;
+                kp += AES_BLOCK_SIZE;
+                ibuf += AES_BLOCK_SIZE;
+                obuf += AES_BLOCK_SIZE;
+            }
+
+        /* bytes of a trailing partial block */
+        for( ; k < chunk; ++k)
+            *obuf++ = *ibuf++ ^ kp[pos++];
+    }
+
+    ctx->inf.b[2] = (uint_8t)pos;
+    return EXIT_SUCCESS;
+}
+
+#if defined(__cplusplus)
+}
+#endif
+#endif
--- a/src/java/kp2akeytransform/jni/aes/aes_via_ace.h
+++ b/src/java/kp2akeytransform/jni/aes/aes_via_ace.h
@ -0,0 +1,529 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+  1. source code distributions include the above copyright notice, this
+     list of conditions and the following disclaimer;
+
+  2. binary distributions include the above copyright notice, this list
+     of conditions and the following disclaimer in their documentation;
+
+  3. the name of the copyright holder is not used to endorse products
+     built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+*/
+
+#ifndef AES_VIA_ACE_H
+#define AES_VIA_ACE_H
+
+#if defined( _MSC_VER )        /* choose the inline-function spelling for the compiler in use */
+#  define INLINE  __inline
+#elif defined( __GNUC__ )
+#  define INLINE  static inline
+#else
+#  error VIA ACE requires Microsoft or GNU C
+#endif
+
+#define NEH_GENERATE    1   /* NOTE(review): NEH_GENERATE/LOAD/HYBRID are not used in this header; */
+#define NEH_LOAD        2   /* presumably they name the three key-schedule strategies of the       */
+#define NEH_HYBRID      3   /* *_GEN / *_LOAD / *_HYBRID tables below - confirm against the users  */
+
+#define MAX_READ_ATTEMPTS   1000    /* retry limit for one RNG read in read_via_rng() */
+
+/* VIA Nehemiah RNG and ACE Feature Mask Values */
+
+#define NEH_CPU_IS_VIA      0x00000001  /* via_flags: is_via_cpu() found a VIA CPU   */
+#define NEH_CPU_READ        0x00000010  /* via_flags: is_via_cpu() has been run      */
+#define NEH_CPU_MASK        0x00000011  /* both CPU identification bits              */
+
+#define NEH_RNG_PRESENT     0x00000004
+#define NEH_RNG_ENABLED     0x00000008
+#define NEH_ACE_PRESENT     0x00000040
+#define NEH_ACE_ENABLED     0x00000080
+#define NEH_RNG_FLAGS       (NEH_RNG_PRESENT | NEH_RNG_ENABLED)
+#define NEH_ACE_FLAGS       (NEH_ACE_PRESENT | NEH_ACE_ENABLED)
+#define NEH_FLAGS_MASK      (NEH_RNG_FLAGS | NEH_ACE_FLAGS)     /* bits kept by read_via_flags() */
+
+/* VIA Nehemiah Advanced Cryptography Engine (ACE) Control Word Values  */
+
+/* The key-size values are sums, so they are parenthesised to stay a single */
+/* operand wherever the macro is expanded (e.g. NEH_KEY256 * 2, ~NEH_KEY192). */
+/* Their low byte (0x0a/0x0c/0x0e) is presumably the round count - confirm.  */
+
+#define NEH_GEN_KEY     0x00000000      /* generate key schedule        */
+#define NEH_LOAD_KEY    0x00000080      /* load schedule from memory    */
+#define NEH_ENCRYPT     0x00000000      /* encryption                   */
+#define NEH_DECRYPT     0x00000200      /* decryption                   */
+#define NEH_KEY128      (0x00000000+0x0a) /* 128 bit key                */
+#define NEH_KEY192      (0x00000400+0x0c) /* 192 bit key                */
+#define NEH_KEY256      (0x00000800+0x0e) /* 256 bit key                */
+
+/* direction combined with key-schedule source */
+#define NEH_ENC_GEN     (NEH_ENCRYPT | NEH_GEN_KEY)
+#define NEH_DEC_GEN     (NEH_DECRYPT | NEH_GEN_KEY)
+#define NEH_ENC_LOAD    (NEH_ENCRYPT | NEH_LOAD_KEY)
+#define NEH_DEC_LOAD    (NEH_DECRYPT | NEH_LOAD_KEY)
+
+#define NEH_ENC_GEN_DATA {  /* each *_DATA initialiser: three rows of four words, for 128, 192 and 256 bit keys */ \
+    NEH_ENC_GEN | NEH_KEY128, 0, 0, 0,\
+    NEH_ENC_GEN | NEH_KEY192, 0, 0, 0,\
+    NEH_ENC_GEN | NEH_KEY256, 0, 0, 0 }
+
+#define NEH_ENC_LOAD_DATA {\
+    NEH_ENC_LOAD | NEH_KEY128, 0, 0, 0,\
+    NEH_ENC_LOAD | NEH_KEY192, 0, 0, 0,\
+    NEH_ENC_LOAD | NEH_KEY256, 0, 0, 0 }
+
+#define NEH_ENC_HYBRID_DATA {   /* hybrid: generate the schedule for 128 bit keys, load it for 192/256 */ \
+    NEH_ENC_GEN  | NEH_KEY128, 0, 0, 0,\
+    NEH_ENC_LOAD | NEH_KEY192, 0, 0, 0,\
+    NEH_ENC_LOAD | NEH_KEY256, 0, 0, 0 }
+
+#define NEH_DEC_GEN_DATA {\
+    NEH_DEC_GEN | NEH_KEY128, 0, 0, 0,\
+    NEH_DEC_GEN | NEH_KEY192, 0, 0, 0,\
+    NEH_DEC_GEN | NEH_KEY256, 0, 0, 0 }
+
+#define NEH_DEC_LOAD_DATA {\
+    NEH_DEC_LOAD | NEH_KEY128, 0, 0, 0,\
+    NEH_DEC_LOAD | NEH_KEY192, 0, 0, 0,\
+    NEH_DEC_LOAD | NEH_KEY256, 0, 0, 0 }
+
+#define NEH_DEC_HYBRID_DATA {\
+    NEH_DEC_GEN  | NEH_KEY128, 0, 0, 0,\
+    NEH_DEC_LOAD | NEH_KEY192, 0, 0, 0,\
+    NEH_DEC_LOAD | NEH_KEY256, 0, 0, 0 }
+
+#define neh_enc_gen_key(x)  ((x) == 128 ? (NEH_ENC_GEN | NEH_KEY128) :      \
+     (x) == 192 ? (NEH_ENC_GEN | NEH_KEY192) : (NEH_ENC_GEN | NEH_KEY256))  /* neh_*_key(x): control word for key length x; any x other than 128 or 192 gets the 256 bit word */
+
+#define neh_enc_load_key(x) ((x) == 128 ? (NEH_ENC_LOAD | NEH_KEY128) :     \
+     (x) == 192 ? (NEH_ENC_LOAD | NEH_KEY192) : (NEH_ENC_LOAD | NEH_KEY256))
+
+#define neh_enc_hybrid_key(x)   ((x) == 128 ? (NEH_ENC_GEN | NEH_KEY128) :  \
+     (x) == 192 ? (NEH_ENC_LOAD | NEH_KEY192) : (NEH_ENC_LOAD | NEH_KEY256))
+
+#define neh_dec_gen_key(x)  ((x) == 128 ? (NEH_DEC_GEN | NEH_KEY128) :      \
+     (x) == 192 ? (NEH_DEC_GEN | NEH_KEY192) : (NEH_DEC_GEN | NEH_KEY256))
+
+#define neh_dec_load_key(x) ((x) == 128 ? (NEH_DEC_LOAD | NEH_KEY128) :     \
+     (x) == 192 ? (NEH_DEC_LOAD | NEH_KEY192) : (NEH_DEC_LOAD | NEH_KEY256))
+
+#define neh_dec_hybrid_key(x)   ((x) == 128 ? (NEH_DEC_GEN | NEH_KEY128) :  \
+     (x) == 192 ? (NEH_DEC_LOAD | NEH_KEY192) : (NEH_DEC_LOAD | NEH_KEY256))
+
+/* aligned_auto(type, name, no, stride) declares an automatic array 'name' of
+ * 'no' elements of 'type' whose address is a multiple of 'stride' bytes.
+ *
+ * Without compiler support it over-allocates a byte array _name by 'stride'
+ * bytes and points 'name' at the first suitably aligned address inside it.
+ * The address is rounded up to a multiple of 'stride' (not a fixed 16), and
+ * all macro arguments are parenthesised so expressions can be passed safely.
+ * NOTE(review): the address is cast through unsigned long, which assumes a
+ * pointer fits in that type on the non-MSVC targets - confirm.
+ */
+#if defined( _MSC_VER ) && ( _MSC_VER > 1200 )
+#define aligned_auto(type, name, no, stride)  __declspec(align(stride)) type name[no]
+#else
+#define aligned_auto(type, name, no, stride)                    \
+    unsigned char _##name[(no) * sizeof(type) + (stride)];      \
+    type *name = (type*)((stride) * ((((unsigned long)(_##name)) + (stride) - 1) / (stride)))
+#endif
+
+#if defined( _MSC_VER ) && ( _MSC_VER > 1200 )     /* aligned_array: array declaration carrying the compiler's alignment attribute */
+#define aligned_array(type, name, no, stride) __declspec(align(stride)) type name[no]
+#elif defined( __GNUC__ )
+#define aligned_array(type, name, no, stride) type name[no] __attribute__ ((aligned(stride)))
+#else
+#define aligned_array(type, name, no, stride) type name[no]    /* fallback: plain array, 'stride' is ignored */
+#endif
+
+/* VIA CPU detection and RNG/ACE feature flags (NEH_CPU_* and NEH_*_PRESENT / */
+/* NEH_*_ENABLED bits), written by is_via_cpu() and read_via_flags().  Being  */
+/* static in a header, each including source file gets its own copy.         */
+
+static unsigned char via_flags = 0;
+
+#if defined ( _MSC_VER ) && ( _MSC_VER > 800 )
+
+#define NEH_REKEY   __asm pushfd __asm popfd    /* push and pop EFLAGS; presumably makes the unit reload its key - confirm */
+#define NEH_AES     __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0xa7  /* common opcode prefix bytes f3 0f a7 */
+#define NEH_ECB     NEH_AES __asm _emit 0xc8    /* prefix + c8 */
+#define NEH_CBC     NEH_AES __asm _emit 0xd0    /* prefix + d0 */
+#define NEH_CFB     NEH_AES __asm _emit 0xe0    /* prefix + e0 */
+#define NEH_OFB     NEH_AES __asm _emit 0xe8    /* prefix + e8 */
+#define NEH_RNG     __asm _emit 0x0f __asm _emit 0xa7 __asm _emit 0xc0  /* RNG read: bytes 0f a7 c0 */
+
+INLINE int has_cpuid(void)     /* returns 1 if EFLAGS bit 0x00200000 (the CPUID bit) can be toggled, else 0 */
+{   char ret_value;
+    __asm
+    {   pushfd                  /* save EFLAGS register     */
+        mov     eax,[esp]       /* copy it to eax           */
+        mov     edx,0x00200000  /* CPUID bit position       */
+        xor     eax,edx         /* toggle the CPUID bit     */
+        push    eax             /* attempt to set EFLAGS to */
+        popfd                   /*     the new value        */
+        pushfd                  /* get the new EFLAGS value */
+        pop     eax             /*     into eax             */
+        xor     eax,[esp]       /* xor with original value  */
+        and     eax,edx         /* has CPUID bit changed?   */
+        setne   al              /* set to 1 if we have been */
+        mov     ret_value,al    /*     able to change it    */
+        popfd                   /* restore original EFLAGS  */
+    }
+    return (int)ret_value;
+}
+
+INLINE int is_via_cpu(void)    /* returns 1 if the CPUID vendor string is "CentaurHauls", else 0; overwrites
+                                  via_flags with NEH_CPU_READ plus the result in bit 0 (NEH_CPU_IS_VIA) */
+{   char ret_value;
+    __asm
+    {   xor     eax,eax         /* use CPUID to get vendor  */
+        cpuid                   /* identity string          */
+        xor     eax,eax         /* is it "CentaurHauls" ?   */
+        sub     ebx,0x746e6543  /* 'Cent'                   */
+        or      eax,ebx
+        sub     edx,0x48727561  /* 'aurH'                   */
+        or      eax,edx
+        sub     ecx,0x736c7561  /* 'auls'                   */
+        or      eax,ecx
+        sete    al              /* set to 1 if it is VIA ID */
+        mov     dl,NEH_CPU_READ /* mark CPU type as read    */
+        or      dl,al           /* & store result in flags  */
+        mov     [via_flags],dl  /* set VIA detected flag    */
+        mov     ret_value,al    /* and return the result    */
+    }
+    return (int)ret_value;
+}
+
+INLINE int read_via_flags(void)    /* reads the extended feature flags (CPUID 0xC0000001, edx), ORs the
+                                      NEH_FLAGS_MASK bits into via_flags and returns them; returns 0 and
+                                      leaves via_flags alone if that CPUID level is not reported */
+{   char ret_value = 0;
+    __asm
+    {
+        mov     eax,0xC0000000  /* Centaur extended CPUID   */
+        cpuid
+        mov     edx,0xc0000001  /* >= 0xc0000001 if support */
+        cmp     eax,edx         /* for VIA extended feature */
+        jnae    no_rng          /*     flags is available   */
+        mov     eax,edx         /* read Centaur extended    */
+        cpuid                   /*     feature flags        */
+        mov     eax,NEH_FLAGS_MASK  /* mask out and save    */
+        and     eax,edx         /*  the RNG and ACE flags   */
+        or      [via_flags],al  /* present & enabled flags  */
+        mov     ret_value,al    /* and return the same bits */
+no_rng:
+    }
+    return (int)ret_value;
+}
+
+INLINE unsigned int via_rng_in(void *buf)  /* one RNG read into buf; returns the low five bits of eax
+                                              after the instruction, used as the byte count */
+{   char ret_value = 0x1f;
+    __asm
+    {
+        push    edi
+        mov     edi,buf         /* input buffer address     */
+        xor     edx,edx         /* try to fetch 8 bytes     */
+        NEH_RNG                 /* do RNG read operation    */
+        and     ret_value,al    /* count of bytes returned  */
+        pop     edi
+    }
+    return (int)ret_value;
+}
+
+INLINE void via_ecb_op5(       /* ECB: loads k->ebx, c->edx, s->esi, d->edi, l->ecx and issues NEH_ECB;
+                                  aes_ctr_crypt() passes key schedule, control word, source,
+                                  destination and block count in that order */
+            const void *k, const void *c, const void *s, void *d, int l)
+{   __asm
+    {
+        NEH_REKEY
+        mov     ebx, (k)
+        mov     edx, (c)
+        mov     esi, (s)
+        mov     edi, (d)
+        mov     ecx, (l)
+        NEH_ECB
+    }
+}
+
+INLINE void via_cbc_op6(       /* CBC: loads k->ebx, c->edx, s->esi, d->edi, l->ecx, v->eax and issues
+                                  NEH_CBC; the CBC caller passes its iv pointer as v */
+            const void *k, const void *c, const void *s, void *d, int l, void *v)
+{   __asm
+    {
+        NEH_REKEY
+        mov     ebx, (k)
+        mov     edx, (c)
+        mov     esi, (s)
+        mov     edi, (d)
+        mov     ecx, (l)
+        mov     eax, (v)
+        NEH_CBC
+    }
+}
+
+INLINE void via_cbc_op7(       /* as via_cbc_op6, then copies 16 bytes (four movsd) from the address
+                                  left in eax to w */
+        const void *k, const void *c, const void *s, void *d, int l, void *v, void *w)
+{   __asm
+    {
+        NEH_REKEY
+        mov     ebx, (k)
+        mov     edx, (c)
+        mov     esi, (s)
+        mov     edi, (d)
+        mov     ecx, (l)
+        mov     eax, (v)
+        NEH_CBC
+        mov     esi, eax
+        mov     edi, (w)
+        movsd
+        movsd
+        movsd
+        movsd
+    }
+}
+
+INLINE void via_cfb_op6(       /* CFB: loads k->ebx, c->edx, s->esi, d->edi, l->ecx, v->eax and issues
+                                  NEH_CFB; aes_cfb_decrypt() passes key schedule, control word,
+                                  source, destination, block count and iv */
+            const void *k, const void *c, const void *s, void *d, int l, void *v)
+{   __asm
+    {
+        NEH_REKEY
+        mov     ebx, (k)
+        mov     edx, (c)
+        mov     esi, (s)
+        mov     edi, (d)
+        mov     ecx, (l)
+        mov     eax, (v)
+        NEH_CFB
+    }
+}
+
+INLINE void via_cfb_op7(       /* as via_cfb_op6, then copies 16 bytes (four movsd) from the address
+                                  left in eax to w; aes_cfb_encrypt() passes the same iv pointer
+                                  for both v and w */
+        const void *k, const void *c, const void *s, void *d, int l, void *v, void *w)
+{   __asm
+    {
+        NEH_REKEY
+        mov     ebx, (k)
+        mov     edx, (c)
+        mov     esi, (s)
+        mov     edi, (d)
+        mov     ecx, (l)
+        mov     eax, (v)
+        NEH_CFB
+        mov     esi, eax
+        mov     edi, (w)
+        movsd
+        movsd
+        movsd
+        movsd
+    }
+}
+
+INLINE void via_ofb_op6(       /* OFB: loads k->ebx, c->edx, s->esi, d->edi, l->ecx, v->eax and issues
+                                  NEH_OFB; aes_ofb_crypt() passes key schedule, control word,
+                                  source, destination, block count and iv */
+            const void *k, const void *c, const void *s, void *d, int l, void *v)
+{   __asm
+    {
+        NEH_REKEY
+        mov     ebx, (k)
+        mov     edx, (c)
+        mov     esi, (s)
+        mov     edi, (d)
+        mov     ecx, (l)
+        mov     eax, (v)
+        NEH_OFB
+    }
+}
+
+#elif defined( __GNUC__ )
+
+#define NEH_REKEY   asm("pushfl\n popfl\n\t")  /* push and pop EFLAGS; presumably makes the unit reload its key - confirm */
+#define NEH_ECB     asm(".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t")    /* opcodes emitted as raw bytes */
+#define NEH_CBC     asm(".byte 0xf3, 0x0f, 0xa7, 0xd0\n\t")
+#define NEH_CFB     asm(".byte 0xf3, 0x0f, 0xa7, 0xe0\n\t")
+#define NEH_OFB     asm(".byte 0xf3, 0x0f, 0xa7, 0xe8\n\t")
+#define NEH_RNG     asm(".byte 0x0f, 0xa7, 0xc0\n\t");         /* unlike the others this one carries its own ';' */
+
+/* Returns 1 if the CPUID instruction is available, otherwise 0.
+ *
+ * The test toggles EFLAGS bit 0x00200000 (the CPUID bit), reads EFLAGS back
+ * and checks whether the bit differs from its saved value; the original
+ * EFLAGS is restored before returning.
+ *
+ * The sequence is one asm statement so the compiler cannot place code
+ * between the steps, the result leaves in eax instead of being stored to a
+ * memory operand while the stack pointer is displaced, and the condition
+ * codes are declared as clobbered.
+ */
+INLINE int has_cpuid(void)
+{   int val;
+    asm volatile(
+        "pushfl\n\t"                    /* save EFLAGS register     */
+        "movl  0(%%esp),%%eax\n\t"      /* copy it to eax           */
+        "xorl  $0x00200000,%%eax\n\t"   /* toggle the CPUID bit     */
+        "pushl %%eax\n\t"               /* attempt to set EFLAGS to */
+        "popfl\n\t"                     /*     the new value        */
+        "pushfl\n\t"                    /* get the new EFLAGS value */
+        "popl  %%eax\n\t"               /*     into eax             */
+        "xorl  0(%%esp),%%eax\n\t"      /* xor with original value  */
+        "andl  $0x00200000,%%eax\n\t"   /* has CPUID bit changed?   */
+        "popfl\n\t"                     /* restore original EFLAGS  */
+        : "=a" (val)
+        :
+        : "cc", "memory");
+    return val ? 1 : 0;
+}
+
+INLINE int is_via_cpu(void)    /* returns 1 if the CPUID vendor string is "CentaurHauls", else 0; overwrites
+                                  via_flags with the result ORed with NEH_CPU_READ.
+                                  NOTE(review): the steps are separate asm statements without clobber lists,
+                                  so this relies on the compiler leaving eax/ebx/ecx/edx alone in between */
+{   int val;
+    asm("xorl %eax,%eax\n\t");
+    asm("cpuid\n\t");
+    asm("xorl %eax,%eax\n\t");
+    asm("subl $0x746e6543,%ebx\n\t");      /* 'Cent' */
+    asm("orl  %ebx,%eax\n\t");
+    asm("subl $0x48727561,%edx\n\t");      /* 'aurH' */
+    asm("orl  %edx,%eax\n\t");
+    asm("subl $0x736c7561,%ecx\n\t");      /* 'auls' */
+    asm("orl  %ecx,%eax\n\t");
+    asm("movl %%eax,%0\n\t" : "=m" (val));
+    val = (val ? 0 : 1);                    /* eax is zero only when all three words matched */
+    via_flags = (val | NEH_CPU_READ);
+    return val;
+}
+
+INLINE int read_via_flags(void)    /* reads the extended feature flags (CPUID 0xc0000001, low byte of edx),
+                                      ORs the NEH_FLAGS_MASK bits into via_flags and returns them; returns 0
+                                      and leaves via_flags alone if CPUID 0xc0000000 reports less than 0xc0000001.
+                                      NOTE(review): separate asm statements without clobber lists - relies on
+                                      the compiler leaving the registers alone in between */
+{   unsigned char   val;
+    asm("movl $0xc0000000,%eax\n\t");
+    asm("cpuid\n\t");
+    asm("movl $0xc0000001,%edx\n\t");
+    asm("cmpl %edx,%eax\n\t");
+    asm("setae %al\n\t");
+    asm("movb %%al,%0\n\t" : "=m" (val));
+    if(!val) return 0;
+    asm("movl $0xc0000001,%eax\n\t");
+    asm("cpuid\n\t");
+    asm("movb %%dl,%0\n\t" : "=m" (val));
+    val &= NEH_FLAGS_MASK;
+    via_flags |= val;
+    return (int) val;
+}
+
+INLINE int via_rng_in(void *buf)   /* one RNG read into buf; returns eax & 0x1f after the instruction, which
+                                      read_via_rng() uses as the number of bytes delivered.
+                                      NOTE(review): val is stored through a memory operand while edi is still
+                                      pushed on the stack - verify the operand is not addressed relative to esp */
+{   int val;
+    asm("pushl %edi\n\t");
+    asm("movl %0,%%edi\n\t" : : "m" (buf));
+    asm("xorl %edx,%edx\n\t");
+    NEH_RNG
+    asm("andl $0x0000001f,%eax\n\t");
+    asm("movl %%eax,%0\n\t" : "=m" (val));
+    asm("popl %edi\n\t");
+    return val;
+}
+
+INLINE volatile  void via_ecb_op5(     /* ECB: loads k->ebx, c->edx, s->esi, d->edi, l->ecx and issues NEH_ECB;
+                                          aes_ctr_crypt() passes key schedule, control word, source,
+                                          destination and block count in that order.
+                                          NOTE(review): register loads and opcode are separate asm statements
+                                          with no clobber lists - relies on the compiler not using those
+                                          registers in between; verify with the compilers in use */
+            const void *k, const void *c, const void *s, void *d, int l)
+{
+    NEH_REKEY;
+    asm("movl %0, %%ebx\n\t" : : "m" (k));
+    asm("movl %0, %%edx\n\t" : : "m" (c));
+    asm("movl %0, %%esi\n\t" : : "m" (s));
+    asm("movl %0, %%edi\n\t" : : "m" (d));
+    asm("movl %0, %%ecx\n\t" : : "m" (l));
+    NEH_ECB;
+}
+
+INLINE volatile  void via_cbc_op6(     /* CBC: loads k->ebx, c->edx, s->esi, d->edi, l->ecx, v->eax and issues
+                                          NEH_CBC; the CBC caller passes its iv pointer as v.
+                                          NOTE(review): separate asm statements with no clobber lists -
+                                          relies on the compiler leaving the registers alone in between */
+            const void *k, const void *c, const void *s, void *d, int l, void *v)
+{
+    NEH_REKEY;
+    asm("movl %0, %%ebx\n\t" : : "m" (k));
+    asm("movl %0, %%edx\n\t" : : "m" (c));
+    asm("movl %0, %%esi\n\t" : : "m" (s));
+    asm("movl %0, %%edi\n\t" : : "m" (d));
+    asm("movl %0, %%ecx\n\t" : : "m" (l));
+    asm("movl %0, %%eax\n\t" : : "m" (v));
+    NEH_CBC;
+}
+
+INLINE volatile  void via_cbc_op7(     /* as via_cbc_op6, then copies 16 bytes (four movsl) from the address
+                                          left in eax to w.
+                                          NOTE(review): separate asm statements with no clobber lists -
+                                          relies on the compiler leaving the registers alone in between */
+        const void *k, const void *c, const void *s, void *d, int l, void *v, void *w)
+{
+    NEH_REKEY;
+    asm("movl %0, %%ebx\n\t" : : "m" (k));
+    asm("movl %0, %%edx\n\t" : : "m" (c));
+    asm("movl %0, %%esi\n\t" : : "m" (s));
+    asm("movl %0, %%edi\n\t" : : "m" (d));
+    asm("movl %0, %%ecx\n\t" : : "m" (l));
+    asm("movl %0, %%eax\n\t" : : "m" (v));
+    NEH_CBC;
+    asm("movl %eax,%esi\n\t");
+    asm("movl %0, %%edi\n\t" : : "m" (w));
+    asm("movsl; movsl; movsl; movsl\n\t");
+}
+
+INLINE volatile  void via_cfb_op6(     /* CFB: loads k->ebx, c->edx, s->esi, d->edi, l->ecx, v->eax and issues
+                                          NEH_CFB; aes_cfb_decrypt() passes key schedule, control word,
+                                          source, destination, block count and iv.
+                                          NOTE(review): separate asm statements with no clobber lists -
+                                          relies on the compiler leaving the registers alone in between */
+            const void *k, const void *c, const void *s, void *d, int l, void *v)
+{
+    NEH_REKEY;
+    asm("movl %0, %%ebx\n\t" : : "m" (k));
+    asm("movl %0, %%edx\n\t" : : "m" (c));
+    asm("movl %0, %%esi\n\t" : : "m" (s));
+    asm("movl %0, %%edi\n\t" : : "m" (d));
+    asm("movl %0, %%ecx\n\t" : : "m" (l));
+    asm("movl %0, %%eax\n\t" : : "m" (v));
+    NEH_CFB;
+}
+
+INLINE volatile  void via_cfb_op7(     /* as via_cfb_op6, then copies 16 bytes (four movsl) from the address
+                                          left in eax to w; aes_cfb_encrypt() passes the same iv pointer
+                                          for both v and w.
+                                          NOTE(review): separate asm statements with no clobber lists -
+                                          relies on the compiler leaving the registers alone in between */
+        const void *k, const void *c, const void *s, void *d, int l, void *v, void *w)
+{
+    NEH_REKEY;
+    asm("movl %0, %%ebx\n\t" : : "m" (k));
+    asm("movl %0, %%edx\n\t" : : "m" (c));
+    asm("movl %0, %%esi\n\t" : : "m" (s));
+    asm("movl %0, %%edi\n\t" : : "m" (d));
+    asm("movl %0, %%ecx\n\t" : : "m" (l));
+    asm("movl %0, %%eax\n\t" : : "m" (v));
+    NEH_CFB;
+    asm("movl %eax,%esi\n\t");
+    asm("movl %0, %%edi\n\t" : : "m" (w));
+    asm("movsl; movsl; movsl; movsl\n\t");
+}
+
+INLINE volatile  void via_ofb_op6(     /* OFB: loads k->ebx, c->edx, s->esi, d->edi, l->ecx, v->eax and issues
+                                          NEH_OFB; aes_ofb_crypt() passes key schedule, control word,
+                                          source, destination, block count and iv.
+                                          NOTE(review): separate asm statements with no clobber lists -
+                                          relies on the compiler leaving the registers alone in between */
+            const void *k, const void *c, const void *s, void *d, int l, void *v)
+{
+    NEH_REKEY;
+    asm("movl %0, %%ebx\n\t" : : "m" (k));
+    asm("movl %0, %%edx\n\t" : : "m" (c));
+    asm("movl %0, %%esi\n\t" : : "m" (s));
+    asm("movl %0, %%edi\n\t" : : "m" (d));
+    asm("movl %0, %%ecx\n\t" : : "m" (l));
+    asm("movl %0, %%eax\n\t" : : "m" (v));
+    NEH_OFB;
+}
+
+#else
+#error VIA ACE is not available with this compiler
+#endif
+
+/* Probe for a usable ACE unit: returns 1 only if CPUID exists, the CPU     */
+/* identifies as VIA and both ACE bits (present and enabled) are reported. */
+/* The probes run in that order and stop at the first one that fails.      */
+INLINE int via_ace_test(void)
+{
+    if(!has_cpuid())
+        return 0;
+
+    if(!is_via_cpu())
+        return 0;
+
+    return (read_via_flags() & NEH_ACE_FLAGS) == NEH_ACE_FLAGS;
+}
+
+#define VIA_ACE_AVAILABLE   (((via_flags & NEH_ACE_FLAGS) == NEH_ACE_FLAGS)         \
+    || (via_flags & NEH_CPU_READ) && (via_flags & NEH_CPU_IS_VIA) || via_ace_test())   /* true if the cached flags show ACE present and enabled, else probes.
+       NOTE(review): the middle term makes this true for any already-probed VIA CPU even when the ACE bits are clear,
+       and an already-probed non-VIA CPU re-runs via_ace_test() on every use - confirm this is intended */
+
+/* Probe for a usable RNG: returns 1 only if CPUID exists, the CPU          */
+/* identifies as VIA and both RNG bits (present and enabled) are reported. */
+/* The probes run in that order and stop at the first one that fails.      */
+INLINE int via_rng_test(void)
+{
+    if(!has_cpuid())
+        return 0;
+
+    if(!is_via_cpu())
+        return 0;
+
+    return (read_via_flags() & NEH_RNG_FLAGS) == NEH_RNG_FLAGS;
+}
+
+#define VIA_RNG_AVAILABLE   (((via_flags & NEH_RNG_FLAGS) == NEH_RNG_FLAGS)         \
+    || (via_flags & NEH_CPU_READ) && (via_flags & NEH_CPU_IS_VIA) || via_rng_test())   /* true if the cached flags show the RNG present and enabled, else probes.
+       NOTE(review): the middle term makes this true for any already-probed VIA CPU even when the RNG bits are clear,
+       and an already-probed non-VIA CPU re-runs via_rng_test() on every use - confirm this is intended */
+
+/* Fill buf with up to count bytes from the VIA hardware RNG.
+ *
+ * Returns the number of bytes actually stored: count on success, fewer if
+ * MAX_READ_ATTEMPTS consecutive reads delivered nothing, and 0 if count is
+ * not positive or the RNG is not available.
+ *
+ * Each via_rng_in() call delivers a few bytes into an aligned local buffer;
+ * they are appended to buf, never more than are still wanted, so the write
+ * position keeps advancing and buf is not overrun.
+ */
+INLINE int read_via_rng(void *buf, int count)
+{   int nbr, max_reads, lcnt = count;
+    unsigned char *p = (unsigned char*)buf, *q;
+    aligned_auto(unsigned char, bp, 64, 16);
+
+    if(count <= 0 || !VIA_RNG_AVAILABLE)
+        return 0;
+
+    do
+    {
+        max_reads = MAX_READ_ATTEMPTS;
+        do
+            nbr = via_rng_in(bp);
+        while
+            (nbr == 0 && --max_reads);
+
+        if(nbr > lcnt)          /* take no more than is still wanted */
+            nbr = lcnt;
+
+        lcnt -= nbr;
+        q = bp;
+        while(nbr--)
+            *p++ = *q++;
+    }
+    while
+        (lcnt && max_reads);    /* stop when done or the RNG stayed empty */
+
+    return count - lcnt;
+}
+
+#endif
--- a/src/java/kp2akeytransform/jni/aes/aes_x86_v1.asm
+++ b/src/java/kp2akeytransform/jni/aes/aes_x86_v1.asm
@ -0,0 +1,644 @@
+
+; ---------------------------------------------------------------------------
+; Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+;
+; LICENSE TERMS
+;
+; The redistribution and use of this software (with or without changes)
+; is allowed without the payment of fees or royalties provided that:
+;
+;  1. source code distributions include the above copyright notice, this
+;     list of conditions and the following disclaimer;
+;
+;  2. binary distributions include the above copyright notice, this list
+;     of conditions and the following disclaimer in their documentation;
+;
+;  3. the name of the copyright holder is not used to endorse products
+;     built using this software without specific written permission.
+;
+; DISCLAIMER
+;
+; This software is provided 'as is' with no explicit or implied warranties
+; in respect of its properties, including, but not limited to, correctness
+; and/or fitness for purpose.
+; ---------------------------------------------------------------------------
+; Issue 13/08/2008
+;
+; This code requires ASM_X86_V1C to be set in aesopt.h. It requires the C files
+; aeskey.c and aestab.c for support.
+
+; An AES implementation for x86 processors using the YASM (or NASM) assembler.
+; This is an assembler implementation that covers encryption and decryption
+; only and is intended as a replacement of the C file aescrypt.c. It hence
+; requires the file aeskey.c for keying and aestab.c for the AES tables. It
+; employs full tables rather than compressed tables.
+
+; This code provides the standard AES block size (128 bits, 16 bytes) and the
+; three standard AES key sizes (128, 192 and 256 bits). It has the same call
+; interface as my C implementation. The ebx, esi, edi and ebp registers are
+; preserved across calls but eax, ecx and edx and the arithmetic status flags
+; are not.  It is also important that the defines below match those used in the
+; C code.  This code uses the VC++ register saving conventions; if it is used
+; with another compiler, conventions for using and saving registers may need to
+; be checked (and calling conventions).  The YASM command line for the VC++
+; custom build step is:
+;
+;    yasm -Xvc -f win32 -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
+;
+;  The calling interfaces are:
+;
+;     AES_RETURN aes_encrypt(const unsigned char in_blk[],
+;                   unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
+;
+;     AES_RETURN aes_decrypt(const unsigned char in_blk[],
+;                   unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
+;
+;     AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
+;                                            const aes_encrypt_ctx cx[1]);
+;
+;     AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
+;                                            const aes_decrypt_ctx cx[1]);
+;
+;     AES_RETURN aes_encrypt_key(const unsigned char key[],
+;                           unsigned int len, const aes_encrypt_ctx cx[1]);
+;
+;     AES_RETURN aes_decrypt_key(const unsigned char key[],
+;                           unsigned int len, const aes_decrypt_ctx cx[1]);
+;
+; where <NNN> is 128, 192 or 256.  In the last two calls the length can be in
+; either bits or bytes.
+;
+; Comment in/out the following lines to obtain the desired subroutines. These
+; selections MUST match those in the C header file aes.h
+
+%define AES_128                 ; define if AES with 128 bit keys is needed
+%define AES_192                 ; define if AES with 192 bit keys is needed
+%define AES_256                 ; define if AES with 256 bit keys is needed
+%define AES_VAR                 ; define if a variable key size is needed
+%define ENCRYPTION              ; define if encryption is needed
+%define DECRYPTION              ; define if decryption is needed
+%define AES_REV_DKS             ; define if key decryption schedule is reversed
+%define LAST_ROUND_TABLES       ; define if tables are to be used for last round
+
+; offsets to parameters
+
+in_blk  equ     4   ; input byte array address parameter
+out_blk equ     8   ; output byte array address parameter
+ctx     equ    12   ; AES context structure
+stk_spc equ    20   ; stack space
+%define parms  12   ; parameter space on stack
+
+; The encryption key schedule has the following in memory layout where N is the
+; number of rounds (10, 12 or 14):
+;
+; lo: | input key (round 0)  |  ; each round is four 32-bit words
+;     | encryption round 1   |
+;     | encryption round 2   |
+;     ....
+;     | encryption round N-1 |
+; hi: | encryption round N   |
+;
+; The decryption key schedule is normally set up so that it has the same
+; layout as above by actually reversing the order of the encryption key
+; schedule in memory (this happens when AES_REV_DKS is set):
+;
+; lo: | decryption round 0   | =              | encryption round N   |
+;     | decryption round 1   | = INV_MIX_COL[ | encryption round N-1 | ]
+;     | decryption round 2   | = INV_MIX_COL[ | encryption round N-2 | ]
+;     ....                       ....
+;     | decryption round N-1 | = INV_MIX_COL[ | encryption round 1   | ]
+; hi: | decryption round N   | =              | input key (round 0)  |
+;
+; with rounds except the first and last modified using inv_mix_column()
+; But if AES_REV_DKS is NOT set the order of keys is left as it is for
+; encryption so that it has to be accessed in reverse when used for
+; decryption (although the inverse mix column modifications are done)
+;
+; lo: | decryption round 0   | =              | input key (round 0)  |
+;     | decryption round 1   | = INV_MIX_COL[ | encryption round 1   | ]
+;     | decryption round 2   | = INV_MIX_COL[ | encryption round 2   | ]
+;     ....                       ....
+;     | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
+; hi: | decryption round N   | =              | encryption round N   |
+;
+; This layout is faster when the assembler key scheduling provided here
+; is used.
+;
+; The DLL interface must use the _stdcall convention in which the number
+; of bytes of parameter space is added after an @ to the subroutine's name.
+; We must also remove our parameters from the stack before return (see
+; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.
+
+;%define DLL_EXPORT
+
+; End of user defines
+
+%ifdef AES_VAR
+%ifndef AES_128
+%define AES_128
+%endif
+%ifndef AES_192
+%define AES_192
+%endif
+%ifndef AES_256
+%define AES_256
+%endif
+%endif
+
+%ifdef AES_VAR
+%define KS_LENGTH       60
+%elifdef AES_256
+%define KS_LENGTH       60
+%elifdef AES_192
+%define KS_LENGTH       52
+%else
+%define KS_LENGTH       44
+%endif
+
+; These macros implement stack based local variables
+
+; save slot, reg
+;   Store register 'reg' in the 32-bit stack slot numbered 'slot', i.e. at
+;   address esp + 4*slot.
+%macro  save 2
+    mov     [esp+4*%1],%2
+%endmacro
+
+; restore reg, slot
+;   Reload register 'reg' from the 32-bit stack slot numbered 'slot', i.e.
+;   from address esp + 4*slot.
+%macro  restore 2
+    mov     %1,[esp+4*%2]
+%endmacro
+
+; the DLL has to implement the _stdcall calling interface on return
+; In this case we have to take our parameters (3 4-byte pointers)
+; off the stack
+
+; do_name name [, bytes]
+;   Emit the global entry label for a subroutine.  'bytes' defaults to
+;   'parms'.  Without DLL_EXPORT the label is just 'name'; with DLL_EXPORT
+;   the label is 'name@bytes' and it is additionally exported.
+%macro  do_name 1-2 parms
+%ifndef DLL_EXPORT
+    global  %1
+%1:
+%else
+    global  %1@%2
+    export  %1@%2
+%1@%2:
+%endif
+%endmacro
+
+; do_call name [, bytes]
+;   Call a subroutine declared with do_name.  'bytes' defaults to 'parms'.
+;   Without DLL_EXPORT the caller removes 'bytes' of parameters from the
+;   stack after the call; with DLL_EXPORT the decorated name 'name@bytes'
+;   is called and the stack pointer is not adjusted here.
+%macro  do_call 1-2 parms
+%ifndef DLL_EXPORT
+    call    %1
+    add     esp,%2
+%else
+    call    %1@%2
+%endif
+%endmacro
+
+; do_exit [bytes]
+;   Return from a subroutine.  'bytes' defaults to 'parms'.  With DLL_EXPORT
+;   the return also pops 'bytes' of parameters; otherwise a plain ret is
+;   emitted and the parameters are left on the stack.
+%macro  do_exit  0-1 parms
+%ifdef DLL_EXPORT
+    ret %1
+%else
+    ret
+%endif
+%endmacro
+
+%ifdef  ENCRYPTION
+
+    extern  _t_fn
+
+%define etab_0(x)   [_t_fn+4*x]
+%define etab_1(x)   [_t_fn+1024+4*x]
+%define etab_2(x)   [_t_fn+2048+4*x]
+%define etab_3(x)   [_t_fn+3072+4*x]
+
+%ifdef LAST_ROUND_TABLES
+
+    extern  _t_fl
+
+%define eltab_0(x)  [_t_fl+4*x]
+%define eltab_1(x)  [_t_fl+1024+4*x]
+%define eltab_2(x)  [_t_fl+2048+4*x]
+%define eltab_3(x)  [_t_fl+3072+4*x]
+
+%else
+
+%define etab_b(x)   byte [_t_fn+3072+4*x]
+
+%endif
+
+; ROUND FUNCTION.  Build column[2] on ESI and column[3] on EDI that have the
+; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
+;
+; Input:
+;
+;   EAX     column[0]
+;   EBX     column[1]
+;   ECX     column[2]
+;   EDX     column[3]
+;   ESI     column key[round][2]
+;   EDI     column key[round][3]
+;   EBP     scratch
+;
+; Output:
+;
+;   EBP     column[0]   unkeyed
+;   EBX     column[1]   unkeyed
+;   ESI     column[2]   keyed
+;   EDI     column[3]   keyed
+;   EAX     scratch
+;   ECX     scratch
+;   EDX     scratch
+
+; rnd_fun xor_op, mov_op
+;   xor_op  helper macro that XORs a table entry into a destination
+;           (invoked with nr_xor or lr_xor)
+;   mov_op  helper macro that loads a table entry into a destination
+;           (invoked with nr_mov or lr_mov)
+; Both helpers are used as:  helper dest, byte_reg, table_number, scratch
+; The statement order matters: each input register is reused as scratch or
+; as an output as soon as the bytes it held have been consumed.
+%macro rnd_fun 2
+
+    ; rotate so that the upper two bytes of EBX become addressable as bl/bh
+    rol     ebx,16
+    %1      esi, cl, 0, ebp
+    %1      esi, dh, 1, ebp
+    %1      esi, bh, 3, ebp
+    %1      edi, dl, 0, ebp
+    %1      edi, ah, 1, ebp
+    %1      edi, bl, 2, ebp
+    %2      ebp, al, 0, ebp
+    ; the low word of EAX is spent: refill it with the original low word of
+    ; EBX, which frees EBX to receive an output column below
+    shr     ebx,16
+    and     eax,0xffff0000
+    or      eax,ebx
+    ; expose the upper two bytes of EDX as dl/dh
+    shr     edx,16
+    %1      ebp, ah, 1, ebx
+    %1      ebp, dh, 3, ebx
+    %2      ebx, dl, 2, ebx
+    %1      ebx, ch, 1, edx
+    %1      ebx, al, 0, edx
+    ; expose the upper two bytes of EAX and ECX as al/ah and cl/ch
+    shr     eax,16
+    shr     ecx,16
+    %1      ebp, cl, 2, edx
+    %1      edi, ch, 3, edx
+    %1      esi, al, 2, edx
+    %1      ebx, ah, 3, edx
+
+%endmacro
+
+; Basic MOV and XOR Operations for normal rounds
+
+; nr_xor dest, byte_reg, table_number, scratch
+;   Zero-extend byte_reg into scratch, then XOR the 32-bit entry it indexes
+;   in normal-round table etab_<table_number> into dest.
+%macro  nr_xor  4
+    movzx   %4,%2
+    xor     %1,etab_%3(%4)
+%endmacro
+
+; nr_mov dest, byte_reg, table_number, scratch
+;   Same lookup as nr_xor, but the table entry overwrites dest.
+%macro  nr_mov  4
+    movzx   %4,%2
+    mov     %1,etab_%3(%4)
+%endmacro
+
+; Basic MOV and XOR Operations for last round
+
+; lr_xor / lr_mov take the same four arguments as nr_xor / nr_mov:
+;   dest, byte_reg, table_number, scratch
+; With LAST_ROUND_TABLES they look the value up in eltab_<table_number>.
+; Without it they read the single byte addressed by etab_b and shift it left
+; by 8 * table_number bits to place it in the required byte lane.
+%ifdef LAST_ROUND_TABLES
+
+    %macro  lr_xor  4
+        movzx   %4,%2
+        xor     %1,eltab_%3(%4)
+    %endmacro
+
+    %macro  lr_mov  4
+        movzx   %4,%2
+        mov     %1,eltab_%3(%4)
+    %endmacro
+
+%else
+
+    %macro  lr_xor  4
+        movzx   %4,%2
+        movzx   %4,etab_b(%4)
+    %if %3 != 0
+        shl     %4,8*%3
+    %endif
+        xor     %1,%4
+    %endmacro
+
+    %macro  lr_mov  4
+        movzx   %4,%2
+        movzx   %1,etab_b(%4)
+    %if %3 != 0
+        shl     %1,8*%3
+    %endif
+    %endmacro
+
+%endif
+
+; enc_round
+;   One normal encryption round.  On entry EAX/EBX/ECX/EDX hold the four
+;   state columns and EBP points at the previous round key; on exit the
+;   registers hold the new state and EBP points at this round's key.
+%macro enc_round 0
+
+    ; step to this round's key and park the pointer in stack slot 0, because
+    ; rnd_fun uses EBP as a working register
+    add     ebp,16
+    save    0,ebp
+    ; preload key words 2 and 3; rnd_fun XORs columns 2 and 3 into them
+    mov     esi,[ebp+8]
+    mov     edi,[ebp+12]
+
+    rnd_fun nr_xor, nr_mov
+
+    ; move the results back into the input registers for the next round
+    mov     eax,ebp
+    mov     ecx,esi
+    mov     edx,edi
+    ; recover the key pointer and apply key words 0 and 1
+    restore ebp,0
+    xor     eax,[ebp]
+    xor     ebx,[ebp+4]
+
+%endmacro
+
+; enc_last_round
+;   Final encryption round, using the last-round helpers lr_xor / lr_mov.
+;   Unlike enc_round, columns 2 and 3 are left in ESI and EDI; the
+;   subroutine stores them to the output block directly from there.
+%macro enc_last_round 0
+
+    ; step to the final round key and park the pointer in stack slot 0
+    add     ebp,16
+    save    0,ebp
+    ; preload key words 2 and 3
+    mov     esi,[ebp+8]
+    mov     edi,[ebp+12]
+
+    rnd_fun lr_xor, lr_mov
+
+    ; column 0 was built in EBP; recover the key pointer and apply key
+    ; words 0 and 1
+    mov     eax,ebp
+    restore ebp,0
+    xor     eax,[ebp]
+    xor     ebx,[ebp+4]
+
+%endmacro
+
+    section .text align=32
+
+; AES Encryption Subroutine
+
+    align   32
+    do_name _aes_encrypt
+
+    ; Stack parameters (offsets in_blk, out_blk, ctx above the return
+    ; address): input block pointer, output block pointer, context pointer.
+    ; Returns 0 in EAX on success, -1 if the length byte in the context is
+    ; not one of 10*16, 12*16 or 14*16.
+    ;
+    ; Reserve stk_spc bytes: slot [esp] is the scratch slot used by the
+    ; save/restore macros, slots [esp+4]..[esp+16] hold the saved registers.
+    sub     esp,stk_spc
+    mov     [esp+16],ebp
+    mov     [esp+12],ebx
+    mov     [esp+ 8],esi
+    mov     [esp+ 4],edi
+
+    mov     esi,[esp+in_blk+stk_spc] ; input pointer
+    mov     eax,[esi   ]
+    mov     ebx,[esi+ 4]
+    mov     ecx,[esi+ 8]
+    mov     edx,[esi+12]
+
+    mov     ebp,[esp+ctx+stk_spc]    ; key pointer
+    ; byte stored just past the KS_LENGTH-word key schedule; it selects the
+    ; number of rounds below
+    movzx   edi,byte [ebp+4*KS_LENGTH]
+    ; XOR the first round key into the four input columns
+    xor     eax,[ebp   ]
+    xor     ebx,[ebp+ 4]
+    xor     ecx,[ebp+ 8]
+    xor     edx,[ebp+12]
+
+; determine the number of rounds
+
+    cmp     edi,10*16
+    je      .3
+    cmp     edi,12*16
+    je      .2
+    cmp     edi,14*16
+    je      .1
+    ; unrecognised length byte: report failure
+    mov     eax,-1
+    jmp     .5
+
+    ; .1 enters with 14 rounds to do, .2 with 12 and .3 with 10; execution
+    ; falls through from each label into the next
+.1: enc_round
+    enc_round
+.2: enc_round
+    enc_round
+.3: enc_round
+    enc_round
+    enc_round
+    enc_round
+    enc_round
+    enc_round
+    enc_round
+    enc_round
+    enc_round
+    enc_last_round
+
+    ; store the result: columns 0/1 are in EAX/EBX, columns 2/3 in ESI/EDI
+    mov     edx,[esp+out_blk+stk_spc]
+    mov     [edx],eax
+    mov     [edx+4],ebx
+    mov     [edx+8],esi
+    mov     [edx+12],edi
+    xor     eax,eax
+
+    ; common exit: restore the saved registers and release the frame
+.5: mov     ebp,[esp+16]
+    mov     ebx,[esp+12]
+    mov     esi,[esp+ 8]
+    mov     edi,[esp+ 4]
+    add     esp,stk_spc
+    do_exit
+
+%endif
+
+%ifdef  DECRYPTION
+
+    extern  _t_in
+
+%define dtab_0(x)   [_t_in+4*x]
+%define dtab_1(x)   [_t_in+1024+4*x]
+%define dtab_2(x)   [_t_in+2048+4*x]
+%define dtab_3(x)   [_t_in+3072+4*x]
+
+%ifdef LAST_ROUND_TABLES
+
+    extern  _t_il
+
+%define dltab_0(x)  [_t_il+4*x]
+%define dltab_1(x)  [_t_il+1024+4*x]
+%define dltab_2(x)  [_t_il+2048+4*x]
+%define dltab_3(x)  [_t_il+3072+4*x]
+
+%else
+
+    extern  _t_ibox
+
+%define dtab_x(x)   byte [_t_ibox+x]
+
+%endif
+
+; irn_fun xor_op, mov_op
+;   Decryption counterpart of rnd_fun.
+;   xor_op  helper macro that XORs a table entry into a destination
+;           (invoked with ni_xor or li_xor)
+;   mov_op  helper macro that loads a table entry into a destination
+;           (invoked with ni_mov or li_mov)
+; Both helpers are used as:  helper dest, byte_reg, table_number, scratch
+; On entry EAX/EBX/ECX/EDX hold the state columns and ESI/EDI hold key words
+; 2 and 3.  On exit ESI and EDI hold keyed columns 2 and 3, while EAX and
+; EBP hold columns 0 and 1 before their key words are applied.
+; The statement order matters: each input register is reused as scratch or
+; as an output as soon as the bytes it held have been consumed.
+%macro irn_fun 2
+
+    ; rotate so that the upper two bytes of EAX become addressable as al/ah
+    rol eax,16
+    %1      esi, cl, 0, ebp
+    %1      esi, bh, 1, ebp
+    %1      esi, al, 2, ebp
+    %1      edi, dl, 0, ebp
+    %1      edi, ch, 1, ebp
+    %1      edi, ah, 3, ebp
+    %2      ebp, bl, 0, ebp
+    ; the low word of EBX is spent: refill it with the original low word of
+    ; EAX, which frees EAX to receive an output column below
+    shr     eax,16
+    and     ebx,0xffff0000
+    or      ebx,eax
+    ; expose the upper two bytes of ECX as cl/ch
+    shr     ecx,16
+    %1      ebp, bh, 1, eax
+    %1      ebp, ch, 3, eax
+    %2      eax, cl, 2, ecx
+    %1      eax, bl, 0, ecx
+    %1      eax, dh, 1, ecx
+    ; expose the upper two bytes of EBX and EDX as bl/bh and dl/dh
+    shr     ebx,16
+    shr     edx,16
+    %1      esi, dh, 3, ecx
+    %1      ebp, dl, 2, ecx
+    %1      eax, bh, 3, ecx
+    %1      edi, bl, 2, ecx
+
+%endmacro
+
+; Basic MOV and XOR Operations for normal rounds
+
+; ni_xor dest, byte_reg, table_number, scratch
+;   Zero-extend byte_reg into scratch, then XOR the 32-bit entry it indexes
+;   in normal-round inverse table dtab_<table_number> into dest.
+%macro  ni_xor  4
+    movzx   %4,%2
+    xor     %1,dtab_%3(%4)
+%endmacro
+
+; ni_mov dest, byte_reg, table_number, scratch
+;   Same lookup as ni_xor, but the table entry overwrites dest.
+%macro  ni_mov  4
+    movzx   %4,%2
+    mov     %1,dtab_%3(%4)
+%endmacro
+
+; Basic MOV and XOR Operations for last round
+
+; li_xor / li_mov take the same four arguments as ni_xor / ni_mov:
+;   dest, byte_reg, table_number, scratch
+; With LAST_ROUND_TABLES they look the value up in dltab_<table_number>.
+; Without it they read one byte from the _t_ibox table via dtab_x and shift
+; it left by 8 * table_number bits to place it in the required byte lane.
+%ifdef LAST_ROUND_TABLES
+
+%macro  li_xor  4
+    movzx   %4,%2
+    xor     %1,dltab_%3(%4)
+%endmacro
+
+%macro  li_mov  4
+    movzx   %4,%2
+    mov     %1,dltab_%3(%4)
+%endmacro
+
+%else
+
+    %macro  li_xor  4
+        movzx   %4,%2
+        movzx   %4,dtab_x(%4)
+    %if %3 != 0
+        shl     %4,8*%3
+    %endif
+        xor     %1,%4
+    %endmacro
+
+    %macro  li_mov  4
+        movzx   %4,%2
+        movzx   %1,dtab_x(%4)
+    %if %3 != 0
+        shl     %1,8*%3
+    %endif
+    %endmacro
+
+%endif
+
+; dec_round
+;   One normal decryption round.  On entry EAX/EBX/ECX/EDX hold the four
+;   state columns and EBP points at the previous round key; on exit the
+;   registers hold the new state and EBP points at this round's key.
+%macro dec_round 0
+
+    ; step to this round's key: upwards through a reversed schedule
+    ; (AES_REV_DKS), downwards otherwise
+%ifdef AES_REV_DKS
+    add     ebp,16
+%else
+    sub     ebp,16
+%endif
+    ; park the key pointer in stack slot 0, because irn_fun uses EBP as a
+    ; working register
+    save    0,ebp
+    ; preload key words 2 and 3; irn_fun XORs columns 2 and 3 into them
+    mov     esi,[ebp+8]
+    mov     edi,[ebp+12]
+
+    irn_fun ni_xor, ni_mov
+
+    ; column 1 was built in EBP, columns 2 and 3 in ESI and EDI
+    mov     ebx,ebp
+    mov     ecx,esi
+    mov     edx,edi
+    ; recover the key pointer and apply key words 0 and 1
+    restore ebp,0
+    xor     eax,[ebp]
+    xor     ebx,[ebp+4]
+
+%endmacro
+
+; dec_last_round
+;   Final decryption round, using the last-round helpers li_xor / li_mov.
+;   Unlike dec_round, columns 2 and 3 are left in ESI and EDI; the
+;   subroutine stores them to the output block directly from there.
+%macro dec_last_round 0
+
+    ; step to the final round key (direction depends on AES_REV_DKS)
+%ifdef AES_REV_DKS
+    add     ebp,16
+%else
+    sub     ebp,16
+%endif
+    ; park the key pointer in stack slot 0 and preload key words 2 and 3
+    save    0,ebp
+    mov     esi,[ebp+8]
+    mov     edi,[ebp+12]
+
+    irn_fun li_xor, li_mov
+
+    ; column 1 was built in EBP; recover the key pointer and apply key
+    ; words 0 and 1
+    mov     ebx,ebp
+    restore ebp,0
+    xor     eax,[ebp]
+    xor     ebx,[ebp+4]
+
+%endmacro
+
+    section .text
+
+; AES Decryption Subroutine
+
+    align   32
+    do_name _aes_decrypt
+
+    ; Stack parameters (offsets in_blk, out_blk, ctx above the return
+    ; address): input block pointer, output block pointer, context pointer.
+    ; Returns 0 in EAX on success, -1 if the length byte in the context is
+    ; not one of 10*16, 12*16 or 14*16.
+    ;
+    ; Reserve stk_spc bytes: slot [esp] is the scratch slot used by the
+    ; save/restore macros, slots [esp+4]..[esp+16] hold the saved registers.
+    sub     esp,stk_spc
+    mov     [esp+16],ebp
+    mov     [esp+12],ebx
+    mov     [esp+ 8],esi
+    mov     [esp+ 4],edi
+
+; input four columns and xor in first round key
+
+    mov     esi,[esp+in_blk+stk_spc] ; input pointer
+    mov     eax,[esi   ]
+    mov     ebx,[esi+ 4]
+    mov     ecx,[esi+ 8]
+    mov     edx,[esi+12]
+    ; NOTE(review): ESI is reloaded by the first dec_round (or restored at
+    ; .5) before it is read again, so this advance appears to have no
+    ; effect - confirm before removing
+    lea     esi,[esi+16]
+
+    mov     ebp,[esp+ctx+stk_spc]    ; key pointer
+    ; byte stored just past the KS_LENGTH-word key schedule; it selects the
+    ; number of rounds below
+    movzx   edi,byte[ebp+4*KS_LENGTH]
+%ifndef  AES_REV_DKS        ; if decryption key schedule is not reversed
+    lea     ebp,[ebp+edi]   ; we have to access it from the top down
+%endif
+    xor     eax,[ebp   ]    ; key schedule
+    xor     ebx,[ebp+ 4]
+    xor     ecx,[ebp+ 8]
+    xor     edx,[ebp+12]
+
+; determine the number of rounds
+
+    cmp     edi,10*16
+    je      .3
+    cmp     edi,12*16
+    je      .2
+    cmp     edi,14*16
+    je      .1
+    ; unrecognised length byte: report failure
+    mov     eax,-1
+    jmp     .5
+
+    ; .1 enters with 14 rounds to do, .2 with 12 and .3 with 10; execution
+    ; falls through from each label into the next
+.1: dec_round
+    dec_round
+.2: dec_round
+    dec_round
+.3: dec_round
+    dec_round
+    dec_round
+    dec_round
+    dec_round
+    dec_round
+    dec_round
+    dec_round
+    dec_round
+    dec_last_round
+
+; move final values to the output array.
+
+    mov     ebp,[esp+out_blk+stk_spc]
+    mov     [ebp],eax
+    mov     [ebp+4],ebx
+    mov     [ebp+8],esi
+    mov     [ebp+12],edi
+    xor     eax,eax
+
+    ; common exit: restore the saved registers and release the frame
+.5: mov     ebp,[esp+16]
+    mov     ebx,[esp+12]
+    mov     esi,[esp+ 8]
+    mov     edi,[esp+ 4]
+    add     esp,stk_spc
+    do_exit
+
+%endif
+
+    end
+
--- a/src/java/kp2akeytransform/jni/aes/aes_x86_v2.asm
+++ b/src/java/kp2akeytransform/jni/aes/aes_x86_v2.asm
--- a/src/java/kp2akeytransform/jni/aes/aescpp.h
+++ b/src/java/kp2akeytransform/jni/aes/aescpp.h
@ -0,0 +1,148 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+  1. source code distributions include the above copyright notice, this
+     list of conditions and the following disclaimer;
+
+  2. binary distributions include the above copyright notice, this list
+     of conditions and the following disclaimer in their documentation;
+
+  3. the name of the copyright holder is not used to endorse products
+     built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+
+ This file contains the definitions required to use AES (Rijndael) in C++.
+*/
+
+#ifndef _AESCPP_H
+#define _AESCPP_H
+
+#include "aes.h"
+
+#if defined( AES_ENCRYPT )
+
+/* Thin C++ wrapper around an AES encryption context (aes_encrypt_ctx).
+   Every member forwards to the C function of the corresponding name and
+   returns that function's AES_RETURN status unchanged, except for the
+   non-AES_MODES ecb_encrypt(), which is implemented here as a loop.
+   Which key-setup members exist depends on AES_128 / AES_192 / AES_256 /
+   AES_VAR; the chaining-mode members exist only when AES_MODES is defined. */
+class AESencrypt
+{
+public:
+    aes_encrypt_ctx cx[1];
+
+    /* Default construction calls aes_init(); no key is set. */
+    AESencrypt(void) { aes_init(); }
+#if defined(AES_128)
+    /* Construct directly from a 128-bit key; the key-setup status is
+       discarded, use key128() when it needs to be checked. */
+    AESencrypt(const unsigned char key[])
+        {   aes_encrypt_key128(key, cx); }
+    AES_RETURN key128(const unsigned char key[])
+        {   return aes_encrypt_key128(key, cx); }
+#endif
+#if defined(AES_192)
+    AES_RETURN key192(const unsigned char key[])
+        {   return aes_encrypt_key192(key, cx); }
+#endif
+#if defined(AES_256)
+    AES_RETURN key256(const unsigned char key[])
+        {   return aes_encrypt_key256(key, cx); }
+#endif
+#if defined(AES_VAR)
+    AES_RETURN key(const unsigned char key[], int key_len)
+        {   return aes_encrypt_key(key, key_len, cx); }
+#endif
+    /* Encrypt one block from in[] to out[]. */
+    AES_RETURN encrypt(const unsigned char in[], unsigned char out[]) const
+        {   return aes_encrypt(in, out, cx);  }
+#ifndef AES_MODES
+    /* Encrypt nb consecutive AES_BLOCK_SIZE blocks from in[] to out[].
+       Returns EXIT_SUCCESS when every block was encrypted (including the
+       case nb <= 0, where nothing is done) and EXIT_FAILURE as soon as
+       aes_encrypt() reports a failure. */
+    AES_RETURN ecb_encrypt(const unsigned char in[], unsigned char out[], int nb) const
+        {   while(nb-- > 0)
+            {   if(aes_encrypt(in, out, cx) != EXIT_SUCCESS)
+                    return EXIT_FAILURE;
+                in += AES_BLOCK_SIZE, out += AES_BLOCK_SIZE;
+            }
+            return EXIT_SUCCESS;
+        }
+#endif
+#ifdef AES_MODES
+    AES_RETURN mode_reset(void)   { return aes_mode_reset(cx); }
+
+    AES_RETURN ecb_encrypt(const unsigned char in[], unsigned char out[], int nb) const
+        {   return aes_ecb_encrypt(in, out, nb, cx);  }
+
+    AES_RETURN cbc_encrypt(const unsigned char in[], unsigned char out[], int nb,
+                                    unsigned char iv[]) const
+        {   return aes_cbc_encrypt(in, out, nb, iv, cx);  }
+
+    AES_RETURN cfb_encrypt(const unsigned char in[], unsigned char out[], int nb,
+                                    unsigned char iv[])
+        {   return aes_cfb_encrypt(in, out, nb, iv, cx);  }
+
+    AES_RETURN cfb_decrypt(const unsigned char in[], unsigned char out[], int nb,
+                                    unsigned char iv[])
+        {   return aes_cfb_decrypt(in, out, nb, iv, cx);  }
+
+    AES_RETURN ofb_crypt(const unsigned char in[], unsigned char out[], int nb,
+                                    unsigned char iv[])
+        {   return aes_ofb_crypt(in, out, nb, iv, cx);  }
+
+    /* Callback type passed through to aes_ctr_crypt(). */
+    typedef void ctr_fn(unsigned char ctr[]);
+
+    AES_RETURN ctr_crypt(const unsigned char in[], unsigned char out[], int nb,
+                                    unsigned char iv[], ctr_fn cf)
+        {   return aes_ctr_crypt(in, out, nb, iv, cf, cx);  }
+
+#endif
+
+};
+
+#endif
+
+#if defined( AES_DECRYPT )
+
+/* Thin C++ wrapper around an AES decryption context (aes_decrypt_ctx).
+   Every member forwards to the C function of the corresponding name and
+   returns that function's AES_RETURN status unchanged, except for the
+   non-AES_MODES ecb_decrypt(), which is implemented here as a loop.
+   Which key-setup members exist depends on AES_128 / AES_192 / AES_256 /
+   AES_VAR; the chaining-mode members exist only when AES_MODES is defined. */
+class AESdecrypt
+{
+public:
+    aes_decrypt_ctx cx[1];
+
+    /* Default construction calls aes_init(); no key is set. */
+    AESdecrypt(void) { aes_init(); }
+#if defined(AES_128)
+    /* Construct directly from a 128-bit key; the key-setup status is
+       discarded, use key128() when it needs to be checked. */
+    AESdecrypt(const unsigned char key[])
+            { aes_decrypt_key128(key, cx); }
+    AES_RETURN key128(const unsigned char key[])
+            { return aes_decrypt_key128(key, cx); }
+#endif
+#if defined(AES_192)
+    AES_RETURN key192(const unsigned char key[])
+            { return aes_decrypt_key192(key, cx); }
+#endif
+#if defined(AES_256)
+    AES_RETURN key256(const unsigned char key[])
+            { return aes_decrypt_key256(key, cx); }
+#endif
+#if defined(AES_VAR)
+    AES_RETURN key(const unsigned char key[], int key_len)
+            { return aes_decrypt_key(key, key_len, cx); }
+#endif
+    /* Decrypt one block from in[] to out[]. */
+    AES_RETURN decrypt(const unsigned char in[], unsigned char out[]) const
+        {   return aes_decrypt(in, out, cx);  }
+#ifndef AES_MODES
+    /* Decrypt nb consecutive AES_BLOCK_SIZE blocks from in[] to out[].
+       Returns EXIT_SUCCESS when every block was decrypted (including the
+       case nb <= 0, where nothing is done) and EXIT_FAILURE as soon as
+       aes_decrypt() reports a failure. */
+    AES_RETURN ecb_decrypt(const unsigned char in[], unsigned char out[], int nb) const
+        {   while(nb-- > 0)
+            {   if(aes_decrypt(in, out, cx) != EXIT_SUCCESS)
+                    return EXIT_FAILURE;
+                in += AES_BLOCK_SIZE, out += AES_BLOCK_SIZE;
+            }
+            return EXIT_SUCCESS;
+        }
+#endif
+#ifdef AES_MODES
+
+    AES_RETURN ecb_decrypt(const unsigned char in[], unsigned char out[], int nb) const
+        {   return aes_ecb_decrypt(in, out, nb, cx);  }
+
+    AES_RETURN cbc_decrypt(const unsigned char in[], unsigned char out[], int nb,
+                                    unsigned char iv[]) const
+        {   return aes_cbc_decrypt(in, out, nb, iv, cx);  }
+#endif
+};
+
+#endif
+
+#endif
--- a/src/java/kp2akeytransform/jni/aes/aescrypt.c
+++ b/src/java/kp2akeytransform/jni/aes/aescrypt.c
@ -0,0 +1,301 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+  1. source code distributions include the above copyright notice, this
+     list of conditions and the following disclaimer;
+
+  2. binary distributions include the above copyright notice, this list
+     of conditions and the following disclaimer in their documentation;
+
+  3. the name of the copyright holder is not used to endorse products
+     built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+*/
+
+#include "aesopt.h"
+#include "aestab.h"
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+/* si: load word c of byte array x via word_in(), XOR it with key word
+   (k)[c] and store the result in state word c of y.
+   so: write state word c of x to byte array y via word_out(). */
+#define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c])
+#define so(y,x,c)   word_out(y, c, s(x,c))
+
+/* locals: declarator list for two four-word state variables, either as
+   two arrays (ARRAYS) or as eight separately named scalars. */
+#if defined(ARRAYS)
+#define locals(y,x)     x[4],y[4]
+#else
+#define locals(y,x)     x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
+#endif
+
+/* l_copy: copy the four state words of x into y.
+   state_in / state_out: apply si / so to all four words.
+   round: apply round macro rm to all four columns.
+   These expand to several statements and are not wrapped in a single
+   block, so they must not be used as the unbraced body of an if or loop. */
+#define l_copy(y, x)    s(y,0) = s(x,0); s(y,1) = s(x,1); \
+                        s(y,2) = s(x,2); s(y,3) = s(x,3);
+#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
+#define state_out(y,x)  so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
+#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
+
+#if ( FUNCS_IN_C & ENCRYPTION_IN_C )
+
+/* Visual C++ .Net v7.1 provides the fastest encryption code when using
+   Pentium optimisation with small code but this is poor for decryption
+   so we need to control this with the following VC++ pragmas
+*/
+
+#if defined( _MSC_VER ) && !defined( _WIN64 )
+#pragma optimize( "s", on )
+#endif
+
+/* Given the column (c) of the output state variable, the following
+   macros give the input state variables which are needed in its
+   computation for each row (r) of the state. All the alternative
+   macros give the same end values but expand into different ways
+   of calculating these values.  In particular the complex macro
+   used for dynamically variable block sizes is designed to expand
+   to a compile time constant whenever possible but will expand to
+   conditional clauses on some branches (I am grateful to Frank
+   Yellin for this construction)
+*/
+
+/* fwd_var(x,r,c): for row r and output column c, selects state word
+   (c + r) mod 4 of x. */
+#define fwd_var(x,r,c)\
+ ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
+ : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
+ : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
+ :          ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
+
+/* fwd_rnd(y,x,k,c): compute column c of a normal forward round from state
+   x and store it, XORed with key word (k)[c], in state y.  The form used
+   depends on which forward tables are configured: four tables (FT4_SET),
+   one table (FT1_SET), or no tables, in which case fwd_mcol() is applied
+   explicitly.  dec_fmvars is undefined in the table-based variants, which
+   do not call fwd_mcol(). */
+#if defined(FT4_SET)
+#undef  dec_fmvars
+#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
+#elif defined(FT1_SET)
+#undef  dec_fmvars
+#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c))
+#else
+#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c)))
+#endif
+
+/* fwd_lrnd(y,x,k,c): the same for the last forward round, using the
+   last-round tables (FL4_SET / FL1_SET) or a plain table lookup without
+   fwd_mcol(). */
+#if defined(FL4_SET)
+#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c))
+#elif defined(FL1_SET)
+#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c))
+#else
+#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(s,box),fwd_var,rf1,c))
+#endif
+
+/* Encrypt one block.
+
+   in   input block, read with state_in()
+   out  output block, written with state_out()
+   cx   encryption context; cx->ks is the key schedule and cx->inf.b[0]
+        must be 10*16, 12*16 or 14*16 (number of rounds times 16)
+
+   Returns EXIT_FAILURE, without touching out, when cx->inf.b[0] is none of
+   those values, and EXIT_SUCCESS otherwise.
+
+   The rounds alternate between the two state variables b0 and b1.  The
+   degree of loop unrolling is chosen at compile time by ENC_UNROLL. */
+AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1])
+{   uint_32t         locals(b0, b1);
+    const uint_32t   *kp;
+#if defined( dec_fmvars )
+    dec_fmvars; /* declare variables for fwd_mcol() if needed */
+#endif
+
+    if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 )
+        return EXIT_FAILURE;
+
+    /* load the input and XOR in the first round key */
+    kp = cx->ks;
+    state_in(b0, in, kp);
+
+#if (ENC_UNROLL == FULL)
+
+    /* the cases fall through on purpose: the longer key sizes run their
+       extra rounds first and then share the ten rounds of the last case */
+    switch(cx->inf.b[0])
+    {
+    case 14 * 16:
+        round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
+        round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
+        kp += 2 * N_COLS;
+    case 12 * 16:
+        round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
+        round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
+        kp += 2 * N_COLS;
+    case 10 * 16:
+        round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
+        round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
+        round(fwd_rnd,  b1, b0, kp + 3 * N_COLS);
+        round(fwd_rnd,  b0, b1, kp + 4 * N_COLS);
+        round(fwd_rnd,  b1, b0, kp + 5 * N_COLS);
+        round(fwd_rnd,  b0, b1, kp + 6 * N_COLS);
+        round(fwd_rnd,  b1, b0, kp + 7 * N_COLS);
+        round(fwd_rnd,  b0, b1, kp + 8 * N_COLS);
+        round(fwd_rnd,  b1, b0, kp + 9 * N_COLS);
+        round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
+    }
+
+#else
+
+#if (ENC_UNROLL == PARTIAL)
+    /* two rounds per iteration, then one more normal round, so that the
+       final round below always reads b1 and writes b0 */
+    {   uint_32t    rnd;
+        for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd)
+        {
+            kp += N_COLS;
+            round(fwd_rnd, b1, b0, kp);
+            kp += N_COLS;
+            round(fwd_rnd, b0, b1, kp);
+        }
+        kp += N_COLS;
+        round(fwd_rnd,  b1, b0, kp);
+#else
+    /* one round per iteration, copying the result back into b0 each time */
+    {   uint_32t    rnd;
+        for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd)
+        {
+            kp += N_COLS;
+            round(fwd_rnd, b1, b0, kp);
+            l_copy(b0, b1);
+        }
+#endif
+        /* final round, shared by both loop variants */
+        kp += N_COLS;
+        round(fwd_lrnd, b0, b1, kp);
+    }
+#endif
+
+    state_out(out, b0);
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if ( FUNCS_IN_C & DECRYPTION_IN_C)
+
+/* Visual C++ .Net v7.1 provides the fastest encryption code when using
+   Pentium optimisation with small code but this is poor for decryption
+   so we need to control this with the following VC++ pragmas
+*/
+
+#if defined( _MSC_VER ) && !defined( _WIN64 )
+#pragma optimize( "t", on )
+#endif
+
+/* Given the column (c) of the output state variable, the following
+   macros give the input state variables which are needed in its
+   computation for each row (r) of the state. All the alternative
+   macros give the same end values but expand into different ways
+   of calculating these values.  In particular the complex macro
+   used for dynamically variable block sizes is designed to expand
+   to a compile time constant whenever possible but will expand to
+   conditional clauses on some branches (I am grateful to Frank
+   Yellin for this construction)
+*/
+
+/* inv_var(x,r,c): for row r and output column c, selects state word
+   (c - r) mod 4 of x. */
+#define inv_var(x,r,c)\
+ ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
+ : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\
+ : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
+ :          ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0)))
+
+/* inv_rnd(y,x,k,c): compute column c of a normal inverse round from state
+   x and store it in state y.  The table-based variants (IT4_SET, IT1_SET)
+   XOR key word (k)[c] with the table result; the table-free variant XORs
+   the key word first and then applies inv_mcol() to the sum.  dec_imvars
+   is undefined in the table-based variants, which do not call inv_mcol(). */
+#if defined(IT4_SET)
+#undef  dec_imvars
+#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c))
+#elif defined(IT1_SET)
+#undef  dec_imvars
+#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c))
+#else
+#define inv_rnd(y,x,k,c)    (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c)))
+#endif
+
+/* inv_lrnd(y,x,k,c): the same for the last inverse round, using the
+   last-round tables (IL4_SET / IL1_SET) or a plain table lookup without
+   inv_mcol(). */
+#if defined(IL4_SET)
+#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c))
+#elif defined(IL1_SET)
+#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c))
+#else
+#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c))
+#endif
+
+/* This code can work with the decryption key schedule in the   */
+/* order that is used for encryption (where the 1st decryption  */
+/* round key is at the high end of the schedule) or with a key  */
+/* schedule that has been reversed to put the 1st decryption    */
+/* round key at the low end of the schedule in memory (when     */
+/* AES_REV_DKS is defined)                                      */
+
+/* key_ofs is 0 when the decryption key schedule is stored reversed
+   (AES_REV_DKS) and 1 when it is stored in encryption order.
+   rnd_key(n) gives the address n round keys away from the current key
+   pointer kp, stepping upwards through a reversed schedule and downwards
+   otherwise.  The argument is parenthesised so that an expression such as
+   rnd_key(a + b) is scaled by N_COLS as a whole. */
+#ifdef AES_REV_DKS
+#define key_ofs     0
+#define rnd_key(n)  (kp + (n) * N_COLS)
+#else
+#define key_ofs     1
+#define rnd_key(n)  (kp - (n) * N_COLS)
+#endif
+
+/* Decrypt one block.
+
+   in   input block, read with state_in()
+   out  output block, written with state_out()
+   cx   decryption context; cx->ks is the key schedule and cx->inf.b[0]
+        must be 10*16, 12*16 or 14*16 (number of rounds times 16)
+
+   Returns EXIT_FAILURE, without touching out, when cx->inf.b[0] is none of
+   those values, and EXIT_SUCCESS otherwise.
+
+   The rounds alternate between the two state variables b0 and b1.  The
+   degree of loop unrolling is chosen at compile time by DEC_UNROLL, and
+   the direction in which the key schedule is walked by key_ofs / rnd_key
+   (see AES_REV_DKS). */
+AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1])
+{   uint_32t        locals(b0, b1);
+#if defined( dec_imvars )
+    dec_imvars; /* declare variables for inv_mcol() if needed */
+#endif
+    const uint_32t *kp;
+
+    if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 )
+        return EXIT_FAILURE;
+
+    /* start at the first decryption round key: the low end of a reversed
+       schedule, or the high end (b[0] >> 2 words up) otherwise */
+    kp = cx->ks + (key_ofs ? (cx->inf.b[0] >> 2) : 0);
+    state_in(b0, in, kp);
+
+#if (DEC_UNROLL == FULL)
+
+    /* re-base kp at the opposite end of the schedule, where the key for
+       the final round lives; the rounds below are then addressed with
+       non-positive offsets from it, ending at rnd_key(0) */
+    kp = cx->ks + (key_ofs ? 0 : (cx->inf.b[0] >> 2));
+    /* the cases fall through on purpose: the longer key sizes run their
+       extra rounds first and then share the ten rounds of the last case */
+    switch(cx->inf.b[0])
+    {
+    case 14 * 16:
+        round(inv_rnd,  b1, b0, rnd_key(-13));
+        round(inv_rnd,  b0, b1, rnd_key(-12));
+    case 12 * 16:
+        round(inv_rnd,  b1, b0, rnd_key(-11));
+        round(inv_rnd,  b0, b1, rnd_key(-10));
+    case 10 * 16:
+        round(inv_rnd,  b1, b0, rnd_key(-9));
+        round(inv_rnd,  b0, b1, rnd_key(-8));
+        round(inv_rnd,  b1, b0, rnd_key(-7));
+        round(inv_rnd,  b0, b1, rnd_key(-6));
+        round(inv_rnd,  b1, b0, rnd_key(-5));
+        round(inv_rnd,  b0, b1, rnd_key(-4));
+        round(inv_rnd,  b1, b0, rnd_key(-3));
+        round(inv_rnd,  b0, b1, rnd_key(-2));
+        round(inv_rnd,  b1, b0, rnd_key(-1));
+        round(inv_lrnd, b0, b1, rnd_key( 0));
+    }
+
+#else
+
+#if (DEC_UNROLL == PARTIAL)
+    /* two rounds per iteration, then one more normal round, so that the
+       final round below always reads b1 and writes b0 */
+    {   uint_32t    rnd;
+        for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd)
+        {
+            kp = rnd_key(1);
+            round(inv_rnd, b1, b0, kp);
+            kp = rnd_key(1);
+            round(inv_rnd, b0, b1, kp);
+        }
+        kp = rnd_key(1);
+        round(inv_rnd, b1, b0, kp);
+#else
+    /* one round per iteration, copying the result back into b0 each time */
+    {   uint_32t    rnd;
+        for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd)
+        {
+            kp = rnd_key(1);
+            round(inv_rnd, b1, b0, kp);
+            l_copy(b0, b1);
+        }
+#endif
+        /* final round, shared by both loop variants */
+        kp = rnd_key(1);
+        round(inv_lrnd, b0, b1, kp);
+        }
+#endif
+
+    state_out(out, b0);
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
--- a/src/java/kp2akeytransform/jni/aes/aeskey.c
+++ b/src/java/kp2akeytransform/jni/aes/aeskey.c
@ -0,0 +1,555 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+  1. source code distributions include the above copyright notice, this
+     list of conditions and the following disclaimer;
+
+  2. binary distributions include the above copyright notice, this list
+     of conditions and the following disclaimer in their documentation;
+
+  3. the name of the copyright holder is not used to endorse products
+     built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+*/
+
+#include "aesopt.h"
+#include "aestab.h"
+
+#ifdef USE_VIA_ACE_IF_PRESENT
+#  include "aes_via_ace.h"
+#endif
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+/* Initialise the key schedule from the user supplied key. The key
+   length can be specified in bytes, with legal values of 16, 24
+   and 32, or in bits, with legal values of 128, 192 and 256. These
+   values correspond with Nk values of 4, 6 and 8 respectively.
+
+   The following macros implement a single cycle in the key
+   schedule generation process. The number of cycles needed
+   for each cx->n_col and nk value is:
+
+    nk =             4  5  6  7  8
+    ------------------------------
+    cx->n_col = 4   10  9  8  7  7
+    cx->n_col = 5   14 11 10  9  9
+    cx->n_col = 6   19 15 12 11 11
+    cx->n_col = 7   21 19 16 13 14
+    cx->n_col = 8   29 23 19 17 14
+*/
+/* Optional size reduction: when REDUCE_CODE_SIZE is defined, ls_box and
+   inv_mcol are remapped to the functions ls_sub() and im_sub() (only
+   declared here; their definitions are not in this part of the file),
+   and both key schedule unrolling options are switched off so that the
+   looped forms of the key set-up routines are compiled instead. */
+#if defined( REDUCE_CODE_SIZE )
+#  define ls_box ls_sub
+   uint_32t ls_sub(const uint_32t t, const uint_32t n);
+#  define inv_mcol im_sub
+   uint_32t im_sub(const uint_32t x);
+#  ifdef ENC_KS_UNROLL
+#    undef ENC_KS_UNROLL
+#  endif
+#  ifdef DEC_KS_UNROLL
+#    undef DEC_KS_UNROLL
+#  endif
+#endif
+
+#if (FUNCS_IN_C & ENC_KEYING_IN_C)
+
+#if defined(AES_128) || defined( AES_VAR )
+/* ke4(k,i): one cycle of the 128-bit key expansion.  Using the running
+   state ss[0..3] (expected in the caller's scope) it writes the four
+   words k[4*i+4] .. k[4*i+7]: ss[0] absorbs ls_box(ss[3],3) and entry i
+   of the table named by t_use(r,c) (presumably the round constants -
+   confirm in aestab.h), then each later word is XORed with the word
+   updated just before it. */
+#define ke4(k,i) \
+{   k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; \
+    k[4*(i)+5] = ss[1] ^= ss[0]; \
+    k[4*(i)+6] = ss[2] ^= ss[1]; \
+    k[4*(i)+7] = ss[3] ^= ss[2]; \
+}
+/* Fill an encryption context from a 128-bit key.
+   key: key material, read as four words through word_in().
+   cx:  context to initialise; ks[0..43] and the inf fields are written.
+   Returns EXIT_SUCCESS on every path. */
+AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1])
+{   uint_32t    ss[4];
+    /* the key words are stored unchanged in ks[0..3] and also seed ss[] */
+    cx->ks[0] = ss[0] = word_in(key, 0);
+    cx->ks[1] = ss[1] = word_in(key, 1);
+    cx->ks[2] = ss[2] = word_in(key, 2);
+    cx->ks[3] = ss[3] = word_in(key, 3);
+    /* cycles 0..8 are either unrolled or looped; cycle 9 always follows */
+#ifdef ENC_KS_UNROLL
+    ke4(cx->ks, 0);  ke4(cx->ks, 1);
+    ke4(cx->ks, 2);  ke4(cx->ks, 3);
+    ke4(cx->ks, 4);  ke4(cx->ks, 5);
+    ke4(cx->ks, 6);  ke4(cx->ks, 7);
+    ke4(cx->ks, 8);
+#else
+    {   uint_32t i;
+        for(i = 0; i < 9; ++i)
+            ke4(cx->ks, i);
+    }
+#endif
+    ke4(cx->ks, 9);
+    cx->inf.l = 0;              /* NOTE(review): inf.l presumably overlays inf.b[] - confirm in aes.h */
+    cx->inf.b[0] = 10 * 16;     /* marks the schedule as the 128-bit (10 * 16) form */
+    /* with VIA ACE support compiled in, flag the context when the unit is available */
+#ifdef USE_VIA_ACE_IF_PRESENT
+    if(VIA_ACE_AVAILABLE)
+        cx->inf.b[1] = 0xff;
+#endif
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined(AES_192) || defined( AES_VAR )
+/* kef6(k,i): the first four words of cycle i of the 192-bit expansion.
+   Writes k[6*i+6] .. k[6*i+9] from the running state ss[0..5] (expected
+   in the caller's scope); ss[0] absorbs ls_box(ss[5],3) and entry i of
+   the t_use(r,c) table, the next three words chain by XOR. */
+#define kef6(k,i) \
+{   k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; \
+    k[6*(i)+ 7] = ss[1] ^= ss[0]; \
+    k[6*(i)+ 8] = ss[2] ^= ss[1]; \
+    k[6*(i)+ 9] = ss[3] ^= ss[2]; \
+}
+/* ke6(k,i): a full six-word cycle - kef6 plus k[6*i+10] and k[6*i+11]. */
+#define ke6(k,i) \
+{   kef6(k,i); \
+    k[6*(i)+10] = ss[4] ^= ss[3]; \
+    k[6*(i)+11] = ss[5] ^= ss[4]; \
+}
+/* Fill an encryption context from a 192-bit key.
+   key: key material, read as six words through word_in().
+   cx:  context to initialise; ks[0..51] and the inf fields are written.
+   Returns EXIT_SUCCESS on every path. */
+AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1])
+{   uint_32t    ss[6];
+    /* the key words are stored unchanged in ks[0..5] and also seed ss[] */
+    cx->ks[0] = ss[0] = word_in(key, 0);
+    cx->ks[1] = ss[1] = word_in(key, 1);
+    cx->ks[2] = ss[2] = word_in(key, 2);
+    cx->ks[3] = ss[3] = word_in(key, 3);
+    cx->ks[4] = ss[4] = word_in(key, 4);
+    cx->ks[5] = ss[5] = word_in(key, 5);
+    /* seven full cycles (0..6), unrolled or looped, then a four-word tail */
+#ifdef ENC_KS_UNROLL
+    ke6(cx->ks, 0);  ke6(cx->ks, 1);
+    ke6(cx->ks, 2);  ke6(cx->ks, 3);
+    ke6(cx->ks, 4);  ke6(cx->ks, 5);
+    ke6(cx->ks, 6);
+#else
+    {   uint_32t i;
+        for(i = 0; i < 7; ++i)
+            ke6(cx->ks, i);
+    }
+#endif
+    kef6(cx->ks, 7);            /* only ks[48..51] are needed from the last cycle */
+    cx->inf.l = 0;
+    cx->inf.b[0] = 12 * 16;     /* marks the schedule as the 192-bit (12 * 16) form */
+    /* with VIA ACE support compiled in, flag the context when the unit is available */
+#ifdef USE_VIA_ACE_IF_PRESENT
+    if(VIA_ACE_AVAILABLE)
+        cx->inf.b[1] = 0xff;
+#endif
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined(AES_256) || defined( AES_VAR )
+/* kef8(k,i): the first four words of cycle i of the 256-bit expansion.
+   Writes k[8*i+8] .. k[8*i+11] from the running state ss[0..7] (expected
+   in the caller's scope); ss[0] absorbs ls_box(ss[7],3) and entry i of
+   the t_use(r,c) table, the next three words chain by XOR. */
+#define kef8(k,i) \
+{   k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; \
+    k[8*(i)+ 9] = ss[1] ^= ss[0]; \
+    k[8*(i)+10] = ss[2] ^= ss[1]; \
+    k[8*(i)+11] = ss[3] ^= ss[2]; \
+}
+/* ke8(k,i): a full eight-word cycle - kef8 plus k[8*i+12] .. k[8*i+15],
+   where ss[4] absorbs ls_box(ss[3],0) before the XOR chain continues. */
+#define ke8(k,i) \
+{   kef8(k,i); \
+    k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \
+    k[8*(i)+13] = ss[5] ^= ss[4]; \
+    k[8*(i)+14] = ss[6] ^= ss[5]; \
+    k[8*(i)+15] = ss[7] ^= ss[6]; \
+}
+/* Fill an encryption context from a 256-bit key.
+   key: key material, read as eight words through word_in().
+   cx:  context to initialise; ks[0..59] and the inf fields are written.
+   Returns EXIT_SUCCESS on every path. */
+AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1])
+{   uint_32t    ss[8];
+    /* the key words are stored unchanged in ks[0..7] and also seed ss[] */
+    cx->ks[0] = ss[0] = word_in(key, 0);
+    cx->ks[1] = ss[1] = word_in(key, 1);
+    cx->ks[2] = ss[2] = word_in(key, 2);
+    cx->ks[3] = ss[3] = word_in(key, 3);
+    cx->ks[4] = ss[4] = word_in(key, 4);
+    cx->ks[5] = ss[5] = word_in(key, 5);
+    cx->ks[6] = ss[6] = word_in(key, 6);
+    cx->ks[7] = ss[7] = word_in(key, 7);
+    /* six full cycles (0..5), unrolled or looped, then a four-word tail */
+#ifdef ENC_KS_UNROLL
+    ke8(cx->ks, 0); ke8(cx->ks, 1);
+    ke8(cx->ks, 2); ke8(cx->ks, 3);
+    ke8(cx->ks, 4); ke8(cx->ks, 5);
+#else
+    {   uint_32t i;
+        for(i = 0; i < 6; ++i)
+            ke8(cx->ks,  i);
+    }
+#endif
+    kef8(cx->ks, 6);            /* only ks[56..59] are needed from the last cycle */
+    cx->inf.l = 0;
+    cx->inf.b[0] = 14 * 16;     /* marks the schedule as the 256-bit (14 * 16) form */
+    /* with VIA ACE support compiled in, flag the context when the unit is available */
+#ifdef USE_VIA_ACE_IF_PRESENT
+    if(VIA_ACE_AVAILABLE)
+        cx->inf.b[1] = 0xff;
+#endif
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined( AES_VAR )
+
+/* Variable key length front end for the encryption key schedule.
+   key_len may be given in bytes (16, 24, 32) or in bits (128, 192, 256);
+   the matching fixed-size routine is called and its result returned.
+   Any other length yields EXIT_FAILURE and leaves cx untouched. */
+AES_RETURN aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1])
+{
+    if(key_len == 16 || key_len == 128)
+        return aes_encrypt_key128(key, cx);
+
+    if(key_len == 24 || key_len == 192)
+        return aes_encrypt_key192(key, cx);
+
+    if(key_len == 32 || key_len == 256)
+        return aes_encrypt_key256(key, cx);
+
+    return EXIT_FAILURE;
+}
+
+#endif
+
+#endif
+
+#if (FUNCS_IN_C & DEC_KEYING_IN_C)
+
+/* this is used to store the decryption round keys  */
+/* in forward or reverse order                      */
+
+#ifdef AES_REV_DKS              /* reversed layout of the decryption schedule */
+#define v(n,i)  ((n) - (i) + 2 * ((i) & 3))     /* words 4j..4j+3 map to n-4j..n-4j+3: groups reversed, order inside a group kept */
+#else
+#define v(n,i)  (i)             /* forward layout: index used unchanged */
+#endif
+/* ff(x) is applied to decryption round key words: it is the identity when
+   DEC_ROUND is NO_TABLES and inv_mcol(x) otherwise.  In the latter case
+   d_vars is defined as dec_imvars when that macro exists, so the key
+   routines can declare whatever locals inv_mcol() needs. */
+#if DEC_ROUND == NO_TABLES
+#define ff(x)   (x)
+#else
+#define ff(x)   inv_mcol(x)
+#if defined( dec_imvars )
+#define d_vars  dec_imvars
+#endif
+#endif
+
+#if defined(AES_128) || defined( AES_VAR )
+/* k4e(k,i): the same cycle as the 128-bit encryption expansion, except
+   that every store goes through v(40,...) so the schedule can be laid
+   out forwards or reversed.  Used by the looped (non-unrolled) path. */
+#define k4e(k,i) \
+{   k[v(40,(4*(i))+4)] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; \
+    k[v(40,(4*(i))+5)] = ss[1] ^= ss[0]; \
+    k[v(40,(4*(i))+6)] = ss[2] ^= ss[1]; \
+    k[v(40,(4*(i))+7)] = ss[3] ^= ss[2]; \
+}
+/* Two alternative sets of kdf4/kd4/kdl4 follow.  The '#if 1' below selects
+   the first set; the set after '#else' is compiled out. */
+#if 1
+/* kdf4(k,i): first cycle of the unrolled schedule (used with i = 0).
+   It first folds ss[0..2] into XOR combinations of the key words, derives
+   the scratch word ss[4] from ls_box() and entry i of the t_use(r,c)
+   table, XORs it into ss[i % 4], and then accumulates ss[4] with the
+   four previous schedule words, storing ff() of each running value.
+   NOTE(review): the invariant kept in ss[] is not spelled out here. */
+#define kdf4(k,i) \
+{   ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \
+    ss[1] = ss[1] ^ ss[3]; \
+    ss[2] = ss[2] ^ ss[3]; \
+    ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \
+    ss[i % 4] ^= ss[4]; \
+    ss[4] ^= k[v(40,(4*(i)))];   k[v(40,(4*(i))+4)] = ff(ss[4]); \
+    ss[4] ^= k[v(40,(4*(i))+1)]; k[v(40,(4*(i))+5)] = ff(ss[4]); \
+    ss[4] ^= k[v(40,(4*(i))+2)]; k[v(40,(4*(i))+6)] = ff(ss[4]); \
+    ss[4] ^= k[v(40,(4*(i))+3)]; k[v(40,(4*(i))+7)] = ff(ss[4]); \
+}
+/* kd4(k,i): middle cycles.  ss[4] is derived as in kdf4 and XORed into
+   ss[i % 4]; ff(ss[4]) is then XORed cumulatively with the four previous
+   schedule words (stored in ff() form by the preceding kdf4/kd4 step)
+   to give the next four words. */
+#define kd4(k,i) \
+{   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; \
+    ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \
+    k[v(40,(4*(i))+4)] = ss[4] ^= k[v(40,(4*(i)))]; \
+    k[v(40,(4*(i))+5)] = ss[4] ^= k[v(40,(4*(i))+1)]; \
+    k[v(40,(4*(i))+6)] = ss[4] ^= k[v(40,(4*(i))+2)]; \
+    k[v(40,(4*(i))+7)] = ss[4] ^= k[v(40,(4*(i))+3)]; \
+}
+/* kdl4(k,i): last cycle.  The four final words are built directly from
+   ss[] and stored without passing through ff(). */
+#define kdl4(k,i) \
+{   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \
+    k[v(40,(4*(i))+4)] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \
+    k[v(40,(4*(i))+5)] = ss[1] ^ ss[3]; \
+    k[v(40,(4*(i))+6)] = ss[0]; \
+    k[v(40,(4*(i))+7)] = ss[1]; \
+}
+
+#else
+/* Disabled alternative: here ss[0..3] are updated exactly as in k4e and
+   ff() is applied to the words as they are stored (kdf4), folded in via
+   the previous schedule words (kd4), or omitted (kdl4). */
+#define kdf4(k,i) \
+{   ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[v(40,(4*(i))+ 4)] = ff(ss[0]); \
+    ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ff(ss[1]); \
+    ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ff(ss[2]); \
+    ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ff(ss[3]); \
+}
+
+#define kd4(k,i) \
+{   ss[4] = ls_box(ss[3],3) ^ t_use(r,c)[i]; \
+    ss[0] ^= ss[4]; ss[4] = ff(ss[4]); k[v(40,(4*(i))+ 4)] = ss[4] ^= k[v(40,(4*(i)))]; \
+    ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ss[4] ^= k[v(40,(4*(i))+ 1)]; \
+    ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ss[4] ^= k[v(40,(4*(i))+ 2)]; \
+    ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ss[4] ^= k[v(40,(4*(i))+ 3)]; \
+}
+
+#define kdl4(k,i) \
+{   ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[v(40,(4*(i))+ 4)] = ss[0]; \
+    ss[1] ^= ss[0]; k[v(40,(4*(i))+ 5)] = ss[1]; \
+    ss[2] ^= ss[1]; k[v(40,(4*(i))+ 6)] = ss[2]; \
+    ss[3] ^= ss[2]; k[v(40,(4*(i))+ 7)] = ss[3]; \
+}
+
+#endif
+/* Fill a decryption context from a 128-bit key.
+   key: key material, read as four words through word_in().
+   cx:  context to initialise; ks[] (indexed through v(40,...)) and the
+        inf fields are written.
+   Returns EXIT_SUCCESS on every path.  ss[4] is scratch space for the
+   kdf4/kd4/kdl4 macros. */
+AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1])
+{   uint_32t    ss[5];
+#if defined( d_vars )
+        d_vars;
+#endif
+    cx->ks[v(40,(0))] = ss[0] = word_in(key, 0);
+    cx->ks[v(40,(1))] = ss[1] = word_in(key, 1);
+    cx->ks[v(40,(2))] = ss[2] = word_in(key, 2);
+    cx->ks[v(40,(3))] = ss[3] = word_in(key, 3);
+    /* unrolled: kdf4 for cycle 0, kd4 for 1..8, kdl4 for 9; otherwise expand with k4e and convert afterwards */
+#ifdef DEC_KS_UNROLL
+     kdf4(cx->ks, 0); kd4(cx->ks, 1);
+     kd4(cx->ks, 2);  kd4(cx->ks, 3);
+     kd4(cx->ks, 4);  kd4(cx->ks, 5);
+     kd4(cx->ks, 6);  kd4(cx->ks, 7);
+     kd4(cx->ks, 8);  kdl4(cx->ks, 9);
+#else
+    {   uint_32t i;
+        for(i = 0; i < 10; ++i)
+            k4e(cx->ks, i);
+#if !(DEC_ROUND == NO_TABLES)
+        for(i = N_COLS; i < 10 * N_COLS; ++i)   /* ks[0..N_COLS-1] and the words from 10 * N_COLS on are left as they are */
+            cx->ks[i] = inv_mcol(cx->ks[i]);
+#endif
+    }
+#endif
+    cx->inf.l = 0;
+    cx->inf.b[0] = 10 * 16;     /* marks the schedule as the 128-bit (10 * 16) form */
+    /* with VIA ACE support compiled in, flag the context when the unit is available */
+#ifdef USE_VIA_ACE_IF_PRESENT
+    if(VIA_ACE_AVAILABLE)
+        cx->inf.b[1] = 0xff;
+#endif
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined(AES_192) || defined( AES_VAR )
+/* k6ef(k,i): first four words of cycle i of the 192-bit expansion, stored
+   through v(48,...) so the layout can be forward or reversed. */
+#define k6ef(k,i) \
+{   k[v(48,(6*(i))+ 6)] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; \
+    k[v(48,(6*(i))+ 7)] = ss[1] ^= ss[0]; \
+    k[v(48,(6*(i))+ 8)] = ss[2] ^= ss[1]; \
+    k[v(48,(6*(i))+ 9)] = ss[3] ^= ss[2]; \
+}
+/* k6e(k,i): full six-word cycle - k6ef plus words 6*i+10 and 6*i+11. */
+#define k6e(k,i) \
+{   k6ef(k,i); \
+    k[v(48,(6*(i))+10)] = ss[4] ^= ss[3]; \
+    k[v(48,(6*(i))+11)] = ss[5] ^= ss[4]; \
+}
+/* kdf6(k,i): first unrolled cycle.  ss[0..5] are advanced as in k6e and
+   each new word is stored after passing through ff(). */
+#define kdf6(k,i) \
+{   ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ff(ss[0]); \
+    ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ff(ss[1]); \
+    ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ff(ss[2]); \
+    ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ff(ss[3]); \
+    ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ff(ss[4]); \
+    ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ff(ss[5]); \
+}
+/* kd6(k,i): middle cycles.  ss[0..5] are advanced as in k6e; the scratch
+   word ss[6] starts as ff() of the ls_box/table term and is XORed
+   cumulatively with the six previous schedule words to give the next
+   six stored words. */
+#define kd6(k,i) \
+{   ss[6] = ls_box(ss[5],3) ^ t_use(r,c)[i]; \
+    ss[0] ^= ss[6]; ss[6] = ff(ss[6]); k[v(48,(6*(i))+ 6)] = ss[6] ^= k[v(48,(6*(i)))]; \
+    ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[6] ^= k[v(48,(6*(i))+ 1)]; \
+    ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[6] ^= k[v(48,(6*(i))+ 2)]; \
+    ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[6] ^= k[v(48,(6*(i))+ 3)]; \
+    ss[4] ^= ss[3]; k[v(48,(6*(i))+10)] = ss[6] ^= k[v(48,(6*(i))+ 4)]; \
+    ss[5] ^= ss[4]; k[v(48,(6*(i))+11)] = ss[6] ^= k[v(48,(6*(i))+ 5)]; \
+}
+/* kdl6(k,i): last cycle.  Only four words are produced and they are
+   stored without passing through ff(). */
+#define kdl6(k,i) \
+{   ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[v(48,(6*(i))+ 6)] = ss[0]; \
+    ss[1] ^= ss[0]; k[v(48,(6*(i))+ 7)] = ss[1]; \
+    ss[2] ^= ss[1]; k[v(48,(6*(i))+ 8)] = ss[2]; \
+    ss[3] ^= ss[2]; k[v(48,(6*(i))+ 9)] = ss[3]; \
+}
+/* Fill a decryption context from a 192-bit key.
+   key: key material, read as six words through word_in().
+   cx:  context to initialise; ks[] (indexed through v(48,...)) and the
+        inf fields are written.
+   Returns EXIT_SUCCESS on every path.  ss[6] is scratch space for kd6. */
+AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1])
+{   uint_32t    ss[7];
+#if defined( d_vars )
+        d_vars;
+#endif
+    cx->ks[v(48,(0))] = ss[0] = word_in(key, 0);
+    cx->ks[v(48,(1))] = ss[1] = word_in(key, 1);
+    cx->ks[v(48,(2))] = ss[2] = word_in(key, 2);
+    cx->ks[v(48,(3))] = ss[3] = word_in(key, 3);
+    /* key words 4 and 5 are stored through ff() in the unrolled path and raw in the looped path, which converts the schedule in a later pass */
+#ifdef DEC_KS_UNROLL
+    cx->ks[v(48,(4))] = ff(ss[4] = word_in(key, 4));
+    cx->ks[v(48,(5))] = ff(ss[5] = word_in(key, 5));
+    kdf6(cx->ks, 0); kd6(cx->ks, 1);
+    kd6(cx->ks, 2);  kd6(cx->ks, 3);
+    kd6(cx->ks, 4);  kd6(cx->ks, 5);
+    kd6(cx->ks, 6);  kdl6(cx->ks, 7);
+#else
+    cx->ks[v(48,(4))] = ss[4] = word_in(key, 4);
+    cx->ks[v(48,(5))] = ss[5] = word_in(key, 5);
+    {   uint_32t i;
+        /* seven full cycles, then the four-word tail */
+        for(i = 0; i < 7; ++i)
+            k6e(cx->ks, i);
+        k6ef(cx->ks, 7);
+#if !(DEC_ROUND == NO_TABLES)
+        for(i = N_COLS; i < 12 * N_COLS; ++i)   /* ks[0..N_COLS-1] and the words from 12 * N_COLS on are left as they are */
+            cx->ks[i] = inv_mcol(cx->ks[i]);
+#endif
+    }
+#endif
+    cx->inf.l = 0;
+    cx->inf.b[0] = 12 * 16;     /* marks the schedule as the 192-bit (12 * 16) form */
+    /* with VIA ACE support compiled in, flag the context when the unit is available */
+#ifdef USE_VIA_ACE_IF_PRESENT
+    if(VIA_ACE_AVAILABLE)
+        cx->inf.b[1] = 0xff;
+#endif
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined(AES_256) || defined( AES_VAR )
+/* k8ef(k,i): first four words of cycle i of the 256-bit expansion, stored
+   through v(56,...) so the layout can be forward or reversed. */
+#define k8ef(k,i) \
+{   k[v(56,(8*(i))+ 8)] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; \
+    k[v(56,(8*(i))+ 9)] = ss[1] ^= ss[0]; \
+    k[v(56,(8*(i))+10)] = ss[2] ^= ss[1]; \
+    k[v(56,(8*(i))+11)] = ss[3] ^= ss[2]; \
+}
+/* k8e(k,i): full eight-word cycle - k8ef plus words 8*i+12 .. 8*i+15,
+   where ss[4] absorbs ls_box(ss[3],0) before the XOR chain continues. */
+#define k8e(k,i) \
+{   k8ef(k,i); \
+    k[v(56,(8*(i))+12)] = ss[4] ^= ls_box(ss[3],0); \
+    k[v(56,(8*(i))+13)] = ss[5] ^= ss[4]; \
+    k[v(56,(8*(i))+14)] = ss[6] ^= ss[5]; \
+    k[v(56,(8*(i))+15)] = ss[7] ^= ss[6]; \
+}
+/* kdf8(k,i): first unrolled cycle.  ss[0..7] are advanced as in k8e and
+   each new word is stored after passing through ff(). */
+#define kdf8(k,i) \
+{   ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ff(ss[0]); \
+    ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ff(ss[1]); \
+    ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ff(ss[2]); \
+    ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ff(ss[3]); \
+    ss[4] ^= ls_box(ss[3],0); k[v(56,(8*(i))+12)] = ff(ss[4]); \
+    ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ff(ss[5]); \
+    ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ff(ss[6]); \
+    ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ff(ss[7]); \
+}
+/* kd8(k,i): middle cycles.  ss[0..7] are advanced as in k8e; the scratch
+   word ss[8] is reloaded twice (once per ls_box term), passed through
+   ff(), and XORed cumulatively with the eight previous schedule words
+   to give the next eight stored words. */
+#define kd8(k,i) \
+{   ss[8] = ls_box(ss[7],3) ^ t_use(r,c)[i]; \
+    ss[0] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+ 8)] = ss[8] ^= k[v(56,(8*(i)))]; \
+    ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[8] ^= k[v(56,(8*(i))+ 1)]; \
+    ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[8] ^= k[v(56,(8*(i))+ 2)]; \
+    ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[8] ^= k[v(56,(8*(i))+ 3)]; \
+    ss[8] = ls_box(ss[3],0); \
+    ss[4] ^= ss[8]; ss[8] = ff(ss[8]); k[v(56,(8*(i))+12)] = ss[8] ^= k[v(56,(8*(i))+ 4)]; \
+    ss[5] ^= ss[4]; k[v(56,(8*(i))+13)] = ss[8] ^= k[v(56,(8*(i))+ 5)]; \
+    ss[6] ^= ss[5]; k[v(56,(8*(i))+14)] = ss[8] ^= k[v(56,(8*(i))+ 6)]; \
+    ss[7] ^= ss[6]; k[v(56,(8*(i))+15)] = ss[8] ^= k[v(56,(8*(i))+ 7)]; \
+}
+/* kdl8(k,i): last cycle.  Only four words are produced and they are
+   stored without passing through ff(). */
+#define kdl8(k,i) \
+{   ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[v(56,(8*(i))+ 8)] = ss[0]; \
+    ss[1] ^= ss[0]; k[v(56,(8*(i))+ 9)] = ss[1]; \
+    ss[2] ^= ss[1]; k[v(56,(8*(i))+10)] = ss[2]; \
+    ss[3] ^= ss[2]; k[v(56,(8*(i))+11)] = ss[3]; \
+}
+/* Fill a decryption context from a 256-bit key.
+   key: key material, read as eight words through word_in().
+   cx:  context to initialise; ks[] (indexed through v(56,...)) and the
+        inf fields are written.
+   Returns EXIT_SUCCESS on every path.  ss[8] is scratch space for kd8. */
+AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1])
+{   uint_32t    ss[9];
+#if defined( d_vars )
+        d_vars;
+#endif
+    cx->ks[v(56,(0))] = ss[0] = word_in(key, 0);
+    cx->ks[v(56,(1))] = ss[1] = word_in(key, 1);
+    cx->ks[v(56,(2))] = ss[2] = word_in(key, 2);
+    cx->ks[v(56,(3))] = ss[3] = word_in(key, 3);
+    /* key words 4..7 are stored through ff() in the unrolled path and raw in the looped path, which converts the schedule in a later pass */
+#ifdef DEC_KS_UNROLL
+    cx->ks[v(56,(4))] = ff(ss[4] = word_in(key, 4));
+    cx->ks[v(56,(5))] = ff(ss[5] = word_in(key, 5));
+    cx->ks[v(56,(6))] = ff(ss[6] = word_in(key, 6));
+    cx->ks[v(56,(7))] = ff(ss[7] = word_in(key, 7));
+    kdf8(cx->ks, 0); kd8(cx->ks, 1);
+    kd8(cx->ks, 2);  kd8(cx->ks, 3);
+    kd8(cx->ks, 4);  kd8(cx->ks, 5);
+    kdl8(cx->ks, 6);
+#else
+    cx->ks[v(56,(4))] = ss[4] = word_in(key, 4);
+    cx->ks[v(56,(5))] = ss[5] = word_in(key, 5);
+    cx->ks[v(56,(6))] = ss[6] = word_in(key, 6);
+    cx->ks[v(56,(7))] = ss[7] = word_in(key, 7);
+    {   uint_32t i;
+        /* six full cycles, then the four-word tail */
+        for(i = 0; i < 6; ++i)
+            k8e(cx->ks,  i);
+        k8ef(cx->ks,  6);
+#if !(DEC_ROUND == NO_TABLES)
+        for(i = N_COLS; i < 14 * N_COLS; ++i)   /* ks[0..N_COLS-1] and the words from 14 * N_COLS on are left as they are */
+            cx->ks[i] = inv_mcol(cx->ks[i]);
+#endif
+    }
+#endif
+    cx->inf.l = 0;
+    cx->inf.b[0] = 14 * 16;     /* marks the schedule as the 256-bit (14 * 16) form */
+    /* with VIA ACE support compiled in, flag the context when the unit is available */
+#ifdef USE_VIA_ACE_IF_PRESENT
+    if(VIA_ACE_AVAILABLE)
+        cx->inf.b[1] = 0xff;
+#endif
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined( AES_VAR )
+
+/* Variable key length front end for the decryption key schedule.
+   key_len may be given in bytes (16, 24, 32) or in bits (128, 192, 256);
+   the matching fixed-size routine is called and its result returned.
+   Any other length yields EXIT_FAILURE and leaves cx untouched. */
+AES_RETURN aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1])
+{
+    if(key_len == 16 || key_len == 128)
+        return aes_decrypt_key128(key, cx);
+
+    if(key_len == 24 || key_len == 192)
+        return aes_decrypt_key192(key, cx);
+
+    if(key_len == 32 || key_len == 256)
+        return aes_decrypt_key256(key, cx);
+
+    return EXIT_FAILURE;
+}
+
+#endif
+
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
--- a/src/java/kp2akeytransform/jni/aes/aesopt.h
+++ b/src/java/kp2akeytransform/jni/aes/aesopt.h
@ -0,0 +1,747 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+  1. source code distributions include the above copyright notice, this
+     list of conditions and the following disclaimer;
+
+  2. binary distributions include the above copyright notice, this list
+     of conditions and the following disclaimer in their documentation;
+
+  3. the name of the copyright holder is not used to endorse products
+     built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+
+ This file contains the compilation options for AES (Rijndael) and code
+ that is common across encryption, key scheduling and table generation.
+
+ OPERATION
+
+ These source code files implement the AES algorithm Rijndael designed by
+ Joan Daemen and Vincent Rijmen. This version is designed for the standard
+ block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
+ and 32 bytes).
+
+ This version is designed for flexibility and speed using operations on
+ 32-bit words rather than operations on bytes.  It can be compiled with
+ either big or little endian internal byte order but is faster when the
+ native byte order for the processor is used.
+
+ THE CIPHER INTERFACE
+
+ The cipher interface is implemented as an array of bytes in which lower
+ AES bit sequence indexes map to higher numeric significance within bytes.
+
+  uint_8t                 (an unsigned  8-bit type)
+  uint_32t                (an unsigned 32-bit type)
+  struct aes_encrypt_ctx  (structure for the cipher encryption context)
+  struct aes_decrypt_ctx  (structure for the cipher decryption context)
+  AES_RETURN                the function return type
+
+  C subroutine calls:
+
+  AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
+  AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
+  AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
+  AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out,
+                                                  const aes_encrypt_ctx cx[1]);
+
+  AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
+  AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
+  AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
+  AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out,
+                                                  const aes_decrypt_ctx cx[1]);
+
+ IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that
+ you call aes_init() before AES is used so that the tables are initialised.
+
+ C++ aes class subroutines:
+
+     Class AESencrypt  for encryption
+
+      Constructors:
+          AESencrypt(void)
+          AESencrypt(const unsigned char *key) - 128 bit key
+      Members:
+          AES_RETURN key128(const unsigned char *key)
+          AES_RETURN key192(const unsigned char *key)
+          AES_RETURN key256(const unsigned char *key)
+          AES_RETURN encrypt(const unsigned char *in, unsigned char *out) const
+
+      Class AESdecrypt  for decryption
+      Constructors:
+          AESdecrypt(void)
+          AESdecrypt(const unsigned char *key) - 128 bit key
+      Members:
+          AES_RETURN key128(const unsigned char *key)
+          AES_RETURN key192(const unsigned char *key)
+          AES_RETURN key256(const unsigned char *key)
+          AES_RETURN decrypt(const unsigned char *in, unsigned char *out) const
+*/
+
+#if !defined( _AESOPT_H )
+#define _AESOPT_H
+
+#if defined( __cplusplus )
+#include "aescpp.h"
+#else
+#include "aes.h"
+#endif
+
+/*  PLATFORM SPECIFIC INCLUDES */
+
+#include "brg_endian.h"
+
+/*  CONFIGURATION - THE USE OF DEFINES
+
+    Later in this section there are a number of defines that control the
+    operation of the code.  In each section, the purpose of each define is
+    explained so that the relevant form can be included or excluded by
+    setting either 1's or 0's respectively on the branches of the related
+    #if clauses.  The following local defines should not be changed.
+*/
+
+#define ENCRYPTION_IN_C     1
+#define DECRYPTION_IN_C     2
+#define ENC_KEYING_IN_C     4
+#define DEC_KEYING_IN_C     8
+
+#define NO_TABLES           0
+#define ONE_TABLE           1
+#define FOUR_TABLES         4
+#define NONE                0
+#define PARTIAL             1
+#define FULL                2
+
+/*  --- START OF USER CONFIGURED OPTIONS --- */
+
+/*  1. BYTE ORDER WITHIN 32 BIT WORDS
+
+    The fundamental data processing units in Rijndael are 8-bit bytes. The
+    input, output and key input are all enumerated arrays of bytes in which
+    bytes are numbered starting at zero and increasing to one less than the
+    number of bytes in the array in question. This enumeration is only used
+    for naming bytes and does not imply any adjacency or order relationship
+    from one byte to another. When these inputs and outputs are considered
+    as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
+    byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
+    In this implementation bits are numbered from 0 to 7 starting at the
+    numerically least significant end of each byte (bit n represents 2^n).
+
+    However, Rijndael can be implemented more efficiently using 32-bit
+    words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
+    into word[n]. While in principle these bytes can be assembled into words
+    in any positions, this implementation only supports the two formats in
+    which bytes in adjacent positions within words also have adjacent byte
+    numbers. This order is called big-endian if the lowest numbered bytes
+    in words have the highest numeric significance and little-endian if the
+    opposite applies.
+
+    This code can work in either order irrespective of the order used by the
+    machine on which it runs. Normally the internal byte order will be set
+    to the order of the processor on which the code is to be run but this
+    define can be used to reverse this in special situations
+
+    WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
+    This define will hence be redefined later (in section 4) if necessary
+*/
+
+#if 1
+#  define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
+#elif 0
+#  define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
+#elif 0
+#  define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
+#else
+#  error The algorithm byte order is not defined
+#endif
+
+/*  2. VIA ACE SUPPORT */
+
+#if defined( __GNUC__ ) && defined( __i386__ ) \
+ || defined( _WIN32   ) && defined( _M_IX86  ) \
+ && !(defined( _WIN64 ) || defined( _WIN32_WCE ) || defined( _MSC_VER ) && ( _MSC_VER <= 800 ))
+#  define VIA_ACE_POSSIBLE
+#endif
+
+/*  Define this option if support for the VIA ACE is required. This uses
+    inline assembler instructions and is only implemented for the Microsoft,
+    Intel and GCC compilers.  If VIA ACE is known to be present, then defining
+    ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
+    code.  If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
+    it is detected (both present and enabled) but the normal AES code will
+    also be present.
+
+    When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
+    aligned; other input/output buffers do not need to be 16 byte aligned
+    but there are very large performance gains if this can be arranged.
+    VIA ACE also requires the decryption key schedule to be in reverse
+    order (which later checks below ensure).
+*/
+
+/* Disable VIA ACE cpu detection which crashes on x86 android devices */
+#if 0 && defined( VIA_ACE_POSSIBLE ) && !defined( USE_VIA_ACE_IF_PRESENT )
+#  define USE_VIA_ACE_IF_PRESENT
+#endif
+
+#if 0 && defined( VIA_ACE_POSSIBLE ) && !defined( ASSUME_VIA_ACE_PRESENT )
+#  define ASSUME_VIA_ACE_PRESENT
+#  endif
+
+/*  3. ASSEMBLER SUPPORT
+
+    This define (which can be on the command line) enables the use of the
+    assembler code routines for encryption, decryption and key scheduling
+    as follows:
+
+    ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
+                encryption and decryption but with key scheduling in C
+    ASM_X86_V2  uses assembler (aes_x86_v2.asm) with compressed tables for
+                encryption, decryption and key scheduling
+    ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
+                encryption and decryption but with key scheduling in C
+    ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
+                encryption and decryption but with key scheduling in C
+
+    Change one 'if 0' below to 'if 1' to select the version or define
+    as a compilation option.
+*/
+
+#if 0 && !defined( ASM_X86_V1C )
+#  define ASM_X86_V1C
+#elif 0 && !defined( ASM_X86_V2  )
+#  define ASM_X86_V2
+#elif 0 && !defined( ASM_X86_V2C )
+#  define ASM_X86_V2C
+#elif 0 && !defined( ASM_AMD64_C )
+#  define ASM_AMD64_C
+#endif
+
+#if (defined ( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )) \
+      && !defined( _M_IX86 ) || defined( ASM_AMD64_C ) && !defined( _M_X64 )
+#  error Assembler code is only available for x86 and AMD64 systems
+#endif
+
+/*  4. FAST INPUT/OUTPUT OPERATIONS.
+
+    On some machines it is possible to improve speed by transferring the
+    bytes in the input and output arrays to and from the internal 32-bit
+    variables by addressing these arrays as if they are arrays of 32-bit
+    words.  On some machines this will always be possible but there may
+    be a large performance penalty if the byte arrays are not aligned on
+    the normal word boundaries. On other machines this technique will
+    lead to memory access errors when such 32-bit word accesses are not
+    properly aligned. The option SAFE_IO avoids such problems but will
+    often be slower on those machines that support misaligned access
+    (especially so if care is taken to align the input  and output byte
+    arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
+    assumed that access to byte arrays as if they are arrays of 32-bit
+    words will not cause problems when such accesses are misaligned.
+*/
+#if 1 && !defined( _MSC_VER )
+#  define SAFE_IO
+#endif
+
+/*  5. LOOP UNROLLING
+
+    The code for encryption and decryption cycles through a number of rounds
+    that can be implemented either in a loop or by expanding the code into a
+    long sequence of instructions, the latter producing a larger program but
+    one that will often be much faster. The latter is called loop unrolling.
+    There are also potential speed advantages in expanding two iterations in
+    a loop with half the number of iterations, which is called partial loop
+    unrolling.  The following options allow partial or full loop unrolling
+    to be set independently for encryption and decryption
+*/
+#if 1
+#  define ENC_UNROLL  FULL
+#elif 0
+#  define ENC_UNROLL  PARTIAL
+#else
+#  define ENC_UNROLL  NONE
+#endif
+
+#if 1
+#  define DEC_UNROLL  FULL
+#elif 0
+#  define DEC_UNROLL  PARTIAL
+#else
+#  define DEC_UNROLL  NONE
+#endif
+
+#if 1
+#  define ENC_KS_UNROLL
+#endif
+
+#if 1
+#  define DEC_KS_UNROLL
+#endif
+
+/*  6. FAST FINITE FIELD OPERATIONS
+
+    If this section is included, tables are used to provide faster finite
+    field arithmetic (this has no effect if FIXED_TABLES is defined).
+*/
+#if 1
+#  define FF_TABLES
+#endif
+
+/*  7. INTERNAL STATE VARIABLE FORMAT
+
+    The internal state of Rijndael is stored in a number of local 32-bit
+    word variables which can be defined either as an array or as individual
+    named variables. Include this section if you want to store these local
+    variables in arrays. Otherwise individual local variables will be used.
+*/
+#if 1
+#  define ARRAYS
+#endif
+
+/*  8. FIXED OR DYNAMIC TABLES
+
+    When this section is included the tables used by the code are compiled
+    statically into the binary file.  Otherwise the subroutine aes_init()
+    must be called to compute them before the code is first used.
+*/
+#if 1 && !(defined( _MSC_VER ) && ( _MSC_VER <= 800 ))
+#  define FIXED_TABLES
+#endif
+
+/*  9. MASKING OR CASTING FROM LONGER VALUES TO BYTES
+
+    In some systems it is better to mask longer values to extract bytes 
+    rather than using a cast. This option allows this choice.
+*/
+#if 0
+#  define to_byte(x)  ((uint_8t)(x))
+#else
+#  define to_byte(x)  ((x) & 0xff)
+#endif
+
+/*  10. TABLE ALIGNMENT
+
+    On some systems speed will be improved by aligning the AES large lookup
+    tables on particular boundaries. This define should be set to a power of
+    two giving the desired alignment. It can be left undefined if alignment
+    is not needed.  This option is specific to the Microsoft VC++ compiler -
+    it seems to sometimes cause trouble for the VC++ version 6 compiler.
+*/
+
+#if 1 && defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
+#  define TABLE_ALIGN 32
+#endif
+
+/*  11.  REDUCE CODE AND TABLE SIZE
+
+    This replaces some expanded macros with function calls if ASM_X86_V2 or
+    ASM_X86_V2C are defined
+*/
+
+#if 1 && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ))
+#  define REDUCE_CODE_SIZE
+#endif
+
+/*  12. TABLE OPTIONS
+
+    This cipher proceeds by repeating in a number of cycles known as 'rounds'
+    which are implemented by a round function which can optionally be speeded
+    up using tables.  The basic tables are each 256 32-bit words, with either
+    one or four tables being required for each round function depending on
+    how much speed is required. The encryption and decryption round functions
+    are different and the last encryption and decryption round functions are
+    different again making four different round functions in all.
+
+    This means that:
+      1. Normal encryption and decryption rounds can each use either 0, 1
+         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
+      2. The last encryption and decryption rounds can also use either 0, 1
+         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
+
+    Include or exclude the appropriate definitions below to set the number
+    of tables used by this implementation.
+*/
+
+#if 1   /* set tables for the normal encryption round */
+#  define ENC_ROUND   FOUR_TABLES
+#elif 0
+#  define ENC_ROUND   ONE_TABLE
+#else
+#  define ENC_ROUND   NO_TABLES
+#endif
+
+#if 1   /* set tables for the last encryption round */
+#  define LAST_ENC_ROUND  FOUR_TABLES
+#elif 0
+#  define LAST_ENC_ROUND  ONE_TABLE
+#else
+#  define LAST_ENC_ROUND  NO_TABLES
+#endif
+
+#if 1   /* set tables for the normal decryption round */
+#  define DEC_ROUND   FOUR_TABLES
+#elif 0
+#  define DEC_ROUND   ONE_TABLE
+#else
+#  define DEC_ROUND   NO_TABLES
+#endif
+
+#if 1   /* set tables for the last decryption round */
+#  define LAST_DEC_ROUND  FOUR_TABLES
+#elif 0
+#  define LAST_DEC_ROUND  ONE_TABLE
+#else
+#  define LAST_DEC_ROUND  NO_TABLES
+#endif
+
+/*  The decryption key schedule can be speeded up with tables in the same
+    way that the round functions can.  Include or exclude the following
+    defines to set this requirement.
+*/
+#if 1
+#  define KEY_SCHED   FOUR_TABLES
+#elif 0
+#  define KEY_SCHED   ONE_TABLE
+#else
+#  define KEY_SCHED   NO_TABLES
+#endif
+
+/*  ---- END OF USER CONFIGURED OPTIONS ---- */
+
+/* VIA ACE support is only available for VC++ and GCC */
+
+#if !defined( _MSC_VER ) && !defined( __GNUC__ )
+#  if defined( ASSUME_VIA_ACE_PRESENT )
+#    undef ASSUME_VIA_ACE_PRESENT
+#  endif
+#  if defined( USE_VIA_ACE_IF_PRESENT )
+#    undef USE_VIA_ACE_IF_PRESENT
+#  endif
+#endif
+
+#if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT )
+#  define USE_VIA_ACE_IF_PRESENT
+#endif
+
+#if defined( USE_VIA_ACE_IF_PRESENT ) && !defined ( AES_REV_DKS )
+#  define AES_REV_DKS
+#endif
+
+/* Assembler support requires the use of platform byte order */
+
+#if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) ) \
+    && (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)
+#  undef  ALGORITHM_BYTE_ORDER
+#  define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
+#endif
+
+/* In this implementation the columns of the state array are each held in
+   32-bit words. The state array can be held in various ways: in an array
+   of words, in a number of individual word variables or in a number of
+   processor registers. The following define maps a variable name x and
+   a column number c to the way the state array variable is to be held.
+   The first define below maps the state into an array x[c] whereas the
+   second form maps the state into a number of individual variables x0,
+   x1, etc.  Another form could map individual state columns to machine
+   register names.
+*/
+
+#if defined( ARRAYS )
+#  define s(x,c) x[c]
+#else
+#  define s(x,c) x##c
+#endif
+
+/*  This implementation provides subroutines for encryption, decryption
+    and for setting the three key lengths (separately) for encryption
+    and decryption. Since not all functions are needed, masks are set
+    up here to determine which will be implemented in C
+*/
+
+#if !defined( AES_ENCRYPT )
+#  define EFUNCS_IN_C   0
+#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
+    || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
+#  define EFUNCS_IN_C   ENC_KEYING_IN_C
+#elif !defined( ASM_X86_V2 )
+#  define EFUNCS_IN_C   ( ENCRYPTION_IN_C | ENC_KEYING_IN_C )
+#else
+#  define EFUNCS_IN_C   0
+#endif
+
+#if !defined( AES_DECRYPT )
+#  define DFUNCS_IN_C   0
+#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
+    || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
+#  define DFUNCS_IN_C   DEC_KEYING_IN_C
+#elif !defined( ASM_X86_V2 )
+#  define DFUNCS_IN_C   ( DECRYPTION_IN_C | DEC_KEYING_IN_C )
+#else
+#  define DFUNCS_IN_C   0
+#endif
+
+#define FUNCS_IN_C  ( EFUNCS_IN_C | DFUNCS_IN_C )
+
+/* END OF CONFIGURATION OPTIONS */
+
+#define RC_LENGTH   (5 * (AES_BLOCK_SIZE / 4 - 2))
+
+/* Disable or report errors on some combinations of options */
+
+#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
+#  undef  LAST_ENC_ROUND
+#  define LAST_ENC_ROUND  NO_TABLES
+#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
+#  undef  LAST_ENC_ROUND
+#  define LAST_ENC_ROUND  ONE_TABLE
+#endif
+
+#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
+#  undef  ENC_UNROLL
+#  define ENC_UNROLL  NONE
+#endif
+
+#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
+#  undef  LAST_DEC_ROUND
+#  define LAST_DEC_ROUND  NO_TABLES
+#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
+#  undef  LAST_DEC_ROUND
+#  define LAST_DEC_ROUND  ONE_TABLE
+#endif
+
+#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
+#  undef  DEC_UNROLL
+#  define DEC_UNROLL  NONE
+#endif
+
+/*  aes_sw32(x): reverse the byte order of a 32-bit word.  A platform
+    supplied bswap32 or bswap_32 macro is used when one is defined;
+    otherwise the swap is built from two rotations, brot(x,8) supplying
+    bytes 0 and 2 of the result and brot(x,24) bytes 1 and 3.
+
+    brot(x,n): rotate the 32-bit value x left by n bits.  Both arguments
+    are fully parenthesised so that expression arguments expand safely;
+    n must be in the range 1..31 (a shift by 32 is undefined in C).
+*/
+#if defined( bswap32 )
+#  define aes_sw32    bswap32
+#elif defined( bswap_32 )
+#  define aes_sw32    bswap_32
+#else
+#  define brot(x,n)   (((uint_32t)(x) << (n)) | ((uint_32t)(x) >> (32 - (n))))
+#  define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00))
+#endif
+
+/*  upr(x,n):  rotates bytes within words by n positions, moving bytes to
+               higher index positions with wrap around into low positions
+    ups(x,n):  moves bytes by n positions to higher index positions in
+               words but without wrap around
+    bval(x,n): extracts a byte from a word
+
+    WARNING:   The definitions given here are intended only for use with
+               unsigned variables and with shift counts that are compile
+               time constants
+*/
+
+#if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN )
+#  define upr(x,n)      (((uint_32t)(x) << (8 * (n))) | ((uint_32t)(x) >> (32 - 8 * (n))))
+#  define ups(x,n)      ((uint_32t) (x) << (8 * (n)))
+#  define bval(x,n)     to_byte((x) >> (8 * (n)))
+#  define bytes2word(b0, b1, b2, b3)  \
+        (((uint_32t)(b3) << 24) | ((uint_32t)(b2) << 16) | ((uint_32t)(b1) << 8) | (b0))
+#endif
+
+#if ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN )
+#  define upr(x,n)      (((uint_32t)(x) >> (8 * (n))) | ((uint_32t)(x) << (32 - 8 * (n))))
+#  define ups(x,n)      ((uint_32t) (x) >> (8 * (n)))
+#  define bval(x,n)     to_byte((x) >> (24 - 8 * (n)))
+#  define bytes2word(b0, b1, b2, b3)  \
+        (((uint_32t)(b0) << 24) | ((uint_32t)(b1) << 16) | ((uint_32t)(b2) << 8) | (b3))
+#endif
+
+/*  word_in(x,c):    read 32-bit word number c from the buffer at x
+    word_out(x,c,v): write the value v to word number c of the buffer at x
+
+    With SAFE_IO the buffer is accessed one byte at a time through
+    bytes2word() and bval().  Otherwise x is cast to uint_32t* and whole
+    words are read or written, passing through aes_sw32() when the
+    algorithm and platform byte orders differ.
+    NOTE(review): the word-wise forms presumably need x to be suitably
+    aligned for uint_32t access - confirm for the target platform.
+
+    The word index c is parenthesised in every variant so that an
+    expression such as i+1 selects the intended word.  The SAFE_IO form of
+    word_out is a brace block that evaluates v four times, so v should be
+    free of side effects.
+*/
+#if defined( SAFE_IO )
+#  define word_in(x,c)    bytes2word(((const uint_8t*)(x)+4*(c))[0], ((const uint_8t*)(x)+4*(c))[1], \
+                                   ((const uint_8t*)(x)+4*(c))[2], ((const uint_8t*)(x)+4*(c))[3])
+#  define word_out(x,c,v) { ((uint_8t*)(x)+4*(c))[0] = bval(v,0); ((uint_8t*)(x)+4*(c))[1] = bval(v,1); \
+                          ((uint_8t*)(x)+4*(c))[2] = bval(v,2); ((uint_8t*)(x)+4*(c))[3] = bval(v,3); }
+#elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER )
+#  define word_in(x,c)    (*((uint_32t*)(x)+(c)))
+#  define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = (v))
+#else
+#  define word_in(x,c)    aes_sw32(*((uint_32t*)(x)+(c)))
+#  define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = aes_sw32(v))
+#endif
+
+/* the finite field modular polynomial and elements */
+
+#define WPOLY   0x011b
+#define BPOLY     0x1b
+
+/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
+
+/* m1 selects the top bit of each of the four bytes in a word and m2 the
+   remaining low seven bits of each byte */
+#define m1  0x80808080
+#define m2  0x7f7f7f7f
+/* The low seven bits of every byte are shifted left by one place; the top
+   bit of every byte is moved down to bit 0 of that byte and multiplied by
+   BPOLY, so BPOLY is XORed into exactly those bytes whose top bit was set.
+   x is evaluated twice, so it should be free of side effects. */
+#define gf_mulx(x)  ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))
+
+/* The following defines provide alternative definitions of gf_mulx that might
+   give improved performance if a fast 32-bit multiply is not available. Note
+   that a temporary variable u needs to be defined where gf_mulx is used.
+
+#define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
+#define m4  (0x01010101 * BPOLY)
+#define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
+*/
+
+/* Work out which tables are needed for the different options   */
+
+#if defined( ASM_X86_V1C )
+#  if defined( ENC_ROUND )
+#    undef  ENC_ROUND
+#  endif
+#  define ENC_ROUND   FOUR_TABLES
+#  if defined( LAST_ENC_ROUND )
+#    undef  LAST_ENC_ROUND
+#  endif
+#  define LAST_ENC_ROUND  FOUR_TABLES
+#  if defined( DEC_ROUND )
+#    undef  DEC_ROUND
+#  endif
+#  define DEC_ROUND   FOUR_TABLES
+#  if defined( LAST_DEC_ROUND )
+#    undef  LAST_DEC_ROUND
+#  endif
+#  define LAST_DEC_ROUND  FOUR_TABLES
+#  if defined( KEY_SCHED )
+#    undef  KEY_SCHED
+#    define KEY_SCHED   FOUR_TABLES
+#  endif
+#endif
+
+#if ( FUNCS_IN_C & ENCRYPTION_IN_C ) || defined( ASM_X86_V1C )
+#  if ENC_ROUND == ONE_TABLE
+#    define FT1_SET
+#  elif ENC_ROUND == FOUR_TABLES
+#    define FT4_SET
+#  else
+#    define SBX_SET
+#  endif
+#  if LAST_ENC_ROUND == ONE_TABLE
+#    define FL1_SET
+#  elif LAST_ENC_ROUND == FOUR_TABLES
+#    define FL4_SET
+#  elif !defined( SBX_SET )
+#    define SBX_SET
+#  endif
+#endif
+
+#if ( FUNCS_IN_C & DECRYPTION_IN_C ) || defined( ASM_X86_V1C )
+#  if DEC_ROUND == ONE_TABLE
+#    define IT1_SET
+#  elif DEC_ROUND == FOUR_TABLES
+#    define IT4_SET
+#  else
+#    define ISB_SET
+#  endif
+#  if LAST_DEC_ROUND == ONE_TABLE
+#    define IL1_SET
+#  elif LAST_DEC_ROUND == FOUR_TABLES
+#    define IL4_SET
+#  elif !defined(ISB_SET)
+#    define ISB_SET
+#  endif
+#endif
+
+#if !(defined( REDUCE_CODE_SIZE ) && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )))
+#  if ((FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C))
+#    if KEY_SCHED == ONE_TABLE
+#      if !defined( FL1_SET )  && !defined( FL4_SET ) 
+#        define LS1_SET
+#      endif
+#    elif KEY_SCHED == FOUR_TABLES
+#      if !defined( FL4_SET )
+#        define LS4_SET
+#      endif
+#    elif !defined( SBX_SET )
+#      define SBX_SET
+#    endif
+#  endif
+#  if (FUNCS_IN_C & DEC_KEYING_IN_C)
+#    if KEY_SCHED == ONE_TABLE
+#      define IM1_SET
+#    elif KEY_SCHED == FOUR_TABLES
+#      define IM4_SET
+#    elif !defined( SBX_SET )
+#      define SBX_SET
+#    endif
+#  endif
+#endif
+
+/* generic definitions of Rijndael macros that use tables    */
+
+/* no_table: build a word from four lookups in the byte table box.  For
+   r = 0..3 the byte bval(vf(x,r,c), rf(r,c)) is used as the index and
+   the four results are packed with bytes2word().  vf and rf are macro
+   names supplied by the caller: vf(x,r,c) yields the word to take a byte
+   from and rf(r,c) the number of the byte to extract.
+*/
+#define no_table(x,box,vf,rf,c) bytes2word( \
+    box[bval(vf(x,0,c),rf(0,c))], \
+    box[bval(vf(x,1,c),rf(1,c))], \
+    box[bval(vf(x,2,c),rf(2,c))], \
+    box[bval(vf(x,3,c),rf(3,c))])
+
+/* one_table: the same four byte selections, but each indexes the single
+   word table tab.  The entry for r = 0 is used as is; the entries for
+   r = 1..3 are passed through op(entry, r) (the uses visible in this
+   file pass upr) and the four terms are XORed together.
+*/
+#define one_table(x,op,tab,vf,rf,c) \
+ (     tab[bval(vf(x,0,c),rf(0,c))] \
+  ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
+  ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
+  ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
+
+/* four_tables: the same four byte selections, with selection r indexing
+   its own table tab[r]; the four entries are XORed together.
+*/
+#define four_tables(x,tab,vf,rf,c) \
+ (  tab[0][bval(vf(x,0,c),rf(0,c))] \
+  ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
+  ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
+  ^ tab[3][bval(vf(x,3,c),rf(3,c))])
+
+/* Selector macros passed as the vf/rf arguments of the table macros:
+     vf1(x,r,c)  always yields the word x itself
+     rf1(r,c)    selects byte number r
+     rf2(r,c)    selects byte number (r - c) modulo 4; the added 8 keeps
+                 the value non-negative before masking
+   Arguments are parenthesised so that expression arguments expand safely.
+*/
+#define vf1(x,r,c)  (x)
+#define rf1(r,c)    (r)
+#define rf2(r,c)    ((8+(r)-(c))&3)
+
+/* perform forward and inverse column mix operation on four bytes in long word x in */
+/* parallel. NOTE: x must be a simple variable, NOT an expression in these macros.  */
+
+#if !(defined( REDUCE_CODE_SIZE ) && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ))) 
+
+#if defined( FM4_SET )      /* not currently used */
+#  define fwd_mcol(x)       four_tables(x,t_use(f,m),vf1,rf1,0)
+#elif defined( FM1_SET )    /* not currently used */
+#  define fwd_mcol(x)       one_table(x,upr,t_use(f,m),vf1,rf1,0)
+#else
+#  define dec_fmvars        uint_32t g2
+#  define fwd_mcol(x)       (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1))
+#endif
+
+#if defined( IM4_SET )
+#  define inv_mcol(x)       four_tables(x,t_use(i,m),vf1,rf1,0)
+#elif defined( IM1_SET )
+#  define inv_mcol(x)       one_table(x,upr,t_use(i,m),vf1,rf1,0)
+#else
+#  define dec_imvars        uint_32t g2, g4, g9
+#  define inv_mcol(x)       (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \
+                            (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1))
+#endif
+
+#if defined( FL4_SET )
+#  define ls_box(x,c)       four_tables(x,t_use(f,l),vf1,rf2,c)
+#elif defined( LS4_SET )
+#  define ls_box(x,c)       four_tables(x,t_use(l,s),vf1,rf2,c)
+#elif defined( FL1_SET )
+#  define ls_box(x,c)       one_table(x,upr,t_use(f,l),vf1,rf2,c)
+#elif defined( LS1_SET )
+#  define ls_box(x,c)       one_table(x,upr,t_use(l,s),vf1,rf2,c)
+#else
+#  define ls_box(x,c)       no_table(x,t_use(s,box),vf1,rf2,c)
+#endif
+
+#endif
+
+#if defined( ASM_X86_V1C ) && defined( AES_DECRYPT ) && !defined( ISB_SET )
+#  define ISB_SET
+#endif
+
+#endif
--- a/src/java/kp2akeytransform/jni/aes/aestab.c
+++ b/src/java/kp2akeytransform/jni/aes/aestab.c
@ -0,0 +1,398 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+  1. source code distributions include the above copyright notice, this
+     list of conditions and the following disclaimer;
+
+  2. binary distributions include the above copyright notice, this list
+     of conditions and the following disclaimer in their documentation;
+
+  3. the name of the copyright holder is not used to endorse products
+     built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+*/
+
+#define DO_TABLES
+
+#include "aes.h"
+#include "aesopt.h"
+
+#if defined(FIXED_TABLES)
+
+#define sb_data(w) {\
+    w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
+    w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
+    w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
+    w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
+    w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
+    w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
+    w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
+    w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
+    w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
+    w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
+    w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
+    w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
+    w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
+    w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
+    w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
+    w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
+    w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
+    w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
+    w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
+    w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
+    w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
+    w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
+    w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
+    w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
+    w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
+    w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
+    w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
+    w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
+    w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
+    w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
+    w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
+    w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
+
+#define isb_data(w) {\
+    w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\
+    w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\
+    w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\
+    w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\
+    w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\
+    w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\
+    w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\
+    w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\
+    w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\
+    w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\
+    w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\
+    w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\
+    w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\
+    w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\
+    w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\
+    w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\
+    w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\
+    w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\
+    w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\
+    w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\
+    w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\
+    w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\
+    w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\
+    w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\
+    w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\
+    w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\
+    w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\
+    w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\
+    w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\
+    w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\
+    w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\
+    w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) }
+
+#define mm_data(w) {\
+    w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\
+    w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\
+    w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\
+    w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\
+    w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\
+    w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\
+    w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\
+    w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\
+    w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\
+    w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\
+    w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\
+    w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\
+    w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\
+    w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\
+    w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\
+    w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\
+    w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\
+    w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\
+    w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\
+    w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\
+    w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\
+    w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\
+    w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\
+    w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\
+    w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\
+    w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\
+    w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\
+    w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\
+    w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\
+    w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\
+    w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\
+    w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) }
+
+#define rc_data(w) {\
+    w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\
+    w(0x1b), w(0x36) }
+
+#define h0(x)   (x)
+
+#define w0(p)   bytes2word(p, 0, 0, 0)
+#define w1(p)   bytes2word(0, p, 0, 0)
+#define w2(p)   bytes2word(0, 0, p, 0)
+#define w3(p)   bytes2word(0, 0, 0, p)
+
+#define u0(p)   bytes2word(f2(p), p, p, f3(p))
+#define u1(p)   bytes2word(f3(p), f2(p), p, p)
+#define u2(p)   bytes2word(p, f3(p), f2(p), p)
+#define u3(p)   bytes2word(p, p, f3(p), f2(p))
+
+#define v0(p)   bytes2word(fe(p), f9(p), fd(p), fb(p))
+#define v1(p)   bytes2word(fb(p), fe(p), f9(p), fd(p))
+#define v2(p)   bytes2word(fd(p), fb(p), fe(p), f9(p))
+#define v3(p)   bytes2word(f9(p), fd(p), fb(p), fe(p))
+
+#endif
+
+/*  Finite field multiplication of a byte value x by the constants
+    {02}, {03}, {04}, {08}, {09}, {0b}, {0d} and {0e}.
+
+    The first set is pure arithmetic: x is shifted left and WPOLY is XORed
+    in once for every bit shifted out above bit 7.  It is used for
+    FIXED_TABLES and whenever FF_TABLES is not defined.  The argument is
+    parenthesised everywhere so that expression arguments expand safely;
+    it is evaluated more than once, so it should be free of side effects.
+
+    The second set uses the pow[] and log[] arrays, which are local
+    variables of aes_init(), so those macros can only be used inside that
+    function.
+*/
+#if defined(FIXED_TABLES) || !defined(FF_TABLES)
+
+#define f2(x)   (((x)<<1) ^ ((((x)>>7) & 1) * WPOLY))
+#define f4(x)   (((x)<<2) ^ ((((x)>>6) & 1) * WPOLY) ^ ((((x)>>6) & 2) * WPOLY))
+#define f8(x)   (((x)<<3) ^ ((((x)>>5) & 1) * WPOLY) ^ ((((x)>>5) & 2) * WPOLY) \
+                          ^ ((((x)>>5) & 4) * WPOLY))
+#define f3(x)   (f2(x) ^ (x))
+#define f9(x)   (f8(x) ^ (x))
+#define fb(x)   (f8(x) ^ f2(x) ^ (x))
+#define fd(x)   (f8(x) ^ f4(x) ^ (x))
+#define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
+
+#else
+
+#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
+#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
+#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
+#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
+#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
+#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
+
+#endif
+
+#include "aestab.h"
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#if defined(FIXED_TABLES)
+
+/* With FIXED_TABLES the tables are initialised at compile time, so there
+   is nothing to build at run time.  aes_init() is still provided so that
+   code which calls it regardless of the table option links and succeeds.
+*/
+
+AES_RETURN aes_init(void)
+{
+    /* nothing to initialise - report success unconditionally */
+    return EXIT_SUCCESS;
+}
+
+#else   /*  Generate the tables for the dynamic table option */
+
+#if defined(FF_TABLES)
+
+#define gf_inv(x)   ((x) ? pow[ 255 - log[x]] : 0)
+
+#else 
+
+/*  It will generally be sensible to use tables to compute finite
+    field multiplies and inverses but where memory is scarce this
+    code might sometimes be better. But it only has effect during
+    initialisation so it's pretty unimportant in overall terms.
+*/
+
+/*  return 2 ^ (n - 1) where n is the bit number of the highest bit
+    set in x with x in the range 1 < x < 0x00000200.   This form is
+    used so that locals within fi can be bytes rather than words
+*/
+
+static uint_8t hibit(const uint_32t x)
+{
+    /* start from (x >> 1) and copy its highest set bit into the position
+       below it; the result is truncated to a byte */
+    uint_8t mask = (uint_8t)((x >> 1) | (x >> 2));
+
+    /* keep smearing downwards: after these two steps every bit below the
+       highest set bit of the byte is set as well */
+    mask = (uint_8t)(mask | (mask >> 2));
+    mask = (uint_8t)(mask | (mask >> 4));
+
+    /* for the documented input range mask is now of the form 2^k - 1, so
+       adding one and halving leaves only the top bit of (x >> 1) */
+    return (uint_8t)((mask + 1) >> 1);
+}
+
+/* Return the inverse of the finite field element x.
+
+   Inputs below 2 are returned unchanged (0 is mapped to 0 and 1 to 1).
+   Otherwise two polynomials p1 and p2 are reduced against each other in
+   turn: n1 and n2 hold hibit() of p1 and p2, and v1 and v2 accumulate the
+   multipliers applied to each.  When n1 becomes zero v1 is returned, and
+   when n2 becomes zero v2 is returned.
+   NOTE(review): this looks like the extended Euclidean algorithm on
+   GF(2) polynomials with WPOLY as the modulus - confirm.
+*/
+
+static uint_8t gf_inv(const uint_8t x)
+{   uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
+    /* p2 starts as BPOLY with n2 = 0x80; presumably this stands for the
+       nine bit polynomial WPOLY, whose top bit does not fit in a byte */
+
+    if(x < 2) 
+        return x;
+
+    for( ; ; )
+    {
+        if(n1)
+            while(n2 >= n1)             /* divide polynomial p2 by p1    */
+            {
+                n2 /= n1;               /* shift smaller polynomial left */ 
+                p2 ^= (p1 * n2) & 0xff; /* and remove from larger one    */
+                v2 ^= v1 * n2;          /* shift accumulated value and   */ 
+                n2 = hibit(p2);         /* add into result               */
+            }
+        else
+            return v1;
+
+        /* same steps with the roles of (p1,n1,v1) and (p2,n2,v2) exchanged;
+           here the product is truncated only by the assignment to p1 */
+        if(n2)                          /* repeat with values swapped    */ 
+            while(n1 >= n2)
+            {
+                n1 /= n2; 
+                p1 ^= p2 * n1; 
+                v1 ^= v2 * n1; 
+                n1 = hibit(p1);
+            }
+        else
+            return v2;
+    }
+}
+
+#endif
+
+/* The forward and inverse affine transformations used in the S-box */
+uint_8t fwd_affine(const uint_8t x)
+{
+    const uint_32t v = x;
+    /* XOR x with copies of itself shifted left by 1, 2, 3 and 4 bits */
+    const uint_32t spread = v ^ (v << 1) ^ (v << 2) ^ (v << 3) ^ (v << 4);
+    /* fold the bits shifted out above bit 7 back onto the low bits */
+    const uint_32t folded = (spread ^ (spread >> 8)) & 0xff;
+
+    /* finish by XORing in the constant 0x63 */
+    return (uint_8t)(0x63 ^ folded);
+}
+
+uint_8t inv_affine(const uint_8t x)
+{
+    const uint_32t v = x;
+    /* XOR together copies of x shifted left by 1, 3 and 6 bits */
+    const uint_32t spread = (v << 1) ^ (v << 3) ^ (v << 6);
+    /* fold the bits shifted out above bit 7 back onto the low bits */
+    const uint_32t folded = (spread ^ (spread >> 8)) & 0xff;
+
+    /* finish by XORing in the constant 0x05 */
+    return (uint_8t)(0x05 ^ folded);
+}
+
+static int init = 0;
+
+/* Build at run time whichever AES tables are selected by the *_SET
+   macros, plus the round constant table t_set(r,c).
+
+   The file-level flag 'init' makes repeated calls return immediately; it
+   is set only after all tables have been written.  The flag is a plain
+   static int and no locking is visible here.  The function always
+   returns EXIT_SUCCESS.
+*/
+AES_RETURN aes_init(void)
+{   uint_32t  i, w;
+
+#if defined(FF_TABLES)
+
+    /* scratch tables used by the table based f2()..fe() and gf_inv() macros */
+    uint_8t  pow[512], log[256];
+
+    if(init)
+        return EXIT_SUCCESS;
+    /*  log and power tables for GF(2^8) finite field with
+        WPOLY as modular polynomial - the simplest primitive
+        root is 0x03, used here to generate the tables
+    */
+
+    i = 0; w = 1;
+    do
+    {
+        pow[i] = (uint_8t)w;
+        /* second copy at offset 255 so that pow[log[x] + k] can be
+           indexed without reducing the sum modulo 255 */
+        pow[i + 255] = (uint_8t)w;
+        log[w] = (uint_8t)i++;
+        /* multiply w by 0x03: w ^ (w << 1), with WPOLY XORed in when the
+           shift carries out of bit 7 */
+        w ^=  (w << 1) ^ (w & 0x80 ? WPOLY : 0);
+    }
+    while (w != 1);
+
+#else
+    if(init)
+        return EXIT_SUCCESS;
+#endif
+
+    /* round constants: w starts at 1 and is multiplied by {02} each step */
+    for(i = 0, w = 1; i < RC_LENGTH; ++i)
+    {
+        t_set(r,c)[i] = bytes2word(w, 0, 0, 0);
+        w = f2(w);
+    }
+
+    for(i = 0; i < 256; ++i)
+    {   uint_8t    b;
+
+        /* forward direction: b is the affine transform of the field
+           inverse of i, and w packs the bytes {02}b, b, b, {03}b */
+        b = fwd_affine(gf_inv((uint_8t)i));
+        w = bytes2word(f2(b), b, b, f3(b));
+
+#if defined( SBX_SET )
+        t_set(s,box)[i] = b;
+#endif
+
+#if defined( FT1_SET )                 /* tables for a normal encryption round */
+        t_set(f,n)[i] = w;
+#endif
+#if defined( FT4_SET )
+        /* tables 1..3 hold the same word rotated by 1..3 byte positions */
+        t_set(f,n)[0][i] = w;
+        t_set(f,n)[1][i] = upr(w,1);
+        t_set(f,n)[2][i] = upr(w,2);
+        t_set(f,n)[3][i] = upr(w,3);
+#endif
+        /* from here on w holds just b in its first byte position */
+        w = bytes2word(b, 0, 0, 0);
+
+#if defined( FL1_SET )            /* tables for last encryption round (may also   */
+        t_set(f,l)[i] = w;        /* be used in the key schedule)                 */
+#endif
+#if defined( FL4_SET )
+        t_set(f,l)[0][i] = w;
+        t_set(f,l)[1][i] = upr(w,1);
+        t_set(f,l)[2][i] = upr(w,2);
+        t_set(f,l)[3][i] = upr(w,3);
+#endif
+
+#if defined( LS1_SET )			/* table for key schedule if t_set(f,l) above is*/
+        t_set(l,s)[i] = w;      /* not of the required form                     */
+#endif
+#if defined( LS4_SET )
+        t_set(l,s)[0][i] = w;
+        t_set(l,s)[1][i] = upr(w,1);
+        t_set(l,s)[2][i] = upr(w,2);
+        t_set(l,s)[3][i] = upr(w,3);
+#endif
+
+        /* inverse direction: b is the field inverse of the inverse affine
+           transform of i, and w packs the bytes {0e}b, {09}b, {0d}b, {0b}b */
+        b = gf_inv(inv_affine((uint_8t)i));
+        w = bytes2word(fe(b), f9(b), fd(b), fb(b));
+
+        /* note that the t_set(i,m) tables are indexed by b, not by i, so
+           entry b holds the multiples of b itself */
+#if defined( IM1_SET )			/* tables for the inverse mix column operation  */
+        t_set(i,m)[b] = w;
+#endif
+#if defined( IM4_SET )
+        t_set(i,m)[0][b] = w;
+        t_set(i,m)[1][b] = upr(w,1);
+        t_set(i,m)[2][b] = upr(w,2);
+        t_set(i,m)[3][b] = upr(w,3);
+#endif
+
+#if defined( ISB_SET )
+        t_set(i,box)[i] = b;
+#endif
+#if defined( IT1_SET )			/* tables for a normal decryption round */
+        t_set(i,n)[i] = w;
+#endif
+#if defined( IT4_SET )
+        t_set(i,n)[0][i] = w;
+        t_set(i,n)[1][i] = upr(w,1);
+        t_set(i,n)[2][i] = upr(w,2);
+        t_set(i,n)[3][i] = upr(w,3);
+#endif
+        /* from here on w holds just b in its first byte position */
+        w = bytes2word(b, 0, 0, 0);
+#if defined( IL1_SET )			/* tables for last decryption round */
+        t_set(i,l)[i] = w;
+#endif
+#if defined( IL4_SET )
+        t_set(i,l)[0][i] = w;
+        t_set(i,l)[1][i] = upr(w,1);
+        t_set(i,l)[2][i] = upr(w,2);
+        t_set(i,l)[3][i] = upr(w,3);
+#endif
+    }
+    /* mark the tables as built so that later calls return at once */
+    init = 1;
+    return EXIT_SUCCESS;
+}
+
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
--- a/src/java/kp2akeytransform/jni/aes/aestab.h
+++ b/src/java/kp2akeytransform/jni/aes/aestab.h
@ -0,0 +1,180 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+  1. source code distributions include the above copyright notice, this
+     list of conditions and the following disclaimer;
+
+  2. binary distributions include the above copyright notice, this list
+     of conditions and the following disclaimer in their documentation;
+
+  3. the name of the copyright holder is not used to endorse products
+     built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+
+ This file contains the code for declaring the tables needed to implement
+ AES. The file aesopt.h is assumed to be included before this header file.
+ If there are no global variables, the definitions here can be used to put
+ the AES tables in a structure so that a pointer can then be added to the
+ AES context to pass them to the AES routines that need them.   If this
+ facility is used, the calling program has to ensure that this pointer is
+ managed appropriately.  In particular, the value of the t_dec(in,it) item
+ in the table structure must be set to zero in order to ensure that the
+ tables are initialised. In practice the three code sequences in aeskey.c
+ that control the calls to aes_init() and the aes_init() routine itself will
+ have to be changed for a specific implementation. If global variables are
+ available it will generally be preferable to use them with the precomputed
+ FIXED_TABLES option that uses static global tables.
+
+ The following defines can be used to control the way the tables
+ are defined, initialised and used in embedded environments that
+ require special features for these purposes
+
+    the 't_dec' construction is used to declare fixed table arrays
+    the 't_set' construction is used to set fixed table values
+    the 't_use' construction is used to access fixed table values
+
+    256 byte tables:
+
+        t_xxx(s,box)    => forward S box
+        t_xxx(i,box)    => inverse S box
+
+    256 32-bit word OR 4 x 256 32-bit word tables:
+
+        t_xxx(f,n)      => forward normal round
+        t_xxx(f,l)      => forward last round
+        t_xxx(i,n)      => inverse normal round
+        t_xxx(i,l)      => inverse last round
+        t_xxx(l,s)      => key schedule table
+        t_xxx(i,m)      => key schedule table
+
+    Other variables and tables:
+
+        t_xxx(r,c)      => the rcon table
+*/
+
+#if !defined( _AESTAB_H )
+#define _AESTAB_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define t_dec(m,n) t_##m##n
+#define t_set(m,n) t_##m##n
+#define t_use(m,n) t_##m##n
+
+#if defined(FIXED_TABLES)
+#  if !defined( __GNUC__ ) && (defined( __MSDOS__ ) || defined( __WIN16__ ))
+/*   make tables far data to avoid using too much DGROUP space (PG) */
+#    define CONST const far
+#  else
+#    define CONST const
+#  endif
+#else
+#  define CONST
+#endif
+
+#if defined(DO_TABLES)
+#  define EXTERN
+#else
+#  define EXTERN extern
+#endif
+
+#if defined(_MSC_VER) && defined(TABLE_ALIGN)
+#define ALIGN __declspec(align(TABLE_ALIGN))
+#else
+#define ALIGN
+#endif
+
+#if defined( __WATCOMC__ ) && ( __WATCOMC__ >= 1100 )
+#  define XP_DIR __cdecl
+#else
+#  define XP_DIR
+#endif
+
+/* d_1(t,n,b,e):       declare the 256 entry table n with element type t
+   d_4(t,n,b,e,f,g,h): declare the 4 x 256 entry table n with element type t
+
+   When both DO_TABLES and FIXED_TABLES are defined the declaration also
+   carries an initialiser: the data macro b is expanded with the element
+   macro e (and with f, g and h for the other three sub-tables of d_4).
+   Otherwise only the bare declaration is emitted and the arguments b, e,
+   f, g and h are ignored.  The round constant table t_dec(r,c) is
+   declared here in the same two forms.
+*/
+#if defined(DO_TABLES) && defined(FIXED_TABLES)
+#define d_1(t,n,b,e)       EXTERN ALIGN CONST XP_DIR t n[256]    =   b(e)
+#define d_4(t,n,b,e,f,g,h) EXTERN ALIGN CONST XP_DIR t n[4][256] = { b(e), b(f), b(g), b(h) }
+EXTERN ALIGN CONST uint_32t t_dec(r,c)[RC_LENGTH] = rc_data(w0);
+#else
+#define d_1(t,n,b,e)       EXTERN ALIGN CONST XP_DIR t n[256]
+#define d_4(t,n,b,e,f,g,h) EXTERN ALIGN CONST XP_DIR t n[4][256]
+EXTERN ALIGN CONST uint_32t t_dec(r,c)[RC_LENGTH];
+#endif
+
+#if defined( SBX_SET )
+    d_1(uint_8t, t_dec(s,box), sb_data, h0);
+#endif
+#if defined( ISB_SET )
+    d_1(uint_8t, t_dec(i,box), isb_data, h0);
+#endif
+
+#if defined( FT1_SET )
+    d_1(uint_32t, t_dec(f,n), sb_data, u0);
+#endif
+#if defined( FT4_SET )
+    d_4(uint_32t, t_dec(f,n), sb_data, u0, u1, u2, u3);
+#endif
+
+#if defined( FL1_SET )
+    d_1(uint_32t, t_dec(f,l), sb_data, w0);
+#endif
+#if defined( FL4_SET )
+    d_4(uint_32t, t_dec(f,l), sb_data, w0, w1, w2, w3);
+#endif
+
+#if defined( IT1_SET )
+    d_1(uint_32t, t_dec(i,n), isb_data, v0);
+#endif
+#if defined( IT4_SET )
+    d_4(uint_32t, t_dec(i,n), isb_data, v0, v1, v2, v3);
+#endif
+
+#if defined( IL1_SET )
+    d_1(uint_32t, t_dec(i,l), isb_data, w0);
+#endif
+#if defined( IL4_SET )
+    d_4(uint_32t, t_dec(i,l), isb_data, w0, w1, w2, w3);
+#endif
+
+#if defined( LS1_SET )
+#if defined( FL1_SET )
+#undef  LS1_SET
+#else
+    d_1(uint_32t, t_dec(l,s), sb_data, w0);
+#endif
+#endif
+
+#if defined( LS4_SET )
+#if defined( FL4_SET )
+#undef  LS4_SET
+#else
+    d_4(uint_32t, t_dec(l,s), sb_data, w0, w1, w2, w3);
+#endif
+#endif
+
+#if defined( IM1_SET )
+    d_1(uint_32t, t_dec(i,m), mm_data, v0);
+#endif
+#if defined( IM4_SET )
+    d_4(uint_32t, t_dec(i,m), mm_data, v0, v1, v2, v3);
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
--- a/src/java/kp2akeytransform/jni/aes/aesxam.c
+++ b/src/java/kp2akeytransform/jni/aes/aesxam.c
@ -0,0 +1,426 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+  1. source code distributions include the above copyright notice, this
+     list of conditions and the following disclaimer;
+
+  2. binary distributions include the above copyright notice, this list
+     of conditions and the following disclaimer in their documentation;
+
+  3. the name of the copyright holder is not used to endorse products
+     built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+*/
+
+//  An example of the use of AES (Rijndael) for file encryption.  This code
+//  implements AES in CBC mode with ciphertext stealing when the file length
+//  is greater than one block (16 bytes).  This code is an example of how to
+//  use AES and is not intended for real use since it does not provide any
+//  file integrity checking.
+//
+//  The Command line is:
+//
+//      aesxam input_file_name output_file_name [D|E] hexadecimalkey
+//
+//  where E gives encryption and D decryption of the input file into the
+//  output file using the given hexadecimal key string.  The latter is a
+//  hexadecimal sequence of 32, 48 or 64 digits.  Examples to encrypt
+//  file.c into file.enc and to decrypt it again are:
+//
+//      aesxam file.c file.enc E 0123456789abcdeffedcba9876543210
+//
+//      aesxam file.enc file2.c D 0123456789abcdeffedcba9876543210
+//
+//  which should return a file 'file2.c' identical to 'file.c'
+//
+//  CIPHERTEXT STEALING
+//
+//  Ciphertext stealing modifies the encryption of the last two CBC
+//  blocks. It can be applied invariably to the last two plaintext
+//  blocks or only applied when the last block is a partial one. In
+//  this code it is only applied if there is a partial block.  For
+//  a plaintext consisting of N blocks, with the last block possibly
+//  a partial one, ciphertext stealing works as shown below (note the
+//  reversal of the last two ciphertext blocks).  During decryption
+//  the part of the C:N-1 block that is not transmitted (X) can be
+//  obtained from the decryption of the penultimate ciphertext block
+//  since the bytes in X are xored with the zero padding appended to
+//  the last plaintext block.
+//
+//  This is a picture of the processing of the last
+//  plaintext blocks during encryption:
+//
+//    +---------+   +---------+   +---------+   +-------+-+
+//    |  P:N-4  |   |  P:N-3  |   |  P:N-2  |   | P:N-1 |0|
+//    +---------+   +---------+   +---------+   +-------+-+
+//         |             |             |             |
+//         v             v             v             v
+//  +----->x      +----->x      +----->x      +----->x   x = xor
+//  |      |      |      |      |      |      |      |
+//  |      v      |      v      |      v      |      v
+//  |    +---+    |    +---+    |    +---+    |    +---+
+//  |    | E |    |    | E |    |    | E |    |    | E |
+//  |    +---+    |    +---+    |    +---+    |    +---+
+//  |      |      |      |      |      |      |      |
+//  |      |      |      |      |      v      |  +---+
+//  |      |      |      |      | +-------+-+ |  |
+//  |      |      |      |      | | C:N-1 |X| |  |
+//  |      |      |      |      | +-------+-+ ^  |
+//  |      |      |      |      |     ||      |  |
+//  |      |      |      |      |     |+------+  |
+//  |      |      |      |      |     +----------|--+
+//  |      |      |      |      |                |  |
+//  |      |      |      |      |      +---------+  |
+//  |      |      |      |      |      |            |
+//  |      v      |      v      |      v            v
+//  | +---------+ | +---------+ | +---------+   +-------+
+// -+ |  C:N-4  |-+ |  C:N-3  |-+ |  C:N-2  |   | C:N-1 |
+//    +---------+   +---------+   +---------+   +-------+
+//
+//  And this is a picture of the processing of the last
+//  ciphertext blocks during decryption:
+//
+//    +---------+   +---------+   +---------+   +-------+
+// -+ |  C:N-4  |-+ |  C:N-3  |-+ |  C:N-2  |   | C:N-1 |
+//  | +---------+ | +---------+ | +---------+   +-------+
+//  |      |      |      |      |      |            |
+//  |      v      |      v      |      v   +--------|----+
+//  |    +---+    |    +---+    |    +---+ |  +--<--+    |
+//  |    | D |    |    | D |    |    | D | |  |     |    |
+//  |    +---+    |    +---+    |    +---+ |  |     v    v
+//  |      |      |      |      |      |   ^  | +-------+-+
+//  |      v      |      v      |      v   |  | | C:N-1 |X|
+//  +----->x      +----->x      | +-------+-+ | +-------+-+
+//         |             |      | |       |X| |      |
+//         |             |      | +-------+-+ |      v
+//         |             |      |     |       |    +---+
+//         |             |      |     |       v    | D |
+//         |             |      |     +------>x    +---+
+//         |             |      |             |      |
+//         |             |      +----->x<-----|------+   x = xor
+//         |             |             |      +-----+
+//         |             |             |            |
+//         v             v             v            v
+//    +---------+   +---------+   +---------+   +-------+
+//    |  P:N-4  |   |  P:N-3  |   |  P:N-2  |   | P:N-1 |
+//    +---------+   +---------+   +---------+   +-------+
+
+#include <stdio.h>
+#include <ctype.h>
+
+#include "aes.h"
+#include "rdtsc.h"
+
+//  the cipher block length in bytes
+#define BLOCK_LEN   16
+
+//  status codes returned by encfile() and decfile() and reported by main()
+#define OK           0
+#define READ_ERROR  -7
+#define WRITE_ERROR -8
+
+//  A Pseudo Random Number Generator (PRNG) used for the
+//  Initialisation Vector. The PRNG is George Marsaglia's
+//  Multiply-With-Carry (MWC) PRNG that concatenates two
+//  16-bit MWC generators:
+//      x(n)=36969 * x(n-1) + carry mod 2^16
+//      y(n)=18000 * y(n-1) + carry mod 2^16
+//  to produce a combined PRNG with a period of about 2^60.
+//  The Pentium cycle counter is used to initialise it. This
+//  is crude but the IV does not really need to be secret.
+//
+//  RAND(a,b) assigns to both of its arguments, so each must be a
+//  modifiable lvalue holding the state of one generator.  The value
+//  of the expression is the updated 'a' shifted left by 16 bits plus
+//  the updated 'b'.
+
+#define RAND(a,b) (((a = 36969 * (a & 65535) + (a >> 16)) << 16) + \
+                    (b = 18000 * (b & 65535) + (b >> 16))  )
+
+//  Fill buf[0..len-1] with pseudo random bytes from the RAND generator.
+//  The generator is seeded from read_tsc() on the first call and its state
+//  is kept in static storage, so successive calls continue one sequence
+//  (the function is therefore not reentrant).  Nothing is written when
+//  len <= 0.
+void fillrand(unsigned char *buf, const int len)
+{   static unsigned long a[2], mt = 1, count = 4;
+    static unsigned char r[4];
+    int                  i;
+
+    if(mt)
+    {   // Give each generator one 32-bit half of the cycle counter.  The
+        // halves are assigned explicitly instead of storing the counter
+        // through a cast pointer, which only seeded a[0] (leaving the
+        // second generator stuck at zero) when 'unsigned long' is 64 bits.
+        const unsigned long long tsc = read_tsc();
+
+        mt = 0;
+        a[0] = (unsigned long)(tsc & 0xffffffffu);
+        a[1] = (unsigned long)((tsc >> 32) & 0xffffffffu);
+    }
+
+    for(i = 0; i < len; ++i)
+    {
+        if(count == 4)
+        {   // Unpack the low 32 bits of the next output, low byte first.
+            // Storing through (unsigned long*)r would write 8 bytes into
+            // the 4-byte array when 'unsigned long' is 64 bits.
+            const unsigned long v = RAND(a[0], a[1]);
+
+            r[0] = (unsigned char)(v & 0xff);
+            r[1] = (unsigned char)((v >>  8) & 0xff);
+            r[2] = (unsigned char)((v >> 16) & 0xff);
+            r[3] = (unsigned char)((v >> 24) & 0xff);
+            count = 0;
+        }
+
+        buf[i] = r[count++];
+    }
+}
+
+//  Encrypt the whole of 'fin' into 'fout' with AES in CBC mode.  A random
+//  IV is written first; ciphertext stealing is applied when the input is
+//  longer than one block and does not end on a block boundary.  'ctx' must
+//  already hold the encryption key schedule.  Returns OK on success,
+//  READ_ERROR if reading 'fin' fails, or WRITE_ERROR if a write to 'fout'
+//  is short.
+int encfile(FILE *fin, FILE *fout, aes_encrypt_ctx ctx[1])
+{   unsigned char dbuf[3 * BLOCK_LEN];
+    unsigned long i, len, wlen = BLOCK_LEN;
+
+    // When ciphertext stealing is used, we need three ciphertext blocks so
+    // we use a buffer that is three times the block length.  The buffer
+    // pointers b1, b2 and b3 point to the buffer positions of three
+    // ciphertext blocks, b3 being the most recent and b1 being the
+    // oldest. We start with the IV in b1 and the block to be encrypted
+    // in b2.
+
+    // set a random IV
+
+    fillrand(dbuf, BLOCK_LEN);
+
+    // read the first file block
+    len = (unsigned long) fread((char*)dbuf + BLOCK_LEN, 1, BLOCK_LEN, fin);
+
+    // a short count only means end of file if the stream has no error
+    if(len < BLOCK_LEN && ferror(fin))
+        return READ_ERROR;
+
+    if(len < BLOCK_LEN)
+    {   // if the file length is less than one block
+
+        // xor the file bytes with the IV bytes
+        for(i = 0; i < len; ++i)
+            dbuf[i + BLOCK_LEN] ^= dbuf[i];
+
+        // encrypt the top 16 bytes of the buffer
+        aes_encrypt(dbuf + len, dbuf + len, ctx);
+
+        len += BLOCK_LEN;
+        // write the IV and the encrypted file bytes
+        if(fwrite((char*)dbuf, 1, len, fout) != len)
+            return WRITE_ERROR;
+
+        return OK;
+    }
+    else    // if the file length is 16 bytes or more
+    {   unsigned char *b1 = dbuf, *b2 = b1 + BLOCK_LEN, *b3 = b2 + BLOCK_LEN, *bt;
+
+        // write the IV
+        if(fwrite((char*)dbuf, 1, BLOCK_LEN, fout) != BLOCK_LEN)
+            return WRITE_ERROR;
+
+        for( ; ; )
+        {
+            // read the next block to see if ciphertext stealing is needed
+            len = (unsigned long)fread((char*)b3, 1, BLOCK_LEN, fin);
+
+            // do not mistake a failed read for the end of the file
+            if(len < BLOCK_LEN && ferror(fin))
+                return READ_ERROR;
+
+            // do CBC chaining prior to encryption for current block (in b2)
+            for(i = 0; i < BLOCK_LEN; ++i)
+                b1[i] ^= b2[i];
+
+            // encrypt the block (now in b1)
+            aes_encrypt(b1, b1, ctx);
+
+            if(len != 0 && len != BLOCK_LEN)    // use ciphertext stealing
+            {
+                // set the length of the last block
+                wlen = len;
+
+                // xor ciphertext into last block
+                for(i = 0; i < len; ++i)
+                    b3[i] ^= b1[i];
+
+                // move 'stolen' ciphertext into last block
+                for(i = len; i < BLOCK_LEN; ++i)
+                    b3[i] = b1[i];
+
+                // encrypt this block
+                aes_encrypt(b3, b3, ctx);
+
+                // and write it as the second to last encrypted block
+                if(fwrite((char*)b3, 1, BLOCK_LEN, fout) != BLOCK_LEN)
+                    return WRITE_ERROR;
+            }
+
+            // write the encrypted block (truncated to wlen bytes when
+            // ciphertext stealing was used)
+            if(fwrite((char*)b1, 1, wlen, fout) != wlen)
+                return WRITE_ERROR;
+
+            if(len != BLOCK_LEN)
+                return OK;
+
+            // advance the buffer pointers
+            bt = b3, b3 = b2, b2 = b1, b1 = bt;
+        }
+    }
+}
+
+//  Decrypt the whole of 'fin' (an IV followed by CBC ciphertext in the
+//  layout written by encfile) into 'fout'.  'ctx' must already hold the
+//  decryption key schedule.  Returns OK on success, READ_ERROR if reading
+//  'fin' fails or the input is shorter than one block, or WRITE_ERROR if
+//  a write to 'fout' is short.
+int decfile(FILE *fin, FILE *fout, aes_decrypt_ctx ctx[1])
+{   unsigned char dbuf[3 * BLOCK_LEN], buf[BLOCK_LEN];
+    unsigned long i, len, wlen = BLOCK_LEN;
+
+    // When ciphertext stealing is used, we need three ciphertext blocks so
+    // we use a buffer that is three times the block length.  The buffer
+    // pointers b1, b2 and b3 point to the buffer positions of three
+    // ciphertext blocks, b3 being the most recent and b1 being the
+    // oldest. We start with the IV in b1 and the block to be decrypted
+    // in b2.
+
+    len = (unsigned long)fread((char*)dbuf, 1, 2 * BLOCK_LEN, fin);
+
+    if(len < 2 * BLOCK_LEN) // the original file is less than one block in length
+    {
+        // encfile always writes at least BLOCK_LEN bytes, so anything
+        // shorter is a truncated or unreadable input; rejecting it here
+        // also stops 'len -= BLOCK_LEN' from wrapping round and indexing
+        // far outside dbuf
+        if(ferror(fin) || len < BLOCK_LEN)
+            return READ_ERROR;
+
+        len -= BLOCK_LEN;
+        // decrypt from position len to position len + BLOCK_LEN
+        aes_decrypt(dbuf + len, dbuf + len, ctx);
+
+        // undo the CBC chaining
+        for(i = 0; i < len; ++i)
+            dbuf[i] ^= dbuf[i + BLOCK_LEN];
+
+        // output the decrypted bytes
+        if(fwrite((char*)dbuf, 1, len, fout) != len)
+            return WRITE_ERROR;
+
+        return OK;
+    }
+    else
+    {   unsigned char *b1 = dbuf, *b2 = b1 + BLOCK_LEN, *b3 = b2 + BLOCK_LEN, *bt;
+
+        for( ; ; )  // while some ciphertext remains, prepare to decrypt block b2
+        {
+            // read in the next block to see if ciphertext stealing is needed
+            len = (unsigned long)fread((char*)b3, 1, BLOCK_LEN, fin);
+
+            // do not mistake a failed read for the end of the file
+            if(len < BLOCK_LEN && ferror(fin))
+                return READ_ERROR;
+
+            // decrypt the b2 block
+            aes_decrypt(b2, buf, ctx);
+
+            if(len == 0 || len == BLOCK_LEN)    // no ciphertext stealing
+            {
+                // unchain CBC using the previous ciphertext block in b1
+                for(i = 0; i < BLOCK_LEN; ++i)
+                    buf[i] ^= b1[i];
+            }
+            else    // partial last block - use ciphertext stealing
+            {
+                wlen = len;
+
+                // produce last 'len' bytes of plaintext by xoring with
+                // the lowest 'len' bytes of next block b3 - C[N-1]
+                for(i = 0; i < len; ++i)
+                    buf[i] ^= b3[i];
+
+                // reconstruct the C[N-1] block in b3 by adding in the
+                // last (BLOCK_LEN - len) bytes of C[N-2] in b2
+                for(i = len; i < BLOCK_LEN; ++i)
+                    b3[i] = buf[i];
+
+                // decrypt the C[N-1] block in b3
+                aes_decrypt(b3, b3, ctx);
+
+                // produce the last but one plaintext block by xoring with
+                // the last but two ciphertext block
+                for(i = 0; i < BLOCK_LEN; ++i)
+                    b3[i] ^= b1[i];
+
+                // write decrypted plaintext blocks
+                if(fwrite((char*)b3, 1, BLOCK_LEN, fout) != BLOCK_LEN)
+                    return WRITE_ERROR;
+            }
+
+            // write the decrypted plaintext block (only wlen bytes of it
+            // when ciphertext stealing was used)
+            if(fwrite((char*)buf, 1, wlen, fout) != wlen)
+                return WRITE_ERROR;
+
+            if(len != BLOCK_LEN)
+                return OK;
+
+            // advance the buffer pointers
+            bt = b1, b1 = b2, b2 = b3, b3 = bt;
+        }
+    }
+}
+
+//  Command line driver:
+//
+//      aesxam in_filename out_filename [d/e] key_in_hex
+//
+//  Parses the hexadecimal key (32, 48 or 64 digits), opens both files and
+//  runs encfile or decfile.  Returns 0 on success or a negative code:
+//  -1 usage, -2 non-hexadecimal key digit, -3 key too long, -4 bad key
+//  length, -5 input open failure, -6 output open failure, READ_ERROR or
+//  WRITE_ERROR.
+int main(int argc, char *argv[])
+{   FILE            *fin = 0, *fout = 0;
+    char            *cp, ch, key[32];
+    int             i, by = 0, key_len, err = 0;
+
+    // toupper() is only defined for values representable as unsigned
+    // char (or EOF), hence the casts on every call
+    if(argc != 5 || (toupper((unsigned char)*argv[3]) != 'D'
+                        && toupper((unsigned char)*argv[3]) != 'E'))
+    {
+        printf("usage: aesxam in_filename out_filename [d/e] key_in_hex\n");
+        err = -1; goto exit;
+    }
+
+    aes_init();     // in case dynamic AES tables are being used
+
+    cp = argv[4];   // this is a pointer to the hexadecimal key digits
+    i = 0;          // this is a count for the input digits processed
+
+    while(i < 64 && *cp)        // the maximum key length is 32 bytes and
+    {                           // hence at most 64 hexadecimal digits
+        ch = (char)toupper((unsigned char)*cp++);   // process a hexadecimal digit
+        if(ch >= '0' && ch <= '9')
+            by = (by << 4) + ch - '0';
+        else if(ch >= 'A' && ch <= 'F')
+            by = (by << 4) + ch - 'A' + 10;
+        else                    // error if not hexadecimal
+        {
+            printf("key must be in hexadecimal notation\n");
+            err = -2; goto exit;
+        }
+
+        // keep only the two most recent digits; without this 'by' keeps
+        // growing and overflows a signed int on long keys
+        by &= 0xff;
+
+        // store a key byte for each pair of hexadecimal digits
+        if(i++ & 1)
+            key[i / 2 - 1] = by & 0xff;
+    }
+
+    if(*cp)
+    {
+        printf("The key value is too long\n");
+        err = -3; goto exit;
+    }
+    else if(i < 32 || (i & 15))
+    {
+        printf("The key length must be 32, 48 or 64 hexadecimal digits\n");
+        err = -4; goto exit;
+    }
+
+    key_len = i / 2;
+
+    if(!(fin = fopen(argv[1], "rb")))   // try to open the input file
+    {
+        printf("The input file: %s could not be opened\n", argv[1]);
+        err = -5; goto exit;
+    }
+
+    if(!(fout = fopen(argv[2], "wb")))  // try to open the output file
+    {
+        printf("The output file: %s could not be opened\n", argv[2]);
+        err = -6; goto exit;
+    }
+
+    if(toupper((unsigned char)*argv[3]) == 'E') // encryption in Cipher Block Chaining mode
+    {   aes_encrypt_ctx ctx[1];
+
+        aes_encrypt_key((unsigned char*)key, key_len, ctx);
+
+        err = encfile(fin, fout, ctx);
+    }
+    else                         // decryption in Cipher Block Chaining mode
+    {   aes_decrypt_ctx ctx[1];
+
+        aes_decrypt_key((unsigned char*)key, key_len, ctx);
+
+        err = decfile(fin, fout, ctx);
+    }
+exit:
+    // Close the output before reporting: buffered data is only flushed
+    // here, so a failing fclose means the output file is incomplete and
+    // must be reported as a write error rather than as success.
+    if(fout && fclose(fout) != 0 && err == OK)
+        err = WRITE_ERROR;
+
+    if(fin)
+        fclose(fin);
+
+    if(err == READ_ERROR)
+        printf("Error reading from input file: %s\n", argv[1]);
+
+    if(err == WRITE_ERROR)
+        printf("Error writing to output file: %s\n", argv[2]);
+
+    return err;
+}
--- a/src/java/kp2akeytransform/jni/aes/brg_endian.h
+++ b/src/java/kp2akeytransform/jni/aes/brg_endian.h
@ -0,0 +1,133 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+  1. source code distributions include the above copyright notice, this
+     list of conditions and the following disclaimer;
+
+  2. binary distributions include the above copyright notice, this list
+     of conditions and the following disclaimer in their documentation;
+
+  3. the name of the copyright holder is not used to endorse products
+     built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+*/
+
+#ifndef _BRG_ENDIAN_H
+#define _BRG_ENDIAN_H
+
+/* The two values that PLATFORM_BYTE_ORDER can take */
+#define IS_BIG_ENDIAN      4321 /* byte 0 is most significant (mc68k) */
+#define IS_LITTLE_ENDIAN   1234 /* byte 0 is least significant (i386) */
+
+/* Include files where endian defines and byteswap functions may reside */
+#if defined( __sun )
+#  include <sys/isa_defs.h>
+#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
+#  include <sys/endian.h>
+#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
+      defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
+#  include <machine/endian.h>
+#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
+#  if !defined( __MINGW32__ ) && !defined( _AIX )
+#    include <endian.h>
+#    if !defined( __BEOS__ )
+#      include <byteswap.h>
+#    endif
+#  endif
+#endif
+
+/* Now attempt to set the define for platform byte order using any  */
+/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which  */
+/* seem to encompass most endian symbol definitions                 */
+
+/* Each of the four blocks below applies the same rule to one spelling: */
+/* if both the BIG and LITTLE symbols exist, the matching BYTE_ORDER    */
+/* symbol is compared against them; if only one exists, its presence    */
+/* alone selects the order.  The blocks are not mutually exclusive, so  */
+/* more than one of them may define PLATFORM_BYTE_ORDER.                */
+
+/* form 1: BIG_ENDIAN, LITTLE_ENDIAN, BYTE_ORDER */
+#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
+#  if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( BIG_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( LITTLE_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/* form 2: _BIG_ENDIAN, _LITTLE_ENDIAN, _BYTE_ORDER */
+#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
+#  if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( _BIG_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( _LITTLE_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/* form 3: __BIG_ENDIAN, __LITTLE_ENDIAN, __BYTE_ORDER */
+#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
+#  if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( __BIG_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/* form 4: __BIG_ENDIAN__, __LITTLE_ENDIAN__, __BYTE_ORDER__ */
+#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
+#  if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
+#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#  elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
+#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#  endif
+#elif defined( __BIG_ENDIAN__ )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#elif defined( __LITTLE_ENDIAN__ )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+/*  if the platform byte order could not be determined, then try to */
+/*  set this define using common machine defines                    */
+#if !defined(PLATFORM_BYTE_ORDER)
+
+#if   defined( __alpha__ ) || defined( __alpha ) || defined( i386 )       || \
+      defined( __i386__ )  || defined( _M_I86 )  || defined( _M_IX86 )    || \
+      defined( __OS2__ )   || defined( sun386 )  || defined( __TURBOC__ ) || \
+      defined( vax )       || defined( vms )     || defined( VMS )        || \
+      defined( __VMS )     || defined( _M_X64 )
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+
+#elif defined( AMIGA )   || defined( applec )    || defined( __AS400__ )  || \
+      defined( _CRAY )   || defined( __hppa )    || defined( __hp9000 )   || \
+      defined( ibm370 )  || defined( mc68000 )   || defined( m68k )       || \
+      defined( __MRC__ ) || defined( __MVS__ )   || defined( __MWERKS__ ) || \
+      defined( sparc )   || defined( __sparc)    || defined( SYMANTEC_C ) || \
+      defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM )   || \
+      defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+
+#elif 0     /* **** EDIT HERE IF NECESSARY **** */
+#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#elif 0     /* **** EDIT HERE IF NECESSARY **** */
+#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#else
+#  error Please edit lines 126 or 128 in brg_endian.h to set the platform byte order
+#endif
+
+#endif
+
+#endif
--- a/src/java/kp2akeytransform/jni/aes/brg_types.h
+++ b/src/java/kp2akeytransform/jni/aes/brg_types.h
@ -0,0 +1,226 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+  1. source code distributions include the above copyright notice, this
+     list of conditions and the following disclaimer;
+
+  2. binary distributions include the above copyright notice, this list
+     of conditions and the following disclaimer in their documentation;
+
+  3. the name of the copyright holder is not used to endorse products
+     built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+
+ The unsigned integer types defined here are of the form uint_<nn>t where
+ <nn> is the length of the type; for example, the unsigned 32-bit type is
+ 'uint_32t'.  These are NOT the same as the 'C99 integer types' that are
+ defined in the inttypes.h and stdint.h headers since attempts to use these
+ types have shown that support for them is still highly variable.  However,
+ since the latter are of the form uint<nn>_t, a regular expression search
+ and replace (in VC++ search on 'uint_{:z}t' and replace with 'uint\1_t')
+ can be used to convert the types used here to the C99 standard types.
+*/
+
+#ifndef _BRG_TYPES_H
+#define _BRG_TYPES_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <limits.h>
+
+/* Select ptrint_t, the integer type that the ALIGN_xxx macros in this
+   file cast pointers to.  It maps to intptr_t wherever that type is
+   available (or, for eCos, is supplied here as unsigned int).
+   NOTE(review): the final fallback is plain int, which is narrower than
+   a pointer on 64-bit targets - confirm no such compiler reaches it. */
+#if defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
+#  include <stddef.h>
+#  define ptrint_t intptr_t
+#elif defined( __ECOS__ )
+#  define intptr_t unsigned int
+#  define ptrint_t intptr_t
+#elif defined( __GNUC__ ) && ( __GNUC__ >= 3 )
+#  include <stdint.h>
+#  define ptrint_t intptr_t
+#else
+#  define ptrint_t int
+#endif
+
+/* Fixed width unsigned types.  Each one is chosen by testing the limits
+   from <limits.h>, and compilation stops with an error when no standard
+   type has the required range.  Defining the BRG_UIxx guard before this
+   header is included suppresses the corresponding typedef here. */
+
+/* uint_8t: unsigned char, provided it is exactly 8 bits wide */
+#ifndef BRG_UI8
+#  define BRG_UI8
+#  if UCHAR_MAX == 255u
+     typedef unsigned char uint_8t;
+#  else
+#    error Please define uint_8t as an 8-bit unsigned integer type in brg_types.h
+#  endif
+#endif
+
+/* uint_16t: unsigned short, provided it is exactly 16 bits wide */
+#ifndef BRG_UI16
+#  define BRG_UI16
+#  if USHRT_MAX == 65535u
+     typedef unsigned short uint_16t;
+#  else
+#    error Please define uint_16t as a 16-bit unsigned short type in brg_types.h
+#  endif
+#endif
+
+/* uint_32t: unsigned int if it is 32 bits wide, otherwise unsigned long.
+   li_32(h) turns the hexadecimal digits 'h' into a constant of that type
+   by pasting on the 0x prefix and the matching suffix. */
+#ifndef BRG_UI32
+#  define BRG_UI32
+#  if UINT_MAX == 4294967295u
+#    define li_32(h) 0x##h##u
+     typedef unsigned int uint_32t;
+#  elif ULONG_MAX == 4294967295u
+#    define li_32(h) 0x##h##ul
+     typedef unsigned long uint_32t;
+#  elif defined( _CRAY )
+#    error This code needs 32-bit data types, which Cray machines do not provide
+#  else
+#    error Please define uint_32t as a 32-bit unsigned integer type in brg_types.h
+#  endif
+#endif
+
+/* uint_64t: compiler specific choices are tried first, then the first of
+   unsigned int, unsigned long and unsigned long long whose limit exceeds
+   32 bits.  li_64(h) builds a constant of the chosen type from the
+   hexadecimal digits 'h'.  Unlike the narrower types, failure to find a
+   64-bit type is not an error here: BRG_UI64 is simply left undefined.
+   Note that once a limit test in the #elif chain succeeds no later branch
+   is tried, even if the nested equality test then fails. */
+#ifndef BRG_UI64
+#  if defined( __BORLANDC__ ) && !defined( __MSDOS__ )
+#    define BRG_UI64
+#    define li_64(h) 0x##h##ui64
+     typedef unsigned __int64 uint_64t;
+#  elif defined( _MSC_VER ) && ( _MSC_VER < 1300 )    /* 1300 == VC++ 7.0 */
+#    define BRG_UI64
+#    define li_64(h) 0x##h##ui64
+     typedef unsigned __int64 uint_64t;
+#  elif defined( __sun ) && defined( ULONG_MAX ) && ULONG_MAX == 0xfffffffful
+#    define BRG_UI64
+#    define li_64(h) 0x##h##ull
+     typedef unsigned long long uint_64t;
+#  elif defined( __MVS__ )
+#    define BRG_UI64
+#    define li_64(h) 0x##h##ull
+     typedef unsigned int long long uint_64t;
+#  elif defined( UINT_MAX ) && UINT_MAX > 4294967295u
+#    if UINT_MAX == 18446744073709551615u
+#      define BRG_UI64
+#      define li_64(h) 0x##h##u
+       typedef unsigned int uint_64t;
+#    endif
+#  elif defined( ULONG_MAX ) && ULONG_MAX > 4294967295u
+#    if ULONG_MAX == 18446744073709551615ul
+#      define BRG_UI64
+#      define li_64(h) 0x##h##ul
+       typedef unsigned long uint_64t;
+#    endif
+#  elif defined( ULLONG_MAX ) && ULLONG_MAX > 4294967295u
+#    if ULLONG_MAX == 18446744073709551615ull
+#      define BRG_UI64
+#      define li_64(h) 0x##h##ull
+       typedef unsigned long long uint_64t;
+#    endif
+#  elif defined( ULONG_LONG_MAX ) && ULONG_LONG_MAX > 4294967295u
+#    if ULONG_LONG_MAX == 18446744073709551615ull
+#      define BRG_UI64
+#      define li_64(h) 0x##h##ull
+       typedef unsigned long long uint_64t;
+#    endif
+#  endif
+#endif
+
+/* A missing 64-bit type only stops compilation when the including code
+   has asked for one by defining NEED_UINT_64T. */
+#if !defined( BRG_UI64 )
+#  if defined( NEED_UINT_64T )
+#    error Please define uint_64t as an unsigned 64 bit type in brg_types.h
+#  endif
+#endif
+
+/* VOID_RETURN and INT_RETURN expand to the return type of a function
+   together with any linkage or calling convention decoration it needs:
+     DLL_EXPORT defined  - dllexport (plus __stdcall for Microsoft/Intel)
+     DLL_IMPORT defined  - dllimport (plus __stdcall for Microsoft/Intel)
+     Watcom C            - __cdecl
+     anything else       - plain void / int
+   With DLL_EXPORT or DLL_IMPORT defined, a compiler other than Microsoft,
+   Intel or GCC is rejected with an error.  Defining RETURN_VALUES before
+   this header is included suppresses these definitions. */
+#ifndef RETURN_VALUES
+#  define RETURN_VALUES
+#  if defined( DLL_EXPORT )
+#    if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
+#      define VOID_RETURN    __declspec( dllexport ) void __stdcall
+#      define INT_RETURN     __declspec( dllexport ) int  __stdcall
+#    elif defined( __GNUC__ )
+#      define VOID_RETURN    __declspec( __dllexport__ ) void
+#      define INT_RETURN     __declspec( __dllexport__ ) int
+#    else
+#      error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
+#    endif
+#  elif defined( DLL_IMPORT )
+#    if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
+#      define VOID_RETURN    __declspec( dllimport ) void __stdcall
+#      define INT_RETURN     __declspec( dllimport ) int  __stdcall
+#    elif defined( __GNUC__ )
+#      define VOID_RETURN    __declspec( __dllimport__ ) void
+#      define INT_RETURN     __declspec( __dllimport__ ) int
+#    else
+#      error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
+#    endif
+#  elif defined( __WATCOMC__ )
+#    define VOID_RETURN  void __cdecl
+#    define INT_RETURN   int  __cdecl
+#  else
+#    define VOID_RETURN  void
+#    define INT_RETURN   int
+#  endif
+#endif
+
+/*	These defines are used to detect and set the memory alignment of pointers.
+    Note that offsets are in bytes.
+
+	ALIGN_OFFSET(x,n)			return the positive or zero offset of 
+								the memory addressed by the pointer 'x' 
+								from an address that is aligned on an 
+								'n' byte boundary ('n' is a power of 2)
+
+	ALIGN_FLOOR(x,n)			return a pointer that points to memory
+								that is aligned on an 'n' byte boundary 
+								and is not higher than the memory address
+								pointed to by 'x' ('n' is a power of 2)
+
+	ALIGN_CEIL(x,n)				return a pointer that points to memory
+								that is aligned on an 'n' byte boundary 
+								and is not lower than the memory address
+								pointed to by 'x' ('n' is a power of 2)
+*/
+
+/*  All three macros obtain the address by casting 'x' to ptrint_t and
+    mask it with (n - 1), which is why 'n' must be a power of 2.
+    ALIGN_OFFSET yields an integer; ALIGN_FLOOR and ALIGN_CEIL yield a
+    uint_8t pointer, moved down by the offset or up by the distance to
+    the next boundary respectively.
+*/
+#define ALIGN_OFFSET(x,n)	(((ptrint_t)(x)) & ((n) - 1))
+#define ALIGN_FLOOR(x,n)	((uint_8t*)(x) - ( ((ptrint_t)(x)) & ((n) - 1)))
+#define ALIGN_CEIL(x,n)		((uint_8t*)(x) + (-((ptrint_t)(x)) & ((n) - 1)))
+
+/*  These defines are used to declare buffers in a way that allows
+    faster operations on longer variables to be used.  In all these
+    defines 'size' must be a power of 2 and >= 8. NOTE that the
+    buffer size is in bytes but the type length is in bits.
+
+    'size' is pasted into the type name (uint_<size>t), so it must
+    expand to one of the literal widths 8, 16, 32 or 64 and cannot be
+    an arbitrary expression; 'bsize' may be any constant expression.
+
+    UI_TYPE(size)               names the unsigned integer type that
+                                is 'size' bits long
+
+    UNIT_TYPEDEF(x,size)        declares a type 'x' of length
+                                'size' bits
+
+    BUFR_TYPEDEF(x,size,bsize)  declares a buffer type 'x' of length
+                                'bsize' bytes defined as an array of
+                                units each of 'size' bits (bsize must
+                                be a multiple of size / 8)
+
+    UNIT_CAST(x,size)           casts a variable to a type of
+                                length 'size' bits
+
+    UPTR_CAST(x,size)           casts a pointer to a pointer to a
+                                variable of length 'size' bits
+*/
+
+#define UI_TYPE(size)               uint_##size##t
+#define UNIT_TYPEDEF(x,size)        typedef UI_TYPE(size) x
+/*  the arguments are parenthesised so that an expression passed as
+    'bsize' (for example N + 16) is divided as a whole                 */
+#define BUFR_TYPEDEF(x,size,bsize)  typedef UI_TYPE(size) x[(bsize) / ((size) >> 3)]
+#define UNIT_CAST(x,size)           ((UI_TYPE(size) )(x))
+#define UPTR_CAST(x,size)           ((UI_TYPE(size)*)(x))
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
--- a/src/java/kp2akeytransform/jni/aes/rfc3686.c
+++ b/src/java/kp2akeytransform/jni/aes/rfc3686.c
@ -0,0 +1,331 @@
+
+#include <stdio.h>
+#include <string.h>
+#include "aes.h"
+
+typedef struct /* one AES-CTR test vector; fields are filled positionally by tests[] */
+{   unsigned int  k_len;      /* key length in bytes, passed to aes_encrypt_key */
+    unsigned int  m_len;      /* message length in bytes (crypt and compare length) */
+    unsigned char key[32];    /* AES key; the initialisers fill k_len bytes */
+    unsigned char iv[8];      /* IV, copied to counter block bytes 4..11 by rfc3686_init */
+    unsigned char nonce[8];   /* nonce; rfc3686_init copies only the first 4 bytes */
+    unsigned char p_txt[36];  /* plaintext input */
+    unsigned char c_str[48];  /* counter blocks (reference data, not read by rfc3686_test) */
+    unsigned char k_str[48];  /* key stream blocks (reference data, not read by rfc3686_test) */
+    unsigned char c_txt[36];  /* expected ciphertext, compared over m_len bytes */
+} test_str;
+
+test_str tests[] = 
+{
+ {  16, 16, /* Vector 1 */
+  { 0xae, 0x68, 0x52, 0xf8, 0x12, 0x10, 0x67, 0xcc, 
+    0x4b, 0xf7, 0xa5, 0x76, 0x55, 0x77, 0xf3, 0x9e 
+  },
+  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+  },
+  { 0x00, 0x00, 0x00, 0x30
+  },
+  /* "Single block msg" */
+  { 0x53, 0x69, 0x6e, 0x67, 0x6c, 0x65, 0x20, 0x62, 
+    0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x6d, 0x73, 0x67 
+  },
+  { 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01
+  }, 
+  { 0xb7, 0x60, 0x33, 0x28, 0xdb, 0xc2, 0x93, 0x1b, 
+    0x41, 0x0e, 0x16, 0xc8, 0x06, 0x7e, 0x62, 0xdf
+  },
+  { 0xe4, 0x09, 0x5d, 0x4f, 0xb7, 0xa7, 0xb3, 0x79, 
+    0x2d, 0x61, 0x75, 0xa3, 0x26, 0x13, 0x11, 0xb8 
+  }
+ },
+ {  16, 32, /* Vector 2 */
+  { 0x7e, 0x24, 0x06, 0x78, 0x17, 0xfa, 0xe0, 0xd7, 
+    0x43, 0xd6, 0xce, 0x1f, 0x32, 0x53, 0x91, 0x63
+  },
+  { 0xc0, 0x54, 0x3b, 0x59, 0xda, 0x48, 0xd9, 0x0b 
+  }, 
+  { 0x00, 0x6c, 0xb6, 0xdb
+  },
+  { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
+  },
+  { 0x00, 0x6c, 0xb6, 0xdb, 0xc0, 0x54, 0x3b, 0x59, 
+    0xda, 0x48, 0xd9, 0x0b, 0x00, 0x00, 0x00, 0x01,
+    0x00, 0x6c, 0xb6, 0xdb, 0xc0, 0x54, 0x3b, 0x59, 
+    0xda, 0x48, 0xd9, 0x0b, 0x00, 0x00, 0x00, 0x02
+  }, 
+  { 0x51, 0x05, 0xa3, 0x05, 0x12, 0x8f, 0x74, 0xde, 
+    0x71, 0x04, 0x4b, 0xe5, 0x82, 0xd7, 0xdd, 0x87, 
+    0xfb, 0x3f, 0x0c, 0xef, 0x52, 0xcf, 0x41, 0xdf, 
+    0xe4, 0xff, 0x2a, 0xc4, 0x8d, 0x5c, 0xa0, 0x37
+  },
+  { 0x51, 0x04, 0xa1, 0x06, 0x16, 0x8a, 0x72, 0xd9, 
+    0x79, 0x0d, 0x41, 0xee, 0x8e, 0xda, 0xd3, 0x88,
+    0xeb, 0x2e, 0x1e, 0xfc, 0x46, 0xda, 0x57, 0xc8, 
+    0xfc, 0xe6, 0x30, 0xdf, 0x91, 0x41, 0xbe, 0x28 
+  }
+ },
+ {  16, 36, /* Vector 3 */
+  { 0x76, 0x91, 0xbe, 0x03, 0x5e, 0x50, 0x20, 0xa8, 
+    0xac, 0x6e, 0x61, 0x85, 0x29, 0xf9, 0xa0, 0xdc 
+  },
+  { 0x27, 0x77, 0x7f, 0x3f,  0x4a, 0x17, 0x86, 0xf0 
+  },
+  { 0x00, 0xe0, 0x01, 0x7b
+  }, 
+  { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 
+    0x20, 0x21, 0x22, 0x23
+  }, 
+  { 0x00, 0xe0, 0x01, 0x7b, 0x27, 0x77, 0x7f, 0x3f, 
+    0x4a, 0x17, 0x86, 0xf0, 0x00, 0x00, 0x00, 0x01,
+    0x00, 0xe0, 0x01, 0x7b, 0x27, 0x77, 0x7f, 0x3f, 
+    0x4a, 0x17, 0x86, 0xf0, 0x00, 0x00, 0x00, 0x02, 
+    0x00, 0xe0, 0x01, 0x7b, 0x27, 0x77, 0x7f, 0x3f, 
+    0x4a, 0x17, 0x86, 0xf0, 0x00, 0x00, 0x00, 0x03 
+  },
+  { 0xc1, 0xce, 0x4a, 0xab, 0x9b, 0x2a, 0xfb, 0xde, 
+    0xc7, 0x4f, 0x58, 0xe2, 0xe3, 0xd6, 0x7c, 0xd8, 
+    0x55, 0x51, 0xb6, 0x38, 0xca, 0x78, 0x6e, 0x21, 
+    0xcd, 0x83, 0x46, 0xf1, 0xb2, 0xee, 0x0e, 0x4c, 
+    0x05, 0x93, 0x25, 0x0c, 0x17, 0x55, 0x36, 0x00, 
+    0xa6, 0x3d, 0xfe, 0xcf, 0x56, 0x23, 0x87, 0xe9
+  }, 
+  { 0xc1, 0xcf, 0x48, 0xa8, 0x9f, 0x2f, 0xfd, 0xd9, 
+    0xcf, 0x46, 0x52, 0xe9, 0xef, 0xdb, 0x72, 0xd7, 
+    0x45, 0x40, 0xa4, 0x2b, 0xde, 0x6d, 0x78, 0x36, 
+    0xd5, 0x9a, 0x5c, 0xea, 0xae, 0xf3, 0x10, 0x53, 
+    0x25, 0xb2, 0x07, 0x2f 
+  }
+ },
+ {  24, 16, /* Vector 4 */
+  { 0x16, 0xaf, 0x5b, 0x14, 0x5f, 0xc9, 0xf5, 0x79, 
+    0xc1, 0x75, 0xf9, 0x3e, 0x3b, 0xfb, 0x0e, 0xed, 
+    0x86, 0x3d, 0x06, 0xcc, 0xfd, 0xb7, 0x85, 0x15 
+  },
+  { 0x36, 0x73, 0x3c, 0x14, 0x7d, 0x6d, 0x93, 0xcb
+  },
+  { 0x00, 0x00, 0x00, 0x48
+  }, 
+  /* "Single block msg" */
+  { 0x53, 0x69, 0x6e, 0x67, 0x6c, 0x65, 0x20, 0x62, 
+    0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x6d, 0x73, 0x67 
+  },
+  { 0x00, 0x00, 0x00, 0x48, 0x36, 0x73, 0x3c, 0x14, 
+    0x7d, 0x6d, 0x93, 0xcb, 0x00, 0x00, 0x00, 0x01 
+  },
+  { 0x18, 0x3c, 0x56, 0x28, 0x8e, 0x3c, 0xe9, 0xaa, 
+    0x22, 0x16, 0x56, 0xcb, 0x23, 0xa6, 0x9a, 0x4f
+  },
+  { 0x4b, 0x55, 0x38, 0x4f, 0xe2, 0x59, 0xc9, 0xc8, 
+    0x4e, 0x79, 0x35, 0xa0, 0x03, 0xcb, 0xe9, 0x28
+  }
+ },
+ {  24, 32, /* Vector 5 */
+  { 0x7c, 0x5c, 0xb2, 0x40, 0x1b, 0x3d, 0xc3, 0x3c, 
+    0x19, 0xe7, 0x34, 0x08, 0x19, 0xe0, 0xf6, 0x9c, 
+    0x67, 0x8c, 0x3d, 0xb8, 0xe6, 0xf6, 0xa9, 0x1a 
+  },
+  { 0x02, 0x0c, 0x6e, 0xad, 0xc2, 0xcb, 0x50, 0x0d
+  },  
+  { 0x00, 0x96, 0xb0, 0x3b 
+  },
+  { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
+  },
+  { 0x00, 0x96, 0xb0, 0x3b, 0x02, 0x0c, 0x6e, 0xad, 
+    0xc2, 0xcb, 0x50, 0x0d, 0x00, 0x00, 0x00, 0x01, 
+    0x00, 0x96, 0xb0, 0x3b, 0x02, 0x0c, 0x6e, 0xad, 
+    0xc2, 0xcb, 0x50, 0x0d, 0x00, 0x00, 0x00, 0x02 
+  },
+  { 0x45, 0x33, 0x41, 0xff, 0x64, 0x9e, 0x25, 0x35, 
+    0x76, 0xd6, 0xa0, 0xf1, 0x7d, 0x3c, 0xc3, 0x90,
+    0x94, 0x81, 0x62, 0x0f, 0x4e, 0xc1, 0xb1, 0x8b, 
+    0xe4, 0x06, 0xfa, 0xe4, 0x5e, 0xe9, 0xe5, 0x1f
+  },
+  { 0x45, 0x32, 0x43, 0xfc, 0x60, 0x9b, 0x23, 0x32, 
+    0x7e, 0xdf, 0xaa, 0xfa, 0x71, 0x31, 0xcd, 0x9f, 
+    0x84, 0x90, 0x70, 0x1c, 0x5a, 0xd4, 0xa7, 0x9c, 
+    0xfc, 0x1f, 0xe0, 0xff, 0x42, 0xf4, 0xfb, 0x00 
+  }
+ },
+ {  24, 36, /* Vector 6 */
+  { 0x02, 0xbf, 0x39, 0x1e, 0xe8, 0xec, 0xb1, 0x59, 
+    0xb9, 0x59, 0x61, 0x7b, 0x09, 0x65, 0x27, 0x9b, 
+    0xf5, 0x9b, 0x60, 0xa7, 0x86, 0xd3, 0xe0, 0xfe
+  }, 
+  { 0x5c, 0xbd, 0x60, 0x27, 0x8d, 0xcc, 0x09, 0x12
+  }, 
+  { 0x00, 0x07, 0xbd, 0xfd
+  }, 
+  { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+    0x20, 0x21, 0x22, 0x23 
+  },
+  { 0x00, 0x07, 0xbd, 0xfd, 0x5c, 0xbd, 0x60, 0x27, 
+    0x8d, 0xcc, 0x09, 0x12, 0x00, 0x00, 0x00, 0x01, 
+    0x00, 0x07, 0xbd, 0xfd, 0x5c, 0xbd, 0x60, 0x27, 
+    0x8d, 0xcc, 0x09, 0x12, 0x00, 0x00, 0x00, 0x02, 
+    0x00, 0x07, 0xbd, 0xfd, 0x5c, 0xbd, 0x60, 0x27, 
+    0x8d, 0xcc, 0x09, 0x12, 0x00, 0x00, 0x00, 0x03
+  }, /* counter blocks 1, 2, 3 in increasing counter order, matching k_str */
+  { 0x96, 0x88, 0x3d, 0xc6, 0x5a, 0x59, 0x74, 0x28, 
+    0x5c, 0x02, 0x77, 0xda, 0xd1, 0xfa, 0xe9, 0x57, 
+    0xc2, 0x99, 0xae, 0x86, 0xd2, 0x84, 0x73, 0x9f, 
+    0x5d, 0x2f, 0xd2, 0x0a, 0x7a, 0x32, 0x3f, 0x97, 
+    0x8b, 0xcf, 0x2b, 0x16, 0x39, 0x99, 0xb2, 0x26, 
+    0x15, 0xb4, 0x9c, 0xd4, 0xfe, 0x57, 0x39, 0x98 
+  },
+  { 0x96, 0x89, 0x3f, 0xc5, 0x5e, 0x5c, 0x72, 0x2f, 
+    0x54, 0x0b, 0x7d, 0xd1, 0xdd, 0xf7, 0xe7, 0x58, 
+    0xd2, 0x88, 0xbc, 0x95, 0xc6, 0x91, 0x65, 0x88, 
+    0x45, 0x36, 0xc8, 0x11, 0x66, 0x2f, 0x21, 0x88, 
+    0xab, 0xee, 0x09, 0x35 
+  }
+ },
+ {  32, 16, /* Vector 7 */
+  { 0x77, 0x6b, 0xef, 0xf2, 0x85, 0x1d, 0xb0, 0x6f, 
+    0x4c, 0x8a, 0x05, 0x42, 0xc8, 0x69, 0x6f, 0x6c, 
+    0x6a, 0x81, 0xaf, 0x1e, 0xec, 0x96, 0xb4, 0xd3, 
+    0x7f, 0xc1, 0xd6, 0x89, 0xe6, 0xc1, 0xc1, 0x04
+  },
+  { 0xdb, 0x56, 0x72, 0xc9, 0x7a, 0xa8, 0xf0, 0xb2 
+  }, 
+  { 0x00, 0x00, 0x00, 0x60
+  }, 
+  /* "Single block msg" */
+  { 0x53, 0x69, 0x6e, 0x67, 0x6c, 0x65, 0x20, 0x62, 
+    0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x6d, 0x73, 0x67
+  },
+  { 0x00, 0x00, 0x00, 0x60, 0xdb, 0x56, 0x72, 0xc9, 
+    0x7a, 0xa8, 0xf0, 0xb2, 0x00, 0x00, 0x00, 0x01
+  },
+  { 0x47, 0x33, 0xbe, 0x7a, 0xd3, 0xe7, 0x6e, 0xa5, 
+    0x3a, 0x67, 0x00, 0xb7, 0x51, 0x8e, 0x93, 0xa7
+  }, 
+  { 0x14, 0x5a, 0xd0, 0x1d, 0xbf, 0x82, 0x4e, 0xc7, 
+    0x56, 0x08, 0x63, 0xdc, 0x71, 0xe3, 0xe0, 0xc0 
+  }
+ },
+ {  32, 32, /* Vector 8 */
+  { 0xf6, 0xd6, 0x6d, 0x6b, 0xd5, 0x2d, 0x59, 0xbb, 
+    0x07, 0x96, 0x36, 0x58, 0x79, 0xef, 0xf8, 0x86, 
+    0xc6, 0x6d, 0xd5, 0x1a, 0x5b, 0x6a, 0x99, 0x74, 
+    0x4b, 0x50, 0x59, 0x0c, 0x87, 0xa2, 0x38, 0x84 
+  },
+  { 0xc1, 0x58, 0x5e, 0xf1, 0x5a, 0x43, 0xd8, 0x75
+  }, 
+  { 0x00, 0xfa, 0xac, 0x24
+  },
+  { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f 
+  },
+  { 0x00, 0xfa, 0xac, 0x24, 0xc1, 0x58, 0x5e, 0xf1, 
+    0x5a, 0x43, 0xd8, 0x75, 0x00, 0x00, 0x00, 0x01, 
+    0x00, 0xfa, 0xac, 0x24, 0xc1, 0x58, 0x5e, 0xf1, 
+    0x5a, 0x43, 0xd8, 0x75, 0x00, 0x00, 0x00, 0x02 
+  },
+  { 0xf0, 0x5f, 0x21, 0x18, 0x3c, 0x91, 0x67, 0x2b, 
+    0x41, 0xe7, 0x0a, 0x00, 0x8c, 0x43, 0xbc, 0xa6, 
+    0xa8, 0x21, 0x79, 0x43, 0x9b, 0x96, 0x8b, 0x7d, 
+    0x4d, 0x29, 0x99, 0x06, 0x8f, 0x59, 0xb1, 0x03 
+  },
+  { 0xf0, 0x5e, 0x23, 0x1b, 0x38, 0x94, 0x61, 0x2c, 
+    0x49, 0xee, 0x00, 0x0b, 0x80, 0x4e, 0xb2, 0xa9, 
+    0xb8, 0x30, 0x6b, 0x50, 0x8f, 0x83, 0x9d, 0x6a, 
+    0x55, 0x30, 0x83, 0x1d, 0x93, 0x44, 0xaf, 0x1c 
+  }
+ },
+ {  32, 36, /* Vector 9 */
+  { 0xff, 0x7a, 0x61, 0x7c, 0xe6, 0x91, 0x48, 0xe4, 
+    0xf1, 0x72, 0x6e, 0x2f, 0x43, 0x58, 0x1d, 0xe2, 
+    0xaa, 0x62, 0xd9, 0xf8, 0x05, 0x53, 0x2e, 0xdf, 
+    0xf1, 0xee, 0xd6, 0x87, 0xfb, 0x54, 0x15, 0x3d
+  },
+  { 0x51, 0xa5, 0x1d, 0x70, 0xa1, 0xc1, 0x11, 0x48
+  },
+  { 0x00, 0x1c, 0xc5, 0xb7
+  },
+  { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 
+    0x20, 0x21, 0x22, 0x23 
+  },
+  { 0x00, 0x1c, 0xc5, 0xb7, 0x51, 0xa5, 0x1d, 0x70, 
+    0xa1, 0xc1, 0x11, 0x48, 0x00, 0x00, 0x00, 0x01, 
+    0x00, 0x1c, 0xc5, 0xb7, 0x51, 0xa5, 0x1d, 0x70, 
+    0xa1, 0xc1, 0x11, 0x48, 0x00, 0x00, 0x00, 0x02, 
+    0x00, 0x1c, 0xc5, 0xb7, 0x51, 0xa5, 0x1d, 0x70, 
+    0xa1, 0xc1, 0x11, 0x48, 0x00, 0x00, 0x00, 0x03 
+  },
+  { 0xeb, 0x6d, 0x50, 0x81, 0x19, 0x0e, 0xbd, 0xf0, 
+    0xc6, 0x7c, 0x9e, 0x4d, 0x26, 0xc7, 0x41, 0xa5, 
+    0xa4, 0x16, 0xcd, 0x95, 0x71, 0x7c, 0xeb, 0x10, 
+    0xec, 0x95, 0xda, 0xae, 0x9f, 0xcb, 0x19, 0x00, 
+    0x3e, 0xe1, 0xc4, 0x9b, 0xc6, 0xb9, 0xca, 0x21, 
+    0x3f, 0x6e, 0xe2, 0x71, 0xd0, 0xa9, 0x33, 0x39
+  }, 
+  { 0xeb, 0x6c, 0x52, 0x82, 0x1d, 0x0b, 0xbb, 0xf7, 
+    0xce, 0x75, 0x94, 0x46, 0x2a, 0xca, 0x4f, 0xaa, 
+    0xb4, 0x07, 0xdf, 0x86, 0x65, 0x69, 0xfd, 0x07, 
+    0xf4, 0x8c, 0xc0, 0xb5, 0x83, 0xd6, 0x07, 0x1f, 
+    0x1e, 0xc0, 0xe6, 0xb8 
+  }
+ }
+};
+
+/* Add one to the counter held in bytes 12..15 of 'ctr_buf', with byte 15
+   the least significant.  A carry moves to the next lower index only when
+   a byte wraps to zero, and is dropped after byte 12. */
+void rfc3686_inc(unsigned char ctr_buf[AES_BLOCK_SIZE])
+{   int pos;
+
+    for(pos = 15; pos >= 12; --pos)
+    {
+        ctr_buf[pos] += 1;
+        if(ctr_buf[pos] != 0)
+            break;              /* no wrap, so nothing to carry */
+    }
+}
+
+/* Build the first counter block in 'ctr_buf': 4 nonce bytes, then 8 IV
+   bytes, then a 4-byte counter that is zeroed and incremented once by
+   rfc3686_inc. */
+void rfc3686_init( unsigned char nonce[4], unsigned char iv[8], unsigned char ctr_buf[AES_BLOCK_SIZE])
+{   unsigned char *dst = ctr_buf;
+
+    memcpy(dst, nonce, 4);
+    dst += 4;
+    memcpy(dst, iv, 8);
+    dst += 8;
+    memset(dst, 0, 4);
+
+    rfc3686_inc(ctr_buf);
+}
+
+/* Process 'len' bytes from 'ibuf' into 'obuf' by handing them to
+   aes_ctr_crypt with 'cbuf' as the counter block and rfc3686_inc as the
+   counter increment callback; the aes_ctr_crypt result is returned as is. */
+AES_RETURN rfc3686_crypt(const unsigned char *ibuf, unsigned char *obuf, int len, 
+                                                unsigned char *cbuf, aes_encrypt_ctx cx[1])
+{   AES_RETURN status;
+
+    status = aes_ctr_crypt(ibuf, obuf, len, cbuf, rfc3686_inc, cx);
+    return status;
+}
+
+/* Run every entry of tests[]: set the key, build the initial counter
+   block, process p_txt and compare the first m_len output bytes with
+   c_txt.  A mismatch prints "\nerror" on stdout; nothing is printed on
+   success and the return codes of the AES calls are not inspected. */
+void rfc3686_test(void)
+{   aes_encrypt_ctx aes_ctx[1];
+    unsigned char ctr_buf[AES_BLOCK_SIZE];
+    unsigned char obuf[36];
+    unsigned int n_tests = sizeof(tests) / sizeof(tests[0]);
+    unsigned int idx = 0;
+
+    while( idx < n_tests )
+    {   test_str *tv = &tests[idx];
+
+        aes_encrypt_key(tv->key, tv->k_len, aes_ctx);
+        rfc3686_init(tv->nonce, tv->iv, ctr_buf);
+        rfc3686_crypt(tv->p_txt, obuf, tv->m_len, ctr_buf, aes_ctx);
+
+        if(memcmp(obuf, tv->c_txt, tv->m_len))
+            printf("\nerror");
+
+        ++idx;
+    }
+}
+
+/* Program entry point: run the test vectors once.  The exit status is
+   always 0; mismatches are only reported on stdout by rfc3686_test. */
+int main(void)
+{   rfc3686_test();
+
+    return 0;
+}
--- a/src/java/kp2akeytransform/jni/aes/tablegen.c
+++ b/src/java/kp2akeytransform/jni/aes/tablegen.c
@ -0,0 +1,319 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The redistribution and use of this software (with or without changes)
+ is allowed without the payment of fees or royalties provided that:
+
+  1. source code distributions include the above copyright notice, this
+     list of conditions and the following disclaimer;
+
+  2. binary distributions include the above copyright notice, this list
+     of conditions and the following disclaimer in their documentation;
+
+  3. the name of the copyright holder is not used to endorse products
+     built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue Date: 20/12/2007
+*/
+
+#define DO_TABLES
+
+#include <stdio.h>
+#include "aesopt.h"
+
+#define sb_data(w) {\
+    w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
+    w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
+    w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
+    w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
+    w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
+    w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
+    w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
+    w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
+    w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
+    w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
+    w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
+    w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
+    w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
+    w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
+    w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
+    w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
+    w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
+    w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
+    w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
+    w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
+    w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
+    w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
+    w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
+    w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
+    w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
+    w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
+    w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
+    w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
+    w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
+    w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
+    w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
+    w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
+
+#define isb_data(w) {\
+    w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\
+    w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\
+    w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\
+    w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\
+    w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\
+    w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\
+    w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\
+    w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\
+    w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\
+    w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\
+    w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\
+    w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\
+    w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\
+    w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\
+    w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\
+    w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\
+    w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\
+    w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\
+    w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\
+    w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\
+    w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\
+    w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\
+    w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\
+    w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\
+    w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\
+    w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\
+    w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\
+    w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\
+    w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\
+    w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\
+    w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\
+    w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) }
+
+#define mm_data(w) {\
+    w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\
+    w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\
+    w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\
+    w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\
+    w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\
+    w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\
+    w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\
+    w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\
+    w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\
+    w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\
+    w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\
+    w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\
+    w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\
+    w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\
+    w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\
+    w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\
+    w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\
+    w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\
+    w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\
+    w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\
+    w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\
+    w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\
+    w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\
+    w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\
+    w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\
+    w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\
+    w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\
+    w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\
+    w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\
+    w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\
+    w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\
+    w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) }
+
+#define rc_data(w) {\
+    w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\
+    w(0x1b), w(0x36) }
+
+#define h0(x)   (x)     /* identity: yields the value unchanged */
+
+#define w0(p)   bytes2word(p, 0, 0, 0)     /* w0..w3: p as the 1st..4th bytes2word argument, the others zero */
+#define w1(p)   bytes2word(0, p, 0, 0)
+#define w2(p)   bytes2word(0, 0, p, 0)
+#define w3(p)   bytes2word(0, 0, 0, p)
+
+#define u0(p)   bytes2word(f2(p), p, p, f3(p))     /* u1..u3 use the same four values, rotated one position each */
+#define u1(p)   bytes2word(f3(p), f2(p), p, p)
+#define u2(p)   bytes2word(p, f3(p), f2(p), p)
+#define u3(p)   bytes2word(p, p, f3(p), f2(p))
+
+#define v0(p)   bytes2word(fe(p), f9(p), fd(p), fb(p))     /* v1..v3 use the same four values, rotated one position each */
+#define v1(p)   bytes2word(fb(p), fe(p), f9(p), fd(p))
+#define v2(p)   bytes2word(fd(p), fb(p), fe(p), f9(p))
+#define v3(p)   bytes2word(f9(p), fd(p), fb(p), fe(p))
+
+#define f2(x)   ((x<<1) ^ (((x>>7) & 1) * WPOLY))     /* shift left 1; XOR in WPOLY when bit 7 of x is set */
+#define f4(x)   ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY))     /* shift left 2; bit 6 adds WPOLY, bit 7 adds 2*WPOLY */
+#define f8(x)   ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) \
+                        ^ (((x>>5) & 4) * WPOLY))
+#define f3(x)   (f2(x) ^ x)     /* NOTE(review): f3..fe look like GF(2^8) multiples 3, 9, 0xb, 0xd, 0xe of x - confirm */
+#define f9(x)   (f8(x) ^ x)
+#define fb(x)   (f8(x) ^ f2(x) ^ x)
+#define fd(x)   (f8(x) ^ f4(x) ^ x)
+#define fe(x)   (f8(x) ^ f4(x) ^ f2(x))     /* NOTE(review): x is not parenthesised in the f* macros; pass only simple operands */
+
+#include "aestab.h"
+
+#define t_parm(m,n) "t_"#m#n, t_##m##n
+
+/* Write the RC_LENGTH-entry word table 't' to 'f' as the C source of a
+   uint_32t array named 'h', four values per line, every value except the
+   last followed by a comma. */
+void rtab(FILE *f, unsigned char *h, const unsigned int t[RC_LENGTH])
+{   int idx = 0;
+
+    fprintf(f, "\nuint_32t %s[RC_LENGTH] = \n{", h);
+
+    while(idx < RC_LENGTH)
+    {
+        if((idx & 3) == 0)              /* start a new output line every 4 values */
+            fprintf(f, "\n        ");
+        fprintf(f, (idx == RC_LENGTH - 1) ? "0x%08x " : "0x%08x, ", t[idx]);
+        ++idx;
+    }
+
+    fprintf(f, "\n};\n");
+}
+
+/* Write the 256-entry byte table 't' to 'f' as the C source of a uint_8t
+   array named 'h', eight values per line, every value except the last
+   followed by a comma. */
+void btab_1(FILE *f, unsigned char *h, const unsigned char t[256])
+{   int idx = 0;
+
+    fprintf(f, "\nuint_8t %s[256] = \n{", h);
+
+    while(idx < 256)
+    {
+        if((idx & 7) == 0)              /* start a new output line every 8 values */
+            fprintf(f, "\n        ");
+        fprintf(f, (idx == 255) ? "0x%02x " : "0x%02x, ", t[idx]);
+        ++idx;
+    }
+
+    fprintf(f, "\n};\n");
+}
+
+/* Write the 256-entry word table 't' to 'f' as the C source of a uint_32t
+   array named 'h', four values per line, every value except the last
+   followed by a comma. */
+void wtab_1(FILE *f, unsigned char *h, const unsigned int t[256])
+{   int idx = 0;
+
+    fprintf(f, "\nuint_32t %s[256] = \n{", h);
+
+    while(idx < 256)
+    {
+        if((idx & 3) == 0)              /* start a new output line every 4 values */
+            fprintf(f, "\n        ");
+        fprintf(f, (idx == 255) ? "0x%08x " : "0x%08x, ", t[idx]);
+        ++idx;
+    }
+
+    fprintf(f, "\n};\n");
+}
+
+/* Write the 4 x 256 word table 't' to 'f' as the C source of a uint_32t
+   two-dimensional array named 'h'.  Each row is a brace-enclosed list
+   with four values per line; rows and values are comma separated, with
+   no comma after the last one. */
+void wtab_4(FILE *f, unsigned char *h, const unsigned int t[4][256])
+{   int row, col;
+
+    fprintf(f, "\nuint_32t %s[4][256] = \n{", h);
+
+    for(row = 0; row < 4; ++row)
+    {   const unsigned int *vals = t[row];
+
+        fprintf(f, "\n    {");
+
+        for(col = 0; col < 256; ++col)
+        {
+            if((col & 3) == 0)          /* start a new output line every 4 values */
+                fprintf(f, "\n        ");
+            fprintf(f, (col == 255) ? "0x%08x " : "0x%08x, ", vals[col]);
+        }
+
+        fprintf(f, (row == 3) ? "\n    }" : "\n    },");
+    }
+
+    fprintf(f, "\n};\n");
+}
+
+/* Generate "aestab2.c" in the current directory: a small header section
+   followed by the tables selected by the *_SET configuration macros,
+   each written as a C array by rtab/btab_1/wtab_1/wtab_4.
+   Returns 0 on success, or 1 (after a perror message) when the output
+   file cannot be created or cannot be closed cleanly. */
+int main(void)
+{   FILE *f;
+
+    f = fopen("aestab2.c", "w");
+    if(f == NULL)
+    {   /* without this check every fprintf below would be handed a null stream */
+        perror("aestab2.c");
+        return 1;
+    }
+
+    fprintf(f, "\n#include \"aes.h\"\n");
+    fprintf(f, "\n#define RC_LENGTH   (5 * (AES_BLOCK_SIZE / 4 - 2))\n");
+    fprintf(f, "\nvoid aes_init() \n{ \n}\n");
+
+    rtab(f, t_parm(r,c));
+
+#if defined( SBX_SET )
+    btab_1(f, t_parm(s,box));
+#endif
+
+#if defined( ISB_SET )
+    btab_1(f, t_parm(i,box));
+#endif
+
+#if defined( FT1_SET )
+    wtab_1(f, t_parm(f,n));
+#endif
+#if defined( FT4_SET )
+    wtab_4(f, t_parm(f,n));
+#endif
+
+#if defined( FL1_SET )
+    wtab_1(f, t_parm(f,l));
+#endif
+#if defined( FL4_SET )
+    wtab_4(f, t_parm(f,l));
+#endif
+
+#if defined( IT1_SET )
+    wtab_1(f, t_parm(i,n));
+#endif
+#if defined( IT4_SET )
+    wtab_4(f, t_parm(i,n));
+#endif
+
+#if defined( IL1_SET )
+    wtab_1(f, t_parm(i,l));
+#endif
+#if defined( IL4_SET )
+    wtab_4(f, t_parm(i,l));
+#endif
+
+    /* the t_ls tables are written only when the matching t_fl table is not */
+#if defined( LS1_SET )
+#if !defined( FL1_SET )
+    wtab_1(f, t_parm(l,s));
+#endif
+#endif
+#if defined( LS4_SET )
+#if !defined( FL4_SET )
+    wtab_4(f, t_parm(l,s));
+#endif
+#endif
+
+#if defined( IM1_SET )
+    wtab_1(f, t_parm(i,m));
+#endif
+#if defined( IM4_SET )
+    wtab_4(f, t_parm(i,m));
+#endif
+
+    /* fclose flushes buffered output, so a failure here means a truncated file */
+    if(fclose(f) != 0)
+    {
+        perror("aestab2.c");
+        return 1;
+    }
+    return 0;
+}
--- a/src/java/kp2akeytransform/jni/aes/vb.txt
+++ b/src/java/kp2akeytransform/jni/aes/vb.txt
@ -0,0 +1,263 @@
+
+Private Const BlockLength = 16              ' cipher block length in bytes (size of InOut.IO)
+Private Const BlockLengthMax = 32           ' maximum block length in bytes
+Private Const KeyLengthMax = 32             ' maximum key length in bytes (size of Key.K)
+Private Const KeyScheduleLengthMax = 64     ' number of Long elements in a key schedule (size of Ekey)
+
+Private Type EncCtx                         ' type to hold the AES encryption context data
+  Ekey(0 To KeyScheduleLengthMax - 1) As Long
+End Type
+
+Private Type DecCtx                         ' type to hold the AES decryption context data
+  Ekey(0 To KeyScheduleLengthMax - 1) As Long
+End Type
+
+Private Type Key                            ' type to hold user key data
+ K(0 To KeyLengthMax - 1) As Byte
+End Type
+
+Private Type InOut                          ' type to hold one cipher input or output block
+ IO(0 To BlockLength - 1) As Byte
+End Type
+
+Private Type BigInOut                       ' type to hold 128 consecutive cipher input or output blocks
+ IO(0 To 128 * BlockLength - 1) As Byte
+End Type
+
+Rem Change "c:\temp\" in the following lines to the directory path where the AES DLL is located
+Private Declare Function AesEncryptKey128 Lib "c:\temp\aes.dll" _
+        Alias "_aes_encrypt_key128@8" (K As Key, C As EncCtx) As Integer
+Private Declare Function AesEncryptKey192 Lib "c:\temp\aes.dll" _
+        Alias "_aes_encrypt_key192@8" (K As Key, C As EncCtx) As Integer
+Private Declare Function AesEncryptKey256 Lib "c:\temp\aes.dll" _
+        Alias "_aes_encrypt_key256@8" (K As Key, C As EncCtx) As Integer
+Rem The "@12" suffix means three 4-byte arguments, so the key length N is 32 bits wide:
+Rem it is declared As Long, in the same way as in AesDecryptKey below.
+Private Declare Function AesEncryptKey Lib "c:\temp\aes.dll" _
+        Alias "_aes_encrypt_key@12" (K As Key, ByVal N As Long, C As EncCtx) As Integer
+Private Declare Function AesEncrypt Lib "c:\temp\aes.dll" _
+        Alias "_aes_encrypt@12" (Ib As InOut, Ob As InOut, C As EncCtx) As Integer
+Private Declare Function AesDecryptKey128 Lib "c:\temp\aes.dll" _
+        Alias "_aes_decrypt_key128@8" (K As Key, C As DecCtx) As Integer
+Private Declare Function AesDecryptKey192 Lib "c:\temp\aes.dll" _
+        Alias "_aes_decrypt_key192@8" (K As Key, C As DecCtx) As Integer
+Private Declare Function AesDecryptKey256 Lib "c:\temp\aes.dll" _
+        Alias "_aes_decrypt_key256@8" (K As Key, C As DecCtx) As Integer
+Private Declare Function AesDecryptKey Lib "c:\temp\aes.dll" _
+        Alias "_aes_decrypt_key@12" (K As Key, ByVal N As Long, C As DecCtx) As Integer
+Private Declare Function AesDecrypt Lib "c:\temp\aes.dll" _
+        Alias "_aes_decrypt@12" (Ib As InOut, Ob As InOut, C As DecCtx) As Integer
+
+Private Declare Function AesModeReset Lib "c:\temp\aes.dll" Alias "_aes_mode_reset@4" _
+        (C As EncCtx) As Integer
+Private Declare Function AesEcbEncrypt Lib "c:\temp\aes.dll" Alias "_aes_ecb_encrypt@16" _
+        (Ib As BigInOut, Ob As BigInOut, ByVal N As Long, C As EncCtx) As Integer
+Private Declare Function AesEcbDecrypt Lib "c:\temp\aes.dll" Alias "_aes_ecb_decrypt@16" _
+        (Ib As BigInOut, Ob As BigInOut, ByVal N As Long, C As DecCtx) As Integer
+Private Declare Function AesCbcEncrypt Lib "c:\temp\aes.dll" Alias "_aes_cbc_encrypt@20" _
+        (Ib As BigInOut, Ob As BigInOut, ByVal N As Long, Iv As InOut, C As EncCtx) As Integer
+Private Declare Function AesCbcDecrypt Lib "c:\temp\aes.dll" Alias "_aes_cbc_decrypt@20" _
+        (Ib As BigInOut, Ob As BigInOut, ByVal N As Long, Iv As InOut, C As DecCtx) As Integer
+Private Declare Function AesCfbEncrypt Lib "c:\temp\aes.dll" Alias "_aes_cfb_encrypt@20" _
+        (Ib As BigInOut, Ob As BigInOut, ByVal N As Long, Iv As InOut, C As EncCtx) As Integer
+Private Declare Function AesCfbDecrypt Lib "c:\temp\aes.dll" Alias "_aes_cfb_decrypt@20" _
+        (Ib As BigInOut, Ob As BigInOut, ByVal N As Long, Iv As InOut, C As EncCtx) As Integer
+Private Declare Function AesOfbCrypt Lib "c:\temp\aes.dll" Alias "_aes_ofb_crypt@20" _
+        (Ib As BigInOut, Ob As BigInOut, ByVal N As Long, Iv As InOut, C As EncCtx) As Integer
+Private Declare Function AesCtrCrypt Lib "c:\temp\aes.dll" Alias "_aes_ctr_crypt@24" _
+        (Ib As BigInOut, Ob As BigInOut, ByVal N As Long, Iv As InOut, ByVal CtrFn As Long, C As EncCtx) As Integer
+
+Private Sub Hex(X As Byte)                  ' print a byte as two lower-case hexadecimal digits
+Dim HiDigit As Byte, LoDigit As Byte
+HiDigit = X \ 16                            ' upper four bits
+LoDigit = X And 15                          ' lower four bits
+' digits 0-9 map to "0".."9" (48 + d), digits 10-15 map to "a".."f" (87 + d)
+If HiDigit >= 10 Then Debug.Print Chr(87 + HiDigit); Else Debug.Print Chr(48 + HiDigit);
+If LoDigit >= 10 Then Debug.Print Chr(87 + LoDigit); Else Debug.Print Chr(48 + LoDigit);
+End Sub
+
+Private Sub OutKey(S As String, B As Key, ByVal KeyL As Integer)   ' print label S, then the first KeyL key bytes in hex
+Dim Pos As Integer
+Debug.Print                                 ' start a new output line
+Debug.Print S;
+Pos = 0
+Do While Pos < KeyL
+   Hex B.K(Pos)
+   Pos = Pos + 1
+Loop
+End Sub
+
+Private Sub OutBlock(S As String, B As InOut)   ' print label S, then all BlockLength bytes of the block in hex
+Dim Pos As Integer
+Debug.Print                                 ' start a new output line
+Debug.Print S;
+Pos = 0
+Do While Pos < BlockLength
+   Hex B.IO(Pos)
+   Pos = Pos + 1
+Loop
+End Sub
+
+Private Sub OutBigBlock(S As String, B As BigInOut)   ' print label S, the first block, " ... ", then the last of the 128 blocks
+Dim Pos As Integer
+Dim LastStart As Integer
+LastStart = 127 * BlockLength               ' index of the first byte of the final block
+Debug.Print                                 ' start a new output line
+Debug.Print S;
+Pos = 0
+Do While Pos < BlockLength
+   Hex B.IO(Pos)
+   Pos = Pos + 1
+Loop
+Debug.Print " ... ";
+Pos = LastStart
+Do While Pos < LastStart + BlockLength
+   Hex B.IO(Pos)
+   Pos = Pos + 1
+Loop
+End Sub
+
+' Increment the counter held in Ctr.IO(0) to Ctr.IO(3), with IO(0) as the
+' least significant byte. A carry moves to the next byte only when the
+' current byte wraps to zero, and is dropped after IO(3).
+' Each sum is masked with &HFF so that 255 + 1 wraps to 0 instead of
+' raising an overflow error when assigned back to a Byte.
+Private Sub CtrInc(Ctr As InOut)
+    Dim Pos As Integer
+    For Pos = 0 To 3
+        Ctr.IO(Pos) = (Ctr.IO(Pos) + 1) And &HFF
+        If Ctr.IO(Pos) <> 0 Then Exit For       ' no wrap, so there is no carry to propagate
+    Next Pos
+End Sub
+
+Rem The following Main routine should output the following in the immediate window:
+Rem Variable Key Length ( 16 )
+Rem Key =            00000000000000000000000000000000
+Rem Input =          00000000000000000000000000000000
+Rem Encrypted Text = 66e94bd4ef8a2c3b884cfa59ca342b2e
+Rem Decrypted Text = 00000000000000000000000000000000
+Rem Variable Key Length ( 24 )
+Rem Key =            000000000000000000000000000000000000000000000000
+Rem Input =          00000000000000000000000000000000
+Rem Encrypted Text = aae06992acbf52a3e8f4a96ec9300bd7
+Rem Decrypted Text = 00000000000000000000000000000000
+Rem Variable Key Length ( 32 )
+Rem Key =            0000000000000000000000000000000000000000000000000000000000000000
+Rem Input =          00000000000000000000000000000000
+Rem Encrypted Text = dc95c078a2408989ad48a21492842087
+Rem Decrypted Text = 00000000000000000000000000000000
+Rem Fixed Key Length ( 128 )
+Rem Key =            00000000000000000000000000000000
+Rem Input =          00000000000000000000000000000000
+Rem Encrypted Text = 66e94bd4ef8a2c3b884cfa59ca342b2e
+Rem Decrypted Text = 00000000000000000000000000000000
+Rem Fixed Key Length ( 192 )
+Rem Key =            000000000000000000000000000000000000000000000000
+Rem Input =          00000000000000000000000000000000
+Rem Encrypted Text = aae06992acbf52a3e8f4a96ec9300bd7
+Rem Decrypted Text = 00000000000000000000000000000000
+Rem Fixed Key Length ( 256 )
+Rem Key =            0000000000000000000000000000000000000000000000000000000000000000
+Rem Input =          00000000000000000000000000000000
+Rem Encrypted Text = dc95c078a2408989ad48a21492842087
+Rem Decrypted Text = 00000000000000000000000000000000
+
+Sub Main()  ' Self-test driver: exercises the AES wrappers with all-zero keys and prints the results with Debug.Print
+Dim Key As Key                                  ' all these variables are initialised
+Dim Ib As InOut, Ob As InOut, Rb As InOut       ' to zero by VBA
+Dim Iv1 As InOut, Iv2 As InOut
+Dim Ecx As EncCtx
+Dim Dcx As DecCtx
+Dim RetVal As Integer
+' Part 1: variable-key-length calls, run once each for 16-, 24- and 32-byte keys.
+For KeyL = 16 To 32 Step 8
+Debug.Print "Variable Key Length ("; KeyL; ")";
+OutKey "Key =            ", Key, KeyL
+OutBlock "Input =          ", Ib
+RetVal = AesEncryptKey(Key, KeyL, Ecx)          ' set an all zero encryption key
+RetVal = AesEncrypt(Ib, Ob, Ecx)                ' encrypt Ib to Ob
+OutBlock "Encrypted Text = ", Ob
+RetVal = AesDecryptKey(Key, KeyL, Dcx)          ' set an all zero decryption key
+RetVal = AesDecrypt(Ob, Rb, Dcx)                ' decrypt Ob to Rb
+OutBlock "Decrypted Text = ", Rb
+Debug.Print
+Next KeyL
+' Part 2: the same round trip through the fixed-length key routines, starting with 128 bits.
+Debug.Print
+KeyL = 128: Debug.Print "Fixed Key Length ("; KeyL; ")";
+OutKey "Key =            ", Key, 16
+OutBlock "Input =          ", Ib
+RetVal = AesEncryptKey128(Key, Ecx)             ' set an all zero encryption key
+RetVal = AesEncrypt(Ib, Ob, Ecx)                ' encrypt Ib to Ob
+OutBlock "Encrypted Text = ", Ob
+RetVal = AesDecryptKey128(Key, Dcx)             ' set an all zero decryption key
+RetVal = AesDecrypt(Ob, Rb, Dcx)                ' decrypt Ob to Rb
+OutBlock "Decrypted Text = ", Rb
+Debug.Print
+' 192-bit fixed-length key.
+Debug.Print
+KeyL = 192: Debug.Print "Fixed Key Length ("; KeyL; ")";
+OutKey "Key =            ", Key, 24
+OutBlock "Input =          ", Ib
+RetVal = AesEncryptKey192(Key, Ecx)             ' set an all zero encryption key
+RetVal = AesEncrypt(Ib, Ob, Ecx)                ' encrypt Ib to Ob
+OutBlock "Encrypted Text = ", Ob
+RetVal = AesDecryptKey192(Key, Dcx)             ' set an all zero decryption key
+RetVal = AesDecrypt(Ob, Rb, Dcx)                ' decrypt Ob to Rb
+OutBlock "Decrypted Text = ", Rb
+Debug.Print
+' 256-bit fixed-length key.
+Debug.Print
+KeyL = 256: Debug.Print "Fixed Key Length ("; KeyL; ")";
+OutKey "Key =            ", Key, 32
+OutBlock "Input =          ", Ib
+RetVal = AesEncryptKey256(Key, Ecx)             ' set an all zero encryption key
+RetVal = AesEncrypt(Ib, Ob, Ecx)                ' encrypt Ib to Ob
+OutBlock "Encrypted Text = ", Ob
+RetVal = AesDecryptKey256(Key, Dcx)             ' set an all zero decryption key
+RetVal = AesDecrypt(Ob, Rb, Dcx)                ' decrypt Ob to Rb
+OutBlock "Decrypted Text = ", Rb
+Debug.Print
+' NOTE(review): this section sets the 128-bit keys but never calls AesEncrypt/AesDecrypt, so it prints whatever Ob and Rb held after the 256-bit run - confirm this is intended.
+Debug.Print
+KeyL = 128: Debug.Print "Fixed Key Length ("; KeyL; ")";
+OutKey "Key =            ", Key, 16
+OutBlock "Input =          ", Ib
+RetVal = AesEncryptKey128(Key, Ecx)             ' set an all zero encryption key
+OutBlock "Encrypted Text = ", Ob
+RetVal = AesDecryptKey128(Key, Dcx)             ' set an all zero decryption key
+OutBlock "Decrypted Text = ", Rb
+Debug.Print
+' Part 3: multi-block modes over a 128-block buffer, all with the 128-bit zero key set below.
+Debug.Print
+KeyL = 128: Debug.Print "Fixed Key Length ("; KeyL; ")";
+OutKey "Key =            ", Key, 16
+RetVal = AesEncryptKey128(Key, Ecx)             ' set an all zero encryption key
+RetVal = AesDecryptKey128(Key, Dcx)             ' set an all zero decryption key
+Dim Pt1 As BigInOut, Pt2 As BigInOut, Ct As BigInOut
+' Fill the plaintext with the repeating byte pattern 0, 1, ..., 255.
+For i = 0 To 128 * BlockLength - 1
+    Pt1.IO(i) = i Mod 256
+Next i
+' ECB round trip: Pt1 -> Ct -> Pt2.
+OutBigBlock "ECB Input =      ", Pt1
+RetVal = AesEcbEncrypt(Pt1, Ct, 128 * BlockLength, Ecx)
+OutBigBlock "Encrypted Text = ", Ct
+RetVal = AesEcbDecrypt(Ct, Pt2, 128 * BlockLength, Dcx)
+OutBigBlock "Decrypted Text = ", Pt2
+Debug.Print
+' CBC round trip: Iv1 is passed to the encrypting call and Iv2 to the decrypting call.
+OutBigBlock "CBC Mode Input = ", Pt1
+RetVal = AesCbcEncrypt(Pt1, Ct, 128 * BlockLength, Iv1, Ecx)
+OutBigBlock "Encrypted Text = ", Ct
+RetVal = AesCbcDecrypt(Ct, Pt2, 128 * BlockLength, Iv2, Dcx)
+OutBigBlock "Decrypted Text = ", Pt2
+Debug.Print
+' CFB round trip: both directions are given the encryption context Ecx. Iv1/Iv2 are reused from the CBC step - presumably both were advanced identically there; verify.
+OutBigBlock "CFB Mode Input = ", Pt1
+RetVal = AesCfbEncrypt(Pt1, Ct, 128 * BlockLength, Iv1, Ecx)
+OutBigBlock "Encrypted Text = ", Ct
+RetVal = AesCfbDecrypt(Ct, Pt2, 128 * BlockLength, Iv2, Ecx)
+OutBigBlock "Decrypted Text = ", Pt2
+Debug.Print
+' OFB round trip: the same AesOfbCrypt routine is called for both directions.
+OutBigBlock "OFB Mode Input = ", Pt1
+RetVal = AesOfbCrypt(Pt1, Ct, 128 * BlockLength, Iv1, Ecx)
+OutBigBlock "Encrypted Text = ", Ct
+RetVal = AesOfbCrypt(Ct, Pt2, 128 * BlockLength, Iv2, Ecx)
+OutBigBlock "Decrypted Text = ", Pt2
+Debug.Print
+' The CTR section is compiled out by the #If False guard below.
+#If False Then
+Rem CTR Mode is not working because of a problem with the 'AddressOf' operator
+OutBigBlock "CTR Mode Input = ", Pt1
+RetVal = AesCtrCrypt(Pt1, Ct, 128 * BlockLength, Iv1, AddressOf CtrInc, Ecx)
+OutBigBlock "Encrypted Text = ", Ct
+RetVal = AesCtrCrypt(Ct, Pt2, 128 * BlockLength, Iv2, AddressOf CtrInc, Ecx)
+OutBigBlock "Decrypted Text = ", Pt2
+Debug.Print
+#End If
+
+Debug.Print
+End Sub
--- a/src/java/kp2akeytransform/jni/aes/vbaxam.doc
+++ b/src/java/kp2akeytransform/jni/aes/vbaxam.doc
--- a/src/java/kp2akeytransform/jni/aes/via_ace.txt
+++ b/src/java/kp2akeytransform/jni/aes/via_ace.txt
@ -0,0 +1,158 @@
+
+Support for the VIA Nehemiah Advanced Cryptography Engine (ACE)
+---------------------------------------------------------------
+
+A. Introduction
+
+The AES code now supports the VIA ACE engine.  The engine is invoked by the
+multiple block AES modes calls in aes_modes.c and not by the basic AES code.
+
+The define USE_VIA_ACE_IF_PRESENT is defined if VIA ACE detection and use is
+required with fallback to the normal AES code if it is not present.
+
+The define ASSUME_VIA_ACE_PRESENT is used when it is known that the VIA ACE
+engine will always be present.  Note, however, that this code will not work
+correctly if the VIA ACE engine is either not present or turned off.
+
+To enable ACE support the appropriate defines in section 2 of the options in
+aesopt.h must be set.  If ACE support is required then key scheduling must
+use the C code so only the generic C code in Win32 mode, ASM_X86_V1C and
+ASM_X86_V2C assembler code can be used (i.e. ASM_X86_V2 and ASM_AMD64_C do
+NOT support VIA ACE).
+
+B. Using ACE
+
+ACE is used in the code that implements the subroutines used for the multiple
+block AES modes defined in aes_modes.h:
+
+    // used to reset modes to their start point without entering a new key
+    AES_RETURN aes_mode_reset(aes_encrypt_ctx cx[1]);
+
+    AES_RETURN aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, const aes_encrypt_ctx cx[1]);
+
+    AES_RETURN aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, const aes_decrypt_ctx cx[1]);
+
+    AES_RETURN aes_cbc_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, const aes_encrypt_ctx cx[1]);
+
+    AES_RETURN aes_cbc_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, const aes_decrypt_ctx cx[1]);
+
+    AES_RETURN aes_cfb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx cx[1]);
+
+    AES_RETURN aes_cfb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx cx[1]);
+
+    #define aes_ofb_encrypt aes_ofb_crypt
+    #define aes_ofb_decrypt aes_ofb_crypt
+
+    AES_RETURN aes_ofb_crypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, unsigned char *iv, aes_encrypt_ctx cx[1]);
+
+    typedef void cbuf_inc(unsigned char *cbuf);
+
+    #define aes_ctr_encrypt aes_ctr_crypt
+    #define aes_ctr_decrypt aes_ctr_crypt
+
+    AES_RETURN aes_ctr_crypt(const unsigned char *ibuf, unsigned char *obuf,
+       int len, unsigned char *cbuf, cbuf_inc ctr_inc, aes_encrypt_ctx cx[1]);
+
+Note that the single block AES calls defined in aes.h:
+
+    AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out,
+                    const aes_encrypt_ctx cx[1]);
+
+    AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out,
+                    const aes_decrypt_ctx cx[1]);
+
+do NOT provide ACE support and should not be used if the ACE engine is
+available and ACE support is required.
+
+C. Constraints and Optimisation
+
+There are several constraints that have to be observed when ACE is used if
+the best performance is to be achieved:
+
+1. As usual the appropriate key set up subroutine must be called before any
+   of the above subroutines are used.
+
+2. The AES contexts - aes_encrypt_ctx and aes_decrypt_ctx - used with
+   these subroutines MUST be 16 byte aligned.  Failure to align AES contexts
+   will often cause memory alignment exceptions.
+
+3. The buffers used for inputs, outputs and IVs do not need to be 16 byte
+   aligned but the speed that is achieved will be much higher if this can be
+   arranged. In a flat address space (as now typical in 32-bit systems) this
+   means that: (a) the lower nibble of all buffer addresses must be
+   zero, and (b) the compiler used must arrange to load the data and stack
+   segments on 16 byte address boundaries.  The Microsoft VC++ compiler can
+   align all variables in this way (see the example macros for doing this in
+   aes_via_ace.txt). However it seems that the GCC compiler will only do this
+   for static global variables but not for variables placed on the stack, that
+   is local variables.
+
+4. The data length in bytes (len) in calls to the ECB and CBC subroutines
+   must be a multiple of the 16 byte block length. An error return will
+   occur if this is not so.
+
+5. The data length in all calls to the CFB, OFB and CTR subroutines must also
+   be a multiple of 16 bytes if the VIA ACE engine is to be used. Otherwise
+   these lengths can be of any value but the subroutines will only proceed at
+   full speed for lengths that are multiples of 16 bytes. The CFB, OFB and
+   CTR subroutines are incremental, with subsequent calls continuing from
+   where previous calls finished.  The subroutine aes_mode_reset() can be used
+   to restart a mode without a key change but is not needed after a new key is
+   entered. Such a reset is not needed when the data lengths in all individual
+   calls to the AES mode subroutines are multiples of 16 bytes.
+
+6. Note that the AES context contains mode details so only one type of mode
+   can be run from a context at any one time.  A reset is necessary if a new
+   mode is used without a new context or a new key.
+
+D. Expected Speeds
+
+The speeds that have been obtained using a 1.2 GHz VIA C3 processor with
+this code are given below (note that since CTR mode is not available in
+the VIA hardware it is not present in the aligned timing figures):
+
+AES Timing (Cycles/Byte) with the VIA ACE Engine (aligned in C)
+Mode   Blocks:      1       10      100     1000    Peak Throughput
+ecb encrypt      8.25     1.36     0.69     0.63  1.9 Gbytes/second
+ecb decrypt      8.75     1.41     0.70     0.64  1.9 Gbytes/second
+cbc encrypt     11.56     2.41     1.47     1.38  870 Mbytes/second
+cbc decrypt     12.37     2.38     1.47     1.38  870 Mbytes/second
+cfb encrypt     11.93     2.46     1.48     1.38  870 Mbytes/second
+cfb decrypt     12.18     2.36     1.47     1.38  870 Mbytes/second
+ofb encrypt     13.31     3.88     2.92     2.82  425 Mbytes/second
+ofb decrypt     13.31     3.88     2.92     2.82  425 Mbytes/second
+
+AES Timing (Cycles/Byte) with the VIA ACE Engine (unaligned in C)
+Mode   Blocks:      1       10      100     1000    Peak Throughput
+ecb encrypt     17.68     4.31     3.15     3.05  390 Mbytes/second
+ecb decrypt     18.12     4.36     3.17     3.06  390 Mbytes/second
+cbc encrypt     20.68     5.70     4.39     4.27  280 Mbytes/second
+cbc decrypt     21.87     5.75     4.34     4.21  285 Mbytes/second
+cfb encrypt     21.06     5.81     4.43     4.31  280 Mbytes/second
+cfb decrypt     21.37     5.72     4.36     4.24  285 Mbytes/second
+ofb encrypt     22.43     7.23     5.85     5.72  210 Mbytes/second
+ofb decrypt     22.43     7.34     5.86     5.73  210 Mbytes/second
+ctr encrypt     16.43     6.90     6.00     5.89  205 Mbytes/second
+ctr decrypt     16.43     6.90     6.00     5.89  205 Mbytes/second
+
+AES Timing (Cycles/Byte) with the VIA ACE Engine (unaligned assembler)
+Mode   Blocks:      1       10      100     1000    Peak Throughput
+ecb encrypt     11.87     2.89     1.91     1.83  660 Mbytes/second
+ecb decrypt     12.18     2.83     1.97     1.87  640 Mbytes/second
+cbc encrypt     14.87     4.13     3.11     3.01  400 Mbytes/second
+cbc decrypt     14.43     3.87     2.89     2.80  430 Mbytes/second
+cfb encrypt     14.75     4.12     3.10     3.01  400 Mbytes/second
+cfb decrypt     14.12     4.10     2.88     2.79  430 Mbytes/second
+ofb encrypt     15.25     5.36     4.37     4.27  280 Mbytes/second
+ofb decrypt     15.25     5.36     4.36     4.27  280 Mbytes/second
+ctr encrypt     13.31     4.79     4.01     3.94  305 Mbytes/second
+ctr decrypt     13.31     4.79     4.01     3.94  305 Mbytes/second
+
+    Brian Gladman, Worcester, UK
--- a/src/java/kp2akeytransform/jni/final_key/Android.mk
+++ b/src/java/kp2akeytransform/jni/final_key/Android.mk
@ -0,0 +1,16 @@
+LOCAL_PATH := $(call my-dir)
+# ndk-build module description for the JNI wrapper library.
+include $(CLEAR_VARS)
+# Name of the module that is built from the sources listed below.
+LOCAL_MODULE := final-key
+
+LOCAL_SRC_FILES := \
+	kpd_jni.c
+# The wrapper includes headers from the sibling sha and aes directories.
+LOCAL_C_INCLUDES := $(LOCAL_PATH)/../sha $(LOCAL_PATH)/../aes
+# Link against the static modules named aes and sha.
+LOCAL_STATIC_LIBRARIES := aes sha
+# -llog links the Android logging library.
+LOCAL_LDLIBS := -llog
+# Build the module as a shared library.
+include $(BUILD_SHARED_LIBRARY)
--- a/src/java/kp2akeytransform/jni/final_key/kpd_jni.c
+++ b/src/java/kp2akeytransform/jni/final_key/kpd_jni.c
@ -0,0 +1,512 @@
+/*
+  This is a JNI wrapper for AES & SHA source code on Android.
+  Copyright (C) 2010 Michael Mohr
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <string.h>
+#include <pthread.h>
+#include <jni.h>
+
+/*
+  Diagnostic switches - tune as desired.
+  KPD_PROFILE logs how long each key-transformation thread took;
+  KPD_DEBUG logs entry to and exit from the cipher update/final calls.
+*/
+#undef KPD_PROFILE
+#undef KPD_DEBUG
+
+#if defined(KPD_PROFILE)
+#include <time.h>
+#endif
+
+/* Both switches report through __android_log_print(), so either one needs the header. */
+#if defined(KPD_PROFILE) || defined(KPD_DEBUG)
+#include <android/log.h>
+#endif
+
+#include "aes.h"
+#include "sha2.h"
+
+static JavaVM *cached_vm; /* VM pointer saved by JNI_OnLoad */
+static jclass bad_arg, no_mem, bad_padding, short_buf, block_size; /* global references to the Java exception classes thrown from this file */
+/* What a cipher state is used for; FINALIZED marks a state on which nFinal has completed. */
+typedef enum {
+  ENCRYPTION,
+  DECRYPTION,
+  FINALIZED
+} edir_t;
+/* AES_BLOCK_SIZE is the cipher block length in bytes; CACHE_SIZE is the capacity of the per-state hold-back buffer. */
+#define AES_BLOCK_SIZE 16
+#define CACHE_SIZE 32
+/* Native state behind the jlong handle returned by nInit. */
+typedef struct _aes_state {
+  edir_t direction; /* ENCRYPTION, DECRYPTION or FINALIZED */
+  uint32_t cache_len; /* number of valid bytes currently held in cache */
+  uint8_t iv[16], cache[CACHE_SIZE]; /* IV handed to the CBC calls; input bytes held back for a later call */
+  uint8_t ctx[sizeof(aes_encrypt_ctx)]; // 244
+} aes_state;
+/* ENC_CTX/DEC_CTX view the raw ctx bytes as a key schedule (NOTE(review): ctx is sized from aes_encrypt_ctx only - presumably aes_decrypt_ctx is no larger; confirm). ALIGN16 rounds a pointer up to the next 16-byte boundary, which is why buffers are allocated with ALIGN_EXTRA spare bytes. */
+#define ENC_CTX(state) (((aes_encrypt_ctx *)((state)->ctx)))
+#define DEC_CTX(state) (((aes_decrypt_ctx *)((state)->ctx)))
+#define ALIGN_EXTRA 15
+#define ALIGN16(x) (void *)(((uintptr_t)(x)+ALIGN_EXTRA) & ~ 0x0F)
+
+/*
+  Looks up the Java class called name and stores a global reference to it
+  in *out.  Returns 0 on success, -1 if either the lookup or the creation
+  of the global reference failed.
+*/
+static int cache_class( JNIEnv *env, const char *name, jclass *out ) {
+  jclass cls = (*env)->FindClass(env, name);
+  if( cls == NULL )
+    return -1;
+  *out = (*env)->NewGlobalRef(env, cls);
+  return ( *out == NULL ) ? -1 : 0;
+}
+
+/*
+  Library entry point: remembers the VM, caches global references to every
+  exception class this file throws and initialises the AES tables.
+  Returns JNI_VERSION_1_6 on success and JNI_ERR if the environment or any
+  of the exception classes cannot be obtained.
+*/
+JNIEXPORT jint JNICALL JNI_OnLoad( JavaVM *vm, void *reserved ) {
+  JNIEnv *env;
+
+  cached_vm = vm;
+  if( (*vm)->GetEnv(vm, (void **)&env, JNI_VERSION_1_6) != JNI_OK )
+    return JNI_ERR;
+
+  // every reference is checked, so a later ThrowNew never receives a NULL class
+  if( cache_class(env, "java/lang/IllegalArgumentException", &bad_arg) != 0 ||
+      cache_class(env, "java/lang/OutOfMemoryError", &no_mem) != 0 ||
+      cache_class(env, "javax/crypto/BadPaddingException", &bad_padding) != 0 ||
+      cache_class(env, "javax/crypto/ShortBufferException", &short_buf) != 0 ||
+      cache_class(env, "javax/crypto/IllegalBlockSizeException", &block_size) != 0 )
+    return JNI_ERR;
+
+  aes_init();
+
+  return JNI_VERSION_1_6;
+}
+
+/*
+  Called by the VM when the class loader that loaded this library is
+  garbage collected; releases the global references to the exception
+  classes held in the file-level jclass variables.
+*/
+JNIEXPORT void JNICALL JNI_OnUnload( JavaVM *vm, void *reserved ) {
+  JNIEnv *env = NULL;
+  jclass refs[5];
+  size_t k;
+
+  // without an environment there is no way to release anything
+  if( (*vm)->GetEnv(vm, (void **)&env, JNI_VERSION_1_6) != 0 )
+    return;
+
+  refs[0] = bad_arg;
+  refs[1] = no_mem;
+  refs[2] = bad_padding;
+  refs[3] = short_buf;
+  refs[4] = block_size;
+  for( k = 0; k < sizeof(refs) / sizeof(refs[0]); k++ )
+    (*env)->DeleteGlobalRef(env, refs[k]);
+}
+
+/*
+  Allocates and initialises a native AES-CBC cipher state.
+
+  encrypting  non-zero to set up an encryption key schedule, zero for decryption
+  key         16, 24 or 32 bytes of key material
+  iv          16-byte initialisation vector
+
+  Returns the state as an opaque handle (always positive), or -1 with an
+  IllegalArgumentException / OutOfMemoryError pending on failure.
+*/
+JNIEXPORT jlong JNICALL Java_com_keepassdroid_crypto_NativeAESCipherSpi_nInit(JNIEnv *env, jobject this, jboolean encrypting, jbyteArray key, jbyteArray iv) {
+  uint8_t ckey[32];
+  volatile uint8_t *wipe;
+  size_t left;
+  aes_state *state;
+  jint key_len, iv_len;
+
+  // GetArrayLength must not be handed a NULL array
+  if( key == NULL || iv == NULL ) {
+    (*env)->ThrowNew(env, bad_arg, "Invalid length of key or iv");
+    return -1;
+  }
+  key_len = (*env)->GetArrayLength(env, key);
+  iv_len = (*env)->GetArrayLength(env, iv);
+
+  if( ! ( key_len == 16 || key_len == 24 || key_len == 32 ) || iv_len != 16 ) {
+    (*env)->ThrowNew(env, bad_arg, "Invalid length of key or iv");
+    return -1;
+  }
+
+  state = (aes_state *)malloc(sizeof(aes_state));
+  if( state == NULL ) {
+    (*env)->ThrowNew(env, no_mem, "Cannot allocate memory for the encryption state");
+    return -1;
+  }
+  memset(state, 0, sizeof(aes_state));
+
+  (*env)->GetByteArrayRegion(env, key, (jint)0, key_len, (jbyte *)ckey);
+  (*env)->GetByteArrayRegion(env, iv, (jint)0, iv_len, (jbyte *)state->iv);
+
+  if( encrypting ) {
+    state->direction = ENCRYPTION;
+    aes_encrypt_key(ckey, key_len, ENC_CTX(state));
+  } else {
+    state->direction = DECRYPTION;
+    aes_decrypt_key(ckey, key_len, DEC_CTX(state));
+  }
+
+  // do not leave the raw key on the stack; volatile keeps the stores from being optimised away
+  for( wipe = ckey, left = sizeof(ckey); left > 0; left-- )
+    *wipe++ = 0;
+
+  // go through uintptr_t so a 32-bit pointer is zero-extended: a sign-extended
+  // high address would become negative and fail the "state <= 0" handle checks
+  return (jlong)(uintptr_t)state;
+}
+
+/*
+  Releases a cipher state obtained from nInit.  Handles that are zero or
+  negative (the error value) are ignored.  The state is overwritten with
+  zeros first so that the key schedule, IV and cached bytes do not linger
+  in freed heap memory.
+*/
+JNIEXPORT void JNICALL Java_com_keepassdroid_crypto_NativeAESCipherSpi_nCleanup(JNIEnv *env, jclass this, jlong state) {
+  volatile uint8_t *wipe;
+  size_t left;
+
+  if( state <= 0 ) return;
+
+  // volatile stores cannot be dropped by the compiler ahead of free()
+  wipe = (volatile uint8_t *)(uintptr_t)state;
+  for( left = sizeof(aes_state); left > 0; left-- )
+    *wipe++ = 0;
+
+  free((void *)(uintptr_t)state);
+}
+
+/*
+  TODO:
+  It seems like the android implementation of the AES cipher stays a
+  block behind with update calls. So, if you do an update for 16 bytes,
+  it will return nothing in the output buffer.  Then, it is the finalize
+  call that will return the last block stripping off padding if it is
+  not a full block.
+*/
+
+/*
+  Feeds inputLen bytes of input (from inputOffset) through the CBC cipher
+  held in state and writes the blocks that can be released to output at
+  outputOffset.
+
+  While fewer than CACHE_SIZE bytes have accumulated, the data is only
+  buffered and 0 is returned.  Otherwise everything except the last 16 to
+  31 bytes is processed; that remainder stays in the state's cache for a
+  later call.
+
+  Returns the number of bytes written to output, or -1 with a Java
+  exception pending on failure.
+*/
+JNIEXPORT jint JNICALL Java_com_keepassdroid_crypto_NativeAESCipherSpi_nUpdate(JNIEnv *env, jobject this,
+	jlong state, jbyteArray input, jint inputOffset, jint inputLen, jbyteArray output, jint outputOffset, jint outputSize) {
+  int aes_ret;
+  uint32_t outLen, bytes2cache, cryptLen;
+  void *in, *out;
+  uint8_t *c_input, *c_output;
+  aes_state *c_state;
+
+  #if defined(KPD_DEBUG)
+  __android_log_print(ANDROID_LOG_INFO, "kpd_jni.c/nUpdate", "entry: inputLen=%d, outputSize=%d", inputLen, outputSize);
+  #endif
+
+  // step 1: first, some housecleaning
+  // negative lengths and offsets are rejected as well: they would wrap around
+  // once converted to the unsigned sizes used below
+  if( inputLen <= 0 || outputSize <= 0 || inputOffset < 0 || outputOffset < 0 || state <= 0 || !input || !output ) {
+    (*env)->ThrowNew(env, bad_arg, "nUpdate: called with 1 or more invalid arguments");
+    return -1;
+  }
+  c_state = (aes_state *)state;
+  if( c_state->direction == FINALIZED ) {
+    (*env)->ThrowNew(env, bad_arg, "Trying to update a finalized state");
+    return -1;
+  }
+
+  // step 1.5: calculate cryptLen and outLen
+  cryptLen = (uint32_t)inputLen + c_state->cache_len;
+  if( cryptLen < CACHE_SIZE ) {
+    (*env)->GetByteArrayRegion(env, input, inputOffset, inputLen, (jbyte *)(c_state->cache + c_state->cache_len));
+    if( (*env)->ExceptionCheck(env) )
+      return -1; // the region was out of bounds; the JVM has raised the exception
+    c_state->cache_len = cryptLen;
+    return 0;
+  }
+  // now we're guaranteed that cryptLen >= CACHE_SIZE (32)
+  bytes2cache = (cryptLen & 15) + AES_BLOCK_SIZE; // mask bottom 4 bits plus 1 block
+  outLen = (cryptLen - bytes2cache); // output length is now aligned to a 16-byte boundary
+  if( outLen > (uint32_t)outputSize ) {
+    (*env)->ThrowNew(env, bad_arg, "Output buffer does not have enough space");
+    return -1;
+  }
+
+  // step 2: allocate memory to hold input and output data
+  in = malloc(cryptLen+ALIGN_EXTRA);
+  if( in == NULL ) {
+    (*env)->ThrowNew(env, no_mem, "Unable to allocate heap space for encryption input");
+    return -1;
+  }
+  c_input = ALIGN16(in);
+
+  out = malloc(outLen+ALIGN_EXTRA);
+  if( out == NULL ) {
+    free(in);
+    (*env)->ThrowNew(env, no_mem, "Unable to allocate heap space for encryption output");
+    return -1;
+  }
+  c_output = ALIGN16(out);
+
+  // step 3: copy data from Java and en/decrypt it
+  // previously cached bytes go first, the new input directly behind them
+  memcpy(c_input, c_state->cache, c_state->cache_len);
+  (*env)->GetByteArrayRegion(env, input, inputOffset, inputLen, (jbyte *)(c_input + c_state->cache_len));
+  if( (*env)->ExceptionCheck(env) ) {
+    // the copy failed, so c_input is not valid: stop before touching the IV
+    free(in);
+    free(out);
+    return -1;
+  }
+  if( c_state->direction == ENCRYPTION )
+    aes_ret = aes_cbc_encrypt(c_input, c_output, outLen, c_state->iv, ENC_CTX(c_state));
+  else
+    aes_ret = aes_cbc_decrypt(c_input, c_output, outLen, c_state->iv, DEC_CTX(c_state));
+  if( aes_ret != EXIT_SUCCESS ) {
+    free(in);
+    free(out);
+    (*env)->ThrowNew(env, bad_arg, "Failed to encrypt input data"); // FIXME: get a better exception class for this...
+    return -1;
+  }
+  (*env)->SetByteArrayRegion(env, output, outputOffset, outLen, (jbyte *)c_output);
+
+  // step 4: cleanup and return
+  // bytes2cache is always between 16 and 31 here, so there is always a tail to keep
+  c_state->cache_len = bytes2cache; // set new cache length
+  memcpy(c_state->cache, (c_input + outLen), bytes2cache); // cache overflow bytes for next call
+
+  free(in);
+  free(out);
+
+  #if defined(KPD_DEBUG)
+  __android_log_print(ANDROID_LOG_INFO, "kpd_jni.c/nUpdate", "exit: outLen=%d", outLen);
+  #endif
+
+  return outLen;
+}
+
+/*
+  Completes the operation on a cipher state and marks it FINALIZED.
+
+  doPadding == false: the bytes still held in the cache are copied to
+  output unchanged and their count is returned.
+
+  Encryption: the cached bytes are padded up to 16 or 32 bytes (each pad
+  byte holds the pad length), encrypted and written to output.
+  outputSize must be at least 32 for encryption since the buffer may contain >= 1 full block.
+
+  Decryption: the cache must hold exactly one block; it is decrypted, the
+  padding is verified and stripped, and the remaining bytes are written.
+  outputSize must be at least 16 for decryption.
+
+  Returns the number of bytes written, or -1 with a Java exception pending.
+*/
+JNIEXPORT jint JNICALL Java_com_keepassdroid_crypto_NativeAESCipherSpi_nFinal(JNIEnv *env, jobject this,
+	jlong state, jboolean doPadding, jbyteArray output, jint outputOffset, jint outputSize) {
+  int bad;
+  uint32_t padValue, paddedCacheLen, j;
+  uint8_t final_output[CACHE_SIZE] __attribute__ ((aligned (16)));
+  aes_state *c_state;
+
+  #if defined(KPD_DEBUG)
+  __android_log_print(ANDROID_LOG_INFO, "kpd_jni.c/nFinal", "entry: outputOffset=%d, outputSize=%d", outputOffset, outputSize);
+  #endif
+
+  if( !output || outputOffset < 0 || state <= 0 ) {
+    (*env)->ThrowNew(env, bad_arg, "Invalid argument(s) passed to nFinal");
+    return -1;
+  }
+  c_state = (aes_state *)state;
+  if( c_state->direction == FINALIZED ) {
+    (*env)->ThrowNew(env, bad_arg, "This state has already been finalized");
+    return -1;
+  }
+
+  // allow fetching of remaining bytes from cache
+  if( !doPadding ) {
+    (*env)->SetByteArrayRegion(env, output, outputOffset, c_state->cache_len, (jbyte *)c_state->cache);
+    c_state->direction = FINALIZED;
+    return c_state->cache_len;
+  }
+
+  #if defined(KPD_DEBUG)
+  __android_log_print(ANDROID_LOG_INFO, "kpd_jni.c/nFinal", "crypto operation starts");
+  #endif
+
+  if( c_state->direction == ENCRYPTION ) {
+    if( c_state->cache_len >= 16 ) {
+      paddedCacheLen = 32;
+    } else {
+      paddedCacheLen = 16;
+    }
+    if( outputSize < (jint)paddedCacheLen ) {
+      (*env)->ThrowNew(env, short_buf, "Insufficient space in output buffer");
+      return -1;
+    }
+    padValue = paddedCacheLen - c_state->cache_len;
+    if(!padValue) padValue = 16;
+    memset(c_state->cache + c_state->cache_len, padValue, padValue);
+    if( aes_cbc_encrypt(c_state->cache, final_output, paddedCacheLen, c_state->iv, ENC_CTX(c_state)) != EXIT_SUCCESS ) {
+      (*env)->ThrowNew(env, bad_arg, "Failed to encrypt the final data block(s)"); // FIXME: get a better exception class for this...
+      return -1;
+    }
+    (*env)->SetByteArrayRegion(env, output, outputOffset, paddedCacheLen, (jbyte *)final_output);
+    c_state->direction = FINALIZED;
+    #if defined(KPD_DEBUG)
+    __android_log_print(ANDROID_LOG_INFO, "kpd_jni.c/nFinal", "encryption operation completed, returning %d bytes", paddedCacheLen);
+    #endif
+    return paddedCacheLen;
+  } else { // DECRYPTION
+    paddedCacheLen = c_state->cache_len;
+    if( outputSize < (jint)paddedCacheLen ) {
+      (*env)->ThrowNew(env, short_buf, "Insufficient space in output buffer");
+      return -1;
+    }
+    if( paddedCacheLen != 16 ) {
+      (*env)->ThrowNew(env, bad_padding, "Incomplete final block in cache for decryption state");
+      return -1;
+    }
+    if( aes_cbc_decrypt(c_state->cache, final_output, paddedCacheLen, c_state->iv, DEC_CTX(c_state)) != EXIT_SUCCESS ) {
+      (*env)->ThrowNew(env, bad_arg, "Failed to decrypt the final data block(s)"); // FIXME: get a better exception class for this...
+      return -1;
+    }
+    // The last byte gives the pad length: it must be 1..16 and exactly that
+    // many trailing bytes must carry the same value.  Only those bytes are
+    // inspected, so plaintext that happens to end in the pad value is not
+    // miscounted as padding, and nothing in front of the buffer is read.
+    padValue = final_output[AES_BLOCK_SIZE-1];
+    bad = ( padValue < 1 || padValue > AES_BLOCK_SIZE );
+    if( !bad ) {
+      for( j = 1; j <= padValue; j++ ) {
+        if( final_output[AES_BLOCK_SIZE - j] != padValue )
+          bad = 1;
+      }
+    }
+    if( bad ) {
+      (*env)->ThrowNew(env, bad_padding, "Failed to verify padding during decryption");
+      return -1;
+    }
+    j = AES_BLOCK_SIZE - padValue; // number of real plaintext bytes in the block
+    (*env)->SetByteArrayRegion(env, output, outputOffset, j, (jbyte *)final_output);
+    c_state->direction = FINALIZED;
+    #if defined(KPD_DEBUG)
+    __android_log_print(ANDROID_LOG_INFO, "kpd_jni.c/nFinal", "decryption operation completed, returning %d bytes", j);
+    #endif
+    return j;
+  }
+}
+
+/*
+  Reports how many bytes are currently buffered in the cache of the given
+  cipher state.  Throws IllegalArgumentException and returns -1 when the
+  handle is not positive or the state has already been finalized.
+*/
+JNIEXPORT jint JNICALL Java_com_keepassdroid_crypto_NativeAESCipherSpi_nGetCacheSize(JNIEnv* env, jobject this, jlong state) {
+  aes_state *st = ( state > 0 ) ? (aes_state *)state : NULL;
+
+  // a missing handle and a finalized state are reported in the same way
+  if( st == NULL || st->direction == FINALIZED ) {
+    (*env)->ThrowNew(env, bad_arg, "Invalid state");
+    return -1;
+  }
+
+  return st->cache_len;
+}
+
+#define MASTER_KEY_SIZE 32
+/* Work area shared by the two key-transformation threads. */
+typedef struct _master_key {
+  uint32_t rounds, done[2]; /* encryption rounds to run; done[n] is set to 1 once half n has been transformed */
+  pthread_mutex_t lock1, lock2; // these lock the two halves of the key material
+  uint8_t c_seed[MASTER_KEY_SIZE] __attribute__ ((aligned (16))); /* used as the AES-256 key */
+  uint8_t key1[MASTER_KEY_SIZE] __attribute__ ((aligned (16))); /* holds the input key; the threads alternate between key1 and key2 as source and destination */
+  uint8_t key2[MASTER_KEY_SIZE] __attribute__ ((aligned (16)));
+} master_key;
+
+
+/*
+  Thread body: claims one 16-byte half of the key material in the
+  master_key passed as arg and encrypts it mk->rounds times with AES-256
+  keyed by mk->c_seed, alternating between the key1 and key2 buffers.
+
+  Returns the final flip value (1 if the latest result is in key2, 0 if it
+  is in key1) cast to a pointer, or exits with (void *)-1 when both halves
+  have already been claimed.
+*/
+void *generate_key_material(void *arg) {
+  #if defined(KPD_PROFILE)
+  struct timespec start, end;
+  #endif
+  uint32_t i, flip = 0;
+  int half = -1;
+  size_t left;
+  volatile uint8_t *wipe;
+  uint8_t *key1, *key2;
+  master_key *mk = (master_key *)arg;
+  aes_encrypt_ctx e_ctx[1] __attribute__ ((aligned (16)));
+
+  // Take the lock first and only then look at the done flag: testing the flag
+  // before locking lets a second thread see done == 0, then acquire the lock
+  // after the first thread has already finished, and process the same half twice.
+  if( pthread_mutex_trylock(&mk->lock1) == 0 ) {
+    if( mk->done[0] == 0 )
+      half = 0;
+    else
+      pthread_mutex_unlock(&mk->lock1);
+  }
+  if( half < 0 && pthread_mutex_trylock(&mk->lock2) == 0 ) {
+    if( mk->done[1] == 0 )
+      half = 1;
+    else
+      pthread_mutex_unlock(&mk->lock2);
+  }
+  if( half < 0 ) {
+    // this can only be scaled to two threads
+    pthread_exit( (void *)(-1) );
+  }
+  key1 = mk->key1 + half * (MASTER_KEY_SIZE/2);
+  key2 = mk->key2 + half * (MASTER_KEY_SIZE/2);
+
+  #if defined(KPD_PROFILE)
+  clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+  #endif
+
+  aes_encrypt_key256(mk->c_seed, e_ctx);
+  for (i = 0; i < mk->rounds; i++) {
+    if ( flip ) {
+      aes_encrypt(key2, key1, e_ctx);
+      flip = 0;
+    } else {
+      aes_encrypt(key1, key2, e_ctx);
+      flip = 1;
+    }
+  }
+
+  // the key schedule is derived from the seed; do not leave it on the thread stack
+  for( wipe = (volatile uint8_t *)e_ctx, left = sizeof(e_ctx); left > 0; left-- )
+    *wipe++ = 0;
+
+  #if defined(KPD_PROFILE)
+  clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+  if( half == 0 )
+    __android_log_print(ANDROID_LOG_INFO, "kpd_jni.c/nTransformMasterKey", "Thread 1 master key transformation took ~%ld seconds", (long)(end.tv_sec-start.tv_sec));
+  else
+    __android_log_print(ANDROID_LOG_INFO, "kpd_jni.c/nTransformMasterKey", "Thread 2 master key transformation took ~%ld seconds", (long)(end.tv_sec-start.tv_sec));
+  #endif
+
+  if( half == 0 ) {
+    mk->done[0] = 1;
+    pthread_mutex_unlock(&mk->lock1);
+  } else {
+    mk->done[1] = 1;
+    pthread_mutex_unlock(&mk->lock2);
+  }
+
+  // widen through uintptr_t: a direct uint32_t-to-pointer cast has mismatched sizes on 64-bit targets
+  return (void *)(uintptr_t)flip;
+}
+
+/* Overwrites n bytes at p with zeros; the volatile stores cannot be optimised away. */
+static void kpd_wipe( void *p, size_t n ) {
+  volatile uint8_t *b = (volatile uint8_t *)p;
+  while( n-- )
+    *b++ = 0;
+}
+
+/*
+  Transforms a 32-byte key: each 16-byte half is encrypted "rounds" times
+  with AES-256 keyed by the 32-byte seed (one worker thread per half), and
+  the result is hashed with SHA-256.
+
+  Returns a new 32-byte Java array holding the hash, or NULL with a Java
+  exception pending on failure.
+
+  Both worker threads operate on the stack-allocated master_key, so every
+  thread that was started is joined before this function returns, whatever
+  the outcome.  The mutexes are destroyed and the key material is wiped on
+  all paths.
+*/
+JNIEXPORT jbyteArray JNICALL Java_com_keepassdroid_crypto_finalkey_NativeFinalKey_nTransformMasterKey(JNIEnv *env, jobject this, jbyteArray seed, jbyteArray key, jint rounds) {
+  master_key mk;
+  uint32_t flip;
+  pthread_t t1, t2;
+  int launched2, joined1, joined2;
+  void *vret1 = NULL, *vret2 = NULL;
+  const char *err = NULL;
+  uint8_t *src, *dst;
+  jbyteArray result;
+  sha256_ctx h_ctx[1] __attribute__ ((aligned (16)));
+
+  // step 1: housekeeping - sanity checks and fetch data from the JVM
+  if( seed == NULL || (*env)->GetArrayLength(env, seed) != MASTER_KEY_SIZE ) {
+    (*env)->ThrowNew(env, bad_arg, "TransformMasterKey: the seed is not the correct size");
+    return NULL;
+  }
+  if( key == NULL || (*env)->GetArrayLength(env, key) != MASTER_KEY_SIZE ) {
+    (*env)->ThrowNew(env, bad_arg, "TransformMasterKey: the key is not the correct size");
+    return NULL;
+  }
+  if( rounds < 0 ) {
+    (*env)->ThrowNew(env, bad_arg, "TransformMasterKey: illegal number of encryption rounds");
+    return NULL;
+  }
+  mk.rounds = (uint32_t)rounds;
+  mk.done[0] = mk.done[1] = 0;
+  if( pthread_mutex_init(&mk.lock1, NULL) != 0 ) {
+    (*env)->ThrowNew(env, bad_arg, "TransformMasterKey: failed to initialize the mutex for thread 1"); // FIXME: get a better exception class for this...
+    return NULL;
+  }
+  if( pthread_mutex_init(&mk.lock2, NULL) != 0 ) {
+    pthread_mutex_destroy(&mk.lock1);
+    (*env)->ThrowNew(env, bad_arg, "TransformMasterKey: failed to initialize the mutex for thread 2"); // FIXME: get a better exception class for this...
+    return NULL;
+  }
+  (*env)->GetByteArrayRegion(env, seed, 0, MASTER_KEY_SIZE, (jbyte *)mk.c_seed);
+  (*env)->GetByteArrayRegion(env, key, 0, MASTER_KEY_SIZE, (jbyte *)mk.key1);
+
+  // step 2: encrypt the hash "rounds" (default: 6000) times
+  if( pthread_create( &t1, NULL, generate_key_material, (void*)&mk ) != 0 ) {
+    err = "TransformMasterKey: failed to launch thread 1";
+  } else {
+    launched2 = ( pthread_create( &t2, NULL, generate_key_material, (void*)&mk ) == 0 );
+    // thread 1 is running on mk: it must be joined even if thread 2 never started
+    joined1 = ( pthread_join( t1, &vret1 ) == 0 );
+    joined2 = launched2 && ( pthread_join( t2, &vret2 ) == 0 );
+    if( !launched2 )
+      err = "TransformMasterKey: failed to launch thread 2";
+    else if( !joined1 )
+      err = "TransformMasterKey: failed to join thread 1";
+    else if( !joined2 )
+      err = "TransformMasterKey: failed to join thread 2";
+    else if( vret1 == (void *)(-1) || vret2 == (void *)(-1) || vret1 != vret2 )
+      err = "TransformMasterKey: invalid flip value(s) from completed thread(s)";
+  }
+  pthread_mutex_destroy(&mk.lock1);
+  pthread_mutex_destroy(&mk.lock2);
+  if( err != NULL ) {
+    kpd_wipe(&mk, sizeof(mk));
+    (*env)->ThrowNew(env, bad_arg, err); // FIXME: get a better exception class for this...
+    return NULL;
+  }
+  flip = (uint32_t)(uintptr_t)vret1;
+
+  // step 3: final SHA256 hash
+  // flip says which buffer holds the latest encryption result; the digest goes into the other one
+  src = flip ? mk.key2 : mk.key1;
+  dst = flip ? mk.key1 : mk.key2;
+  sha256_begin(h_ctx);
+  sha256_hash(src, MASTER_KEY_SIZE, h_ctx);
+  sha256_end(dst, h_ctx);
+
+  // step 4: send the hash into the JVM
+  result = (*env)->NewByteArray(env, MASTER_KEY_SIZE);
+  if( result != NULL ) // on NULL the JVM has already raised OutOfMemoryError
+    (*env)->SetByteArrayRegion(env, result, 0, MASTER_KEY_SIZE, (jbyte *)dst);
+
+  kpd_wipe(h_ctx, sizeof(h_ctx));
+  kpd_wipe(&mk, sizeof(mk));
+
+  return result;
+}
+#undef MASTER_KEY_SIZE
+
--- a/src/java/kp2akeytransform/jni/prep_build.sh
+++ b/src/java/kp2akeytransform/jni/prep_build.sh
@ -0,0 +1,5 @@
+#!/bin/sh
+# Downloads the SHA source archive and unpacks it into the sha directory.
+# Stop at the first failing command so that a broken download is never unpacked.
+set -e
+SHA_FILE="sha2-07-01-07.zip"
+
+# -f: fail on an HTTP error instead of saving the error page as the archive
+# -L: follow redirects
+curl -fL -o "$SHA_FILE" "http://gladman.plushost.co.uk/oldsite/cryptography_technology/sha/$SHA_FILE"
+unzip "$SHA_FILE" -d sha
--- a/src/java/kp2akeytransform/jni/sha/.gitignore
+++ b/src/java/kp2akeytransform/jni/sha/.gitignore
@ -0,0 +1,13 @@
+brg_endian.h
+brg_types.h
+hmac.c
+hmac.h
+pwd2key.c
+pwd2key.h
+sha1b.c
+sha1.c
+sha1.h
+sha2b.c
+sha2.c
+sha2.h
+shasum.c
--- a/src/java/kp2akeytransform/jni/sha/Android.mk
+++ b/src/java/kp2akeytransform/jni/sha/Android.mk
@ -0,0 +1,14 @@
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := sha
+
+LOCAL_SRC_FILES := \
+	sha1.c \
+	sha2.c \
+	hmac.c
+
+LOCAL_CFLAGS := -DUSE_SHA256
+
+include $(BUILD_STATIC_LIBRARY)
--- a/src/java/kp2akeytransform/libs/.gitignore
+++ b/src/java/kp2akeytransform/libs/.gitignore
@ -0,0 +1,8 @@
+[^.]
+armeabi
+armeabi-v7a
+x86
+mips
+mips-r2
+mips-r2-sf
+ant-contrib-0.3.jar
--- a/src/java/kp2akeytransform/project.properties
+++ b/src/java/kp2akeytransform/project.properties
@ -0,0 +1,15 @@
+# This file is automatically generated by Android Tools.
+# Do not modify this file -- YOUR CHANGES WILL BE ERASED!
+#
+# This file must be checked in Version Control Systems.
+#
+# To customize properties used by the Ant build system use,
+# "ant.properties", and override values to adapt the script to your
+# project structure.
+
+# Indicates whether an apk should be generated for each density.
+split.density=false
+# Project target.
+target=android-12
+apk-configurations=
+android.library=true
--- a/src/java/kp2akeytransform/src/com/keepassdroid/crypto/NativeLib.java
+++ b/src/java/kp2akeytransform/src/com/keepassdroid/crypto/NativeLib.java
@ -0,0 +1,45 @@
+/*
+ * Copyright 2009 Brian Pellin.
+ *     
+ * This file is part of KeePassDroid.
+ *
+ *  KeePassDroid is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  KeePassDroid is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with KeePassDroid.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+package com.keepassdroid.crypto;
+
+public class NativeLib {
+	private static boolean isLoaded = false;
+	private static boolean loadSuccess = false;
+
+	/**
+	 * Reports whether the native "final-key" library is usable. This simply
+	 * delegates to {@link #init()}, so it triggers a load attempt if none has
+	 * succeeded yet.
+	 */
+	public static boolean loaded() {
+		return init();
+	}
+
+	/**
+	 * Loads the native "final-key" library on first use.
+	 *
+	 * A failed attempt is not recorded, so the next call tries to load again.
+	 *
+	 * @return true once the library has been loaded, false if loading failed
+	 */
+	public static boolean init() {
+		if (isLoaded) {
+			return loadSuccess;
+		}
+
+		try {
+			System.loadLibrary("final-key");
+		} catch (UnsatisfiedLinkError linkError) {
+			return false;
+		}
+
+		isLoaded = true;
+		loadSuccess = true;
+		return loadSuccess;
+	}
+}
--- a/src/java/kp2akeytransform/src/com/keepassdroid/crypto/finalkey/AndroidFinalKey.java
+++ b/src/java/kp2akeytransform/src/com/keepassdroid/crypto/finalkey/AndroidFinalKey.java
@ -0,0 +1,78 @@
+/*
+ * Copyright 2009 Brian Pellin.
+ *     
+ * This file is part of KeePassDroid.
+ *
+ *  KeePassDroid is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  KeePassDroid is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with KeePassDroid.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+package com.keepassdroid.crypto.finalkey;
+
+import java.io.IOException;
+import java.security.InvalidKeyException;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+
+import javax.crypto.Cipher;
+import javax.crypto.NoSuchPaddingException;
+import javax.crypto.ShortBufferException;
+import javax.crypto.spec.SecretKeySpec;
+
+public class AndroidFinalKey extends FinalKey {
+
+	/**
+	 * Pure-Java implementation of the master key transformation.
+	 *
+	 * Encrypts a copy of {@code pKey} {@code rounds} times with AES/ECB/NoPadding
+	 * keyed by {@code pKeySeed}, then returns the SHA-256 digest of the result.
+	 *
+	 * @param pKeySeed used as the AES key for every round
+	 * @param pKey     key material to transform; the array itself is not modified
+	 * @param rounds   number of encryption rounds; for values <= 0 the key is only hashed
+	 * @return SHA-256 digest of the transformed key
+	 * @throws IOException if the cipher or digest is unavailable, the seed is rejected as
+	 *                     an AES key, or the cipher reports a short output buffer; the
+	 *                     underlying exception is attached as the cause
+	 */
+	@Override
+	public byte[] transformMasterKey(byte[] pKeySeed, byte[] pKey, int rounds) throws IOException {
+		Cipher cipher;
+		try {
+			cipher = Cipher.getInstance("AES/ECB/NoPadding");
+			cipher.init(Cipher.ENCRYPT_MODE, new SecretKeySpec(pKeySeed, "AES"));
+		} catch (NoSuchAlgorithmException e) {
+			throw wrap("NoSuchAlgorithm: " + e.getMessage(), e);
+		} catch (NoSuchPaddingException e) {
+			throw wrap("NoSuchPadding: " + e.getMessage(), e);
+		} catch (InvalidKeyException e) {
+			throw wrap("InvalidPasswordException: " + e.getMessage(), e);
+		}
+
+		// Encrypt key rounds times, working on a copy so the caller's array stays untouched.
+		// NOTE(review): assumes pKey.length is a multiple of the AES block size, since
+		// NoPadding only emits whole blocks -- confirm against callers.
+		byte[] newKey = pKey.clone();
+		byte[] destKey = new byte[pKey.length];
+		try {
+			for (int i = 0; i < rounds; i++) {
+				cipher.update(newKey, 0, newKey.length, destKey, 0);
+				System.arraycopy(destKey, 0, newKey, 0, newKey.length);
+			}
+		} catch (ShortBufferException e) {
+			throw wrap("Short buffer: " + e.getMessage(), e);
+		}
+
+		// Hash the key
+		MessageDigest md;
+		try {
+			md = MessageDigest.getInstance("SHA-256");
+		} catch (NoSuchAlgorithmException e) {
+			throw wrap("SHA-256 not implemented here: " + e.getMessage(), e);
+		}
+
+		return md.digest(newKey);
+	}
+
+	/**
+	 * Builds an IOException carrying {@code message} with {@code cause} attached.
+	 *
+	 * initCause() is used instead of the IOException(String, Throwable) constructor
+	 * because that constructor only exists since Java 6 / Android API level 9.
+	 */
+	private static IOException wrap(String message, Throwable cause) {
+		IOException wrapped = new IOException(message);
+		wrapped.initCause(cause);
+		return wrapped;
+	}
+
+}
--- a/src/java/kp2akeytransform/src/com/keepassdroid/crypto/finalkey/FinalKey.java
+++ b/src/java/kp2akeytransform/src/com/keepassdroid/crypto/finalkey/FinalKey.java
@ -0,0 +1,26 @@
+/*
+ * Copyright 2009 Brian Pellin.
+ *     
+ * This file is part of KeePassDroid.
+ *
+ *  KeePassDroid is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  KeePassDroid is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with KeePassDroid.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+package com.keepassdroid.crypto.finalkey;
+
+import java.io.IOException;
+
+/**
+ * Contract for the master key transformation step. The implementations in this
+ * package are AndroidFinalKey (javax.crypto based) and NativeFinalKey (JNI based).
+ */
+public abstract class FinalKey {
+	/**
+	 * Transforms {@code key} using {@code seed} over {@code rounds} rounds.
+	 *
+	 * @param seed   transformation seed (AndroidFinalKey uses it as the AES key)
+	 * @param key    key material to transform
+	 * @param rounds number of transformation rounds
+	 * @return the transformed key (AndroidFinalKey returns a SHA-256 digest)
+	 * @throws IOException if the transformation cannot be carried out
+	 */
+	public abstract byte[] transformMasterKey(byte[] seed, byte[] key, int rounds) throws IOException;
+}
--- a/src/java/kp2akeytransform/src/com/keepassdroid/crypto/finalkey/FinalKeyFactory.java
+++ b/src/java/kp2akeytransform/src/com/keepassdroid/crypto/finalkey/FinalKeyFactory.java
@ -0,0 +1,36 @@
+/*
+ * Copyright 2009 Brian Pellin.
+ *     
+ * This file is part of KeePassDroid.
+ *
+ *  KeePassDroid is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  KeePassDroid is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with KeePassDroid.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+package com.keepassdroid.crypto.finalkey;
+
+public class FinalKeyFactory {
+	/**
+	 * Creates the preferred {@link FinalKey}; same as {@code createFinalKey(false)}.
+	 */
+	public static FinalKey createFinalKey() {
+		return createFinalKey(false);
+	}
+
+	/**
+	 * Creates a {@link FinalKey} implementation.
+	 *
+	 * @param androidOverride true to use the Android crypto implementation without
+	 *                        even probing for the native one
+	 * @return a NativeFinalKey when allowed and available, otherwise an AndroidFinalKey
+	 */
+	public static FinalKey createFinalKey(boolean androidOverride) {
+		// Fall back on the android crypto implementation when it is forced
+		// or the native library is not available
+		if (androidOverride || !NativeFinalKey.availble()) {
+			return new AndroidFinalKey();
+		}
+
+		// Prefer the native final key implementation
+		return new NativeFinalKey();
+	}
+}
--- a/src/java/kp2akeytransform/src/com/keepassdroid/crypto/finalkey/NativeFinalKey.java
+++ b/src/java/kp2akeytransform/src/com/keepassdroid/crypto/finalkey/NativeFinalKey.java
@ -0,0 +1,55 @@
+/*
+ * Copyright 2009 Brian Pellin.
+ *     
+ * This file is part of KeePassDroid.
+ *
+ *  KeePassDroid is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  KeePassDroid is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with KeePassDroid.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+package com.keepassdroid.crypto.finalkey;
+
+import java.io.IOException;
+
+import com.keepassdroid.crypto.NativeLib;
+
+
+public class NativeFinalKey extends FinalKey {
+
+	/**
+	 * Reports whether the native implementation can be used, i.e. whether
+	 * {@link NativeLib#init()} managed to load the native library.
+	 */
+	public static boolean availble() {
+		return NativeLib.init();
+	}
+
+	/**
+	 * Transforms the master key by delegating to the native library.
+	 *
+	 * @param seed   passed through to the native code
+	 * @param key    passed through to the native code
+	 * @param rounds passed through to the native code
+	 * @return the byte array produced by the native code
+	 * @throws IOException if the native library could not be loaded
+	 */
+	@Override
+	public byte[] transformMasterKey(byte[] seed, byte[] key, int rounds) throws IOException {
+		// Without this check a missing library would surface as an
+		// UnsatisfiedLinkError from the native call instead of the declared IOException.
+		if (!NativeLib.init()) {
+			throw new IOException("Native final-key library could not be loaded");
+		}
+
+		return nTransformMasterKey(seed, key, rounds);
+	}
+
+	// Presumably implemented by the library that NativeLib loads -- the native side is not shown here.
+	private static native byte[] nTransformMasterKey(byte[] seed, byte[] key, int rounds);
+
+	// For testing
+	/*
+	public static byte[] reflect(byte[] key) {
+		NativeLib.init();
+		
+		return nativeReflect(key);
+	}
+	
+	private static native byte[] nativeReflect(byte[] key);
+	*/
+
+}
--- a/src/keepass2android/keepass2android.csproj
+++ b/src/keepass2android/keepass2android.csproj
@ -259,6 +259,9 @@
    <None Include="Resources\values-vi\strings.xml">
      <Visible>False</Visible>
    </None>
+    <None Include="..\java\kp2akeytransform\libs\mips\libfinal-key.so">
+      <Link>libs\mips\libfinal-key.so</Link>
+    </None>
  </ItemGroup>
  <ItemGroup>
    <AndroidResource Include="Resources\drawable\ic00.png" />
@ -627,12 +630,20 @@
    <Folder Include="Resources\values-zh-rTW\" />
    <Folder Include="SupportLib\" />
    <Folder Include="Assets\" />
+    <Folder Include="libs\" />
+    <Folder Include="libs\armeabi-v7a\" />
+    <Folder Include="libs\armeabi\" />
+    <Folder Include="libs\mips\" />
  </ItemGroup>
  <ItemGroup>
    <ProjectReference Include="..\KeePassLib2Android\KeePassLib2Android.csproj">
      <Project>{545B4A6B-8BBA-4FBE-92FC-4AC060122A54}</Project>
      <Name>KeePassLib2Android</Name>
    </ProjectReference>
+    <ProjectReference Include="..\kp2akeytransform\kp2akeytransform.csproj">
+      <Project>{A57B3ACE-5634-469A-88C4-858BB409F356}</Project>
+      <Name>kp2akeytransform</Name>
+    </ProjectReference>
  </ItemGroup>
  <ProjectExtensions>
    <MonoDevelop>
@ -646,4 +657,14 @@
  <ItemGroup>
    <AndroidJavaLibrary Include="SupportLib\android-support-v4.jar" />
  </ItemGroup>
+  <ItemGroup>
+    <AndroidNativeLibrary Include="..\java\kp2akeytransform\libs\armeabi\libfinal-key.so">
+      <Link>libs\armeabi\libfinal-key.so</Link>
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </AndroidNativeLibrary>
+    <AndroidNativeLibrary Include="..\java\kp2akeytransform\libs\armeabi-v7a\libfinal-key.so">
+      <Link>libs\armeabi-v7a\libfinal-key.so</Link>
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </AndroidNativeLibrary>
+  </ItemGroup>
 </Project>
--- a/src/kp2akeytransform/Additions/AboutAdditions.txt
+++ b/src/kp2akeytransform/Additions/AboutAdditions.txt
@ -0,0 +1,48 @@
+Additions allow you to add arbitrary C# to the generated classes
+before they are compiled.  This can be helpful for providing convenience
+methods or adding pure C# classes.
+
+== Adding Methods to Generated Classes ==
+
+Let's say the library being bound has a Rectangle class with a constructor
+that takes an x and y position, and a width and height size.  It will look like
+this:
+
+public partial class Rectangle
+{
+    public Rectangle (int x, int y, int width, int height)
+	{
+	    // JNI bindings
+	}
+}
+
+Imagine we want to add a constructor to this class that takes a Point and
+Size structure instead of 4 ints.  We can add a new file called Rectangle.cs
+with a partial class containing our new method:
+
+public partial class Rectangle
+{
+    public Rectangle (Point location, Size size) :
+	    this (location.X, location.Y, size.Width, size.Height)
+	{
+	}
+}
+
+At compile time, the additions class will be added to the generated class
+and the final assembly will have a Rectangle class with both constructors.
+
+
+== Adding C# Classes ==
+
+Another thing that can be done is adding fully C# managed classes to the
+generated library.  In the above example, let's assume that there isn't a
+Point class available in Java or our library.  The one we create doesn't need
+to interact with Java, so we'll create it like a normal class in C#.
+
+By adding a Point.cs file with this class, it will end up in the binding library:
+
+public class Point
+{
+    public int X { get; set; }
+	public int Y { get; set; }
+}
--- a/src/kp2akeytransform/Jars/AboutJars.txt
+++ b/src/kp2akeytransform/Jars/AboutJars.txt
@ -0,0 +1,37 @@
+This directory is for Android .jars.
+
+There are 4 types of jars that are supported:
+
+== Input Jar and Embedded Jar ==
+
+This is the jar that bindings should be generated for.
+
+For example, if you were binding the Google Maps library, this would
+be Google's "maps.jar".
+
+The difference between EmbeddedJar and InputJar is, EmbeddedJar is to be
+embedded in the resulting dll as EmbeddedResource, while InputJar is not.
+There are a couple of reasons you might not want to embed the target jar
+in your dll (the ones that could be internally loaded by <uses-library>
+feature e.g. maps.jar, or you cannot embed jars that are under some
+proprietary license).
+
+Set the build action for these jars in the properties page to "InputJar".
+
+
+== Reference Jar and Embedded Reference Jar ==
+
+These are jars that are referenced by the input jar.  C# bindings will
+not be created for these jars.  These jars will be used to resolve
+types used by the input jar.
+
+NOTE: Do not add "android.jar" as a reference jar.  It will be added automatically
+based on the Target Framework selected.
+
+Set the build action for these jars in the properties page to "ReferenceJar".
+
+"EmbeddedReferenceJar" works like "ReferenceJar", but like "EmbeddedJar", it is
+embedded in your dll. But at application build time, they are not included
+in the final apk, like ReferenceJar files.
+
+
--- a/src/kp2akeytransform/Properties/AssemblyInfo.cs
+++ b/src/kp2akeytransform/Properties/AssemblyInfo.cs
@ -0,0 +1,28 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using Android.App;
+
+// Information about this assembly is defined by the following attributes. 
+// Change them to the values specific to your project.
+
+[assembly: AssemblyTitle("kp2akeytransform")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("")]
+[assembly: AssemblyCopyright("Philipp")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// The assembly version has the format "{Major}.{Minor}.{Build}.{Revision}".
+// The form "{Major}.{Minor}.*" will automatically update the build and revision,
+// and "{Major}.{Minor}.{Build}.*" will update just the revision.
+
+[assembly: AssemblyVersion("1.0.0")]
+
+// The following attributes are used to specify the signing key for the assembly, 
+// if desired. See the Mono documentation for more information about signing.
+
+//[assembly: AssemblyDelaySign(false)]
+//[assembly: AssemblyKeyFile("")]
+
--- a/src/kp2akeytransform/Transforms/EnumFields.xml
+++ b/src/kp2akeytransform/Transforms/EnumFields.xml
@ -0,0 +1,14 @@
+<enum-field-mappings>
+	<!--
+  This example converts the constants Fragment_id, Fragment_name,
+  and Fragment_tag from android.support.v4.app.FragmentActivity.FragmentTag
+  to an enum called Android.Support.V4.App.FragmentTagType with values
+  Id, Name, and Tag.
+  
+  <type enum-type="Android\Support\V4\App\FragmentTagType" java-class="android/support/v4/app/FragmentActivity$FragmentTag">
+    <member enum="Id" java="Fragment_id" value="1" />
+    <member enum="Name" java="Fragment_name" value="0" />
+    <member enum="Tag" java="Fragment_tag" value="2" />
+  </type>
+  -->
+</enum-field-mappings>
--- a/src/kp2akeytransform/Transforms/EnumMethods.xml
+++ b/src/kp2akeytransform/Transforms/EnumMethods.xml
@ -0,0 +1,11 @@
+<enum-method-mappings>
+	<!--
+  This example changes the Java method:
+    android.support.v4.app.Fragment.SavedState.writeToParcel (int flags)
+  to be:
+    android.support.v4.app.Fragment.SavedState.writeToParcel (Android.OS.ParcelableWriteFlags flags)
+  when bound in C#.
+  
+  <map package="android.support.v4.app" class="Fragment.SavedState" method="writeToParcel" parameter="flags" enum="Android.OS.ParcelableWriteFlags" />
+  -->
+</enum-method-mappings>
--- a/src/kp2akeytransform/Transforms/Metadata.xml
+++ b/src/kp2akeytransform/Transforms/Metadata.xml
@ -0,0 +1,9 @@
+<metadata>
+	<!--
+  This sample removes the class: android.support.v4.content.AsyncTaskLoader.LoadTask:
+  <remove-node path="/api/package[@name='android.support.v4.content']/class[@name='AsyncTaskLoader.LoadTask']" />
+  
+  This sample removes the method: android.support.v4.content.CursorLoader.loadInBackground:
+  <remove-node path="/api/package[@name='android.support.v4.content']/class[@name='CursorLoader']/method[@name='loadInBackground']" />
+  -->
+</metadata>
--- a/src/kp2akeytransform/kp2akeytransform.csproj
+++ b/src/kp2akeytransform/kp2akeytransform.csproj
@ -0,0 +1,77 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProductVersion>10.0.0</ProductVersion>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{A57B3ACE-5634-469A-88C4-858BB409F356}</ProjectGuid>
+    <ProjectTypeGuids>{EFBA0AD7-5A72-4C68-AF49-83D382785DCF};{10368E6C-D01B-4462-8E8B-01FC667A7035};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <OutputType>Library</OutputType>
+    <RootNamespace>kp2akeytransform</RootNamespace>
+    <MonoAndroidAssetsPrefix>Assets</MonoAndroidAssetsPrefix>
+    <MonoAndroidResourcePrefix>Resources</MonoAndroidResourcePrefix>
+    <AssemblyName>kp2akeytransform</AssemblyName>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>True</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>False</Optimize>
+    <OutputPath>bin\Debug</OutputPath>
+    <DefineConstants>DEBUG;</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+    <ConsolePause>False</ConsolePause>
+    <AndroidLinkMode>None</AndroidLinkMode>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>none</DebugType>
+    <Optimize>True</Optimize>
+    <OutputPath>bin\Release</OutputPath>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+    <AndroidUseSharedRuntime>False</AndroidUseSharedRuntime>
+    <ConsolePause>False</ConsolePause>
+    <AndroidLinkMode>SdkOnly</AndroidLinkMode>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Xml" />
+    <Reference Include="System.Core" />
+    <Reference Include="Mono.Android" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Properties\AssemblyInfo.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="Additions\AboutAdditions.txt" />
+    <None Include="Jars\AboutJars.txt" />
+    <None Include="..\java\kp2akeytransform\libs\armeabi\libfinal-key.so">
+      <Link>libs\armeabi\libfinal-key.so</Link>
+    </None>
+    <None Include="..\java\kp2akeytransform\libs\armeabi-v7a\libfinal-key.so">
+      <Link>libs\armeabi-v7a\libfinal-key.so</Link>
+    </None>
+    <None Include="..\java\kp2akeytransform\libs\mips\libfinal-key.so">
+      <Link>libs\mips\libfinal-key.so</Link>
+    </None>
+  </ItemGroup>
+  <ItemGroup>
+    <TransformFile Include="Transforms\EnumFields.xml" />
+    <TransformFile Include="Transforms\EnumMethods.xml" />
+    <TransformFile Include="Transforms\Metadata.xml" />
+  </ItemGroup>
+  <Import Project="$(MSBuildExtensionsPath)\Novell\Xamarin.Android.Bindings.targets" />
+  <ItemGroup>
+    <Folder Include="libs\" />
+    <Folder Include="libs\armeabi\" />
+    <Folder Include="libs\armeabi-v7a\" />
+    <Folder Include="libs\mips\" />
+  </ItemGroup>
+  <ItemGroup>
+    <EmbeddedJar Include="..\java\kp2akeytransform\bin\kp2akeytransform.jar">
+      <Link>Jars\kp2akeytransform.jar</Link>
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </EmbeddedJar>
+  </ItemGroup>
+</Project>