
system/core


Commit MetaInfo

Revision: 42361542c8aa1edc8c1892e1d194ac8d0894ca5a (tree)
Time: 2016-12-09 16:05:38
Author: Chih-Wei Huang <cwhuang@linu...>
Committer: Chih-Wei Huang

Log Message

Software-accelerated Pixel Flinger

Typical graphics workloads on VirtualBox are improved 3x.

Supports both x86 and x86_64.

Similar change by HazouPH <jgrharbers@gmail.com>:
http://review.cyanogenmod.org/#/c/70896/

And by Quanganh pham <quanganh2627@gmail.com>:
http://review.cyanogenmod.org/#/c/97125/
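
Note on architecture selection in the hunks below: $(filter x86%,$(TARGET_ARCH)) matches both x86 and x86_64; for example, $(filter x86%, x86_64) expands to "x86_64" while $(filter x86%, arm64) expands to nothing. The ifeq branch (an empty filter result) therefore keeps the existing ARM/MIPS codeflinger sources for non-x86 targets, while x86 and x86_64 builds get the new codeflinger/x86 sources and the libenc encoder instead.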

Change Summary

Diff

--- a/libpixelflinger/Android.mk
+++ b/libpixelflinger/Android.mk
@@ -7,9 +7,16 @@ include $(CLEAR_VARS)
77
88 include $(CLEAR_VARS)
99 PIXELFLINGER_SRC_FILES:= \
10+ codeflinger/CodeCache.cpp \
11+ format.cpp \
12+ clear.cpp \
13+ raster.cpp \
14+ buffer.cpp
15+
16+ifeq ($(filter x86%,$(TARGET_ARCH)),)
17+PIXELFLINGER_SRC_FILES += \
1018 codeflinger/ARMAssemblerInterface.cpp \
1119 codeflinger/ARMAssemblerProxy.cpp \
12- codeflinger/CodeCache.cpp \
1320 codeflinger/GGLAssembler.cpp \
1421 codeflinger/load_store.cpp \
1522 codeflinger/blending.cpp \
@@ -19,10 +26,8 @@ PIXELFLINGER_SRC_FILES:= \
1926 pixelflinger.cpp.arm \
2027 trap.cpp.arm \
2128 scanline.cpp.arm \
22- format.cpp \
23- clear.cpp \
24- raster.cpp \
25- buffer.cpp
29+
30+endif
2631
2732 PIXELFLINGER_CFLAGS := -fstrict-aliasing -fomit-frame-pointer
2833
@@ -43,6 +48,18 @@ PIXELFLINGER_SRC_FILES_arm64 := \
4348 arch-arm64/col32cb16blend.S \
4449 arch-arm64/t32cb16blend.S \
4550
51+PIXELFLINGER_SRC_FILES_x86 := \
52+ codeflinger/x86/X86Assembler.cpp \
53+ codeflinger/x86/GGLX86Assembler.cpp \
54+ codeflinger/x86/load_store.cpp \
55+ codeflinger/x86/blending.cpp \
56+ codeflinger/x86/texturing.cpp \
57+ fixed.cpp \
58+ picker.cpp \
59+ pixelflinger.cpp \
60+ trap.cpp \
61+ scanline.cpp
62+
4663 ifndef ARCH_MIPS_REV6
4764 PIXELFLINGER_SRC_FILES_mips := \
4865 codeflinger/MIPSAssembler.cpp \
@@ -66,6 +83,8 @@ LOCAL_MODULE:= libpixelflinger
6683 LOCAL_SRC_FILES := $(PIXELFLINGER_SRC_FILES)
6784 LOCAL_SRC_FILES_arm := $(PIXELFLINGER_SRC_FILES_arm)
6885 LOCAL_SRC_FILES_arm64 := $(PIXELFLINGER_SRC_FILES_arm64)
86+LOCAL_SRC_FILES_x86 := $(PIXELFLINGER_SRC_FILES_x86)
87+LOCAL_SRC_FILES_x86_64 := $(PIXELFLINGER_SRC_FILES_x86)
6988 LOCAL_SRC_FILES_mips := $(PIXELFLINGER_SRC_FILES_mips)
7089 LOCAL_SRC_FILES_mips64 := $(PIXELFLINGER_SRC_FILES_mips64)
7190 LOCAL_CFLAGS := $(PIXELFLINGER_CFLAGS)
@@ -73,6 +92,8 @@ LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)/include
7392 LOCAL_C_INCLUDES += $(LOCAL_EXPORT_C_INCLUDE_DIRS) \
7493 external/safe-iop/include
7594 LOCAL_SHARED_LIBRARIES := libcutils liblog libutils
95+LOCAL_WHOLE_STATIC_LIBRARIES_x86 := libenc
96+LOCAL_WHOLE_STATIC_LIBRARIES_x86_64 := libenc
7697
7798 # Really this should go away entirely or at least not depend on
7899 # libhardware, but this at least gets us built.
--- /dev/null
+++ b/libpixelflinger/codeflinger/Android.mk
@@ -0,0 +1,3 @@
1+ifneq ($(filter x86%,$(TARGET_ARCH)),)
2+include $(call all-named-subdir-makefiles,x86/libenc)
3+endif
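
This new three-line makefile hooks the x86 instruction encoder into the build: all-named-subdir-makefiles is a stock Android build macro that includes the named subdirectory makefiles (here x86/libenc), which is where the libenc static library referenced by LOCAL_WHOLE_STATIC_LIBRARIES_x86 above is expected to be defined.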
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/GGLX86Assembler.cpp
@@ -0,0 +1,1507 @@
1+/* libs/pixelflinger/codeflinger/x86/GGLX86Assembler.cpp
2+**
3+** Copyright 2006, The Android Open Source Project
4+**
5+** Licensed under the Apache License, Version 2.0 (the "License");
6+** you may not use this file except in compliance with the License.
7+** You may obtain a copy of the License at
8+**
9+** http://www.apache.org/licenses/LICENSE-2.0
10+**
11+** Unless required by applicable law or agreed to in writing, software
12+** distributed under the License is distributed on an "AS IS" BASIS,
13+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+** See the License for the specific language governing permissions and
15+** limitations under the License.
16+*/
17+
18+#define LOG_TAG "GGLX86Assembler"
19+
20+#include <assert.h>
21+#include <stdint.h>
22+#include <stdlib.h>
23+#include <stdio.h>
24+#include <sys/types.h>
25+#include <cutils/log.h>
26+
27+#include "codeflinger/x86/GGLX86Assembler.h"
28+
29+namespace android {
30+
31+// ----------------------------------------------------------------------------
32+
33+GGLX86Assembler::GGLX86Assembler(const sp<Assembly>& assembly)
34+ : X86Assembler(assembly), X86RegisterAllocator(), mOptLevel(7)
35+{
36+}
37+
38+GGLX86Assembler::~GGLX86Assembler()
39+{
40+}
41+
42+void GGLX86Assembler::reset(int opt_level)
43+{
44+ X86Assembler::reset();
45+ X86RegisterAllocator::reset();
46+ mOptLevel = opt_level;
47+}
48+
49+// ---------------------------------------------------------------------------
50+
51+int GGLX86Assembler::scanline(const needs_t& needs, context_t const* c)
52+{
53+ int err = 0;
54+ err = scanline_core(needs, c);
55+ if (err != 0)
56+ ALOGE("scanline_core failed, probably due to running out of registers: %d\n", err);
57+
58+ // XXX: in theory, pcForLabel is not valid before generate()
59+ char* fragment_start_pc = pcForLabel("fragment_loop");
60+ char* fragment_end_pc = pcForLabel("fragment_end");
61+ const int per_fragment_ins_size = int(fragment_end_pc - fragment_start_pc);
62+
63+ // build a name for our pipeline
64+ char name[128];
65+ sprintf(name,
66+ "scanline__%08X:%08X_%08X_%08X [%3d ipp ins size]",
67+ needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ins_size);
68+
69+ if (err) {
70+ ALOGE("Error while generating ""%s""\n", name);
71+ disassemble(name);
72+ return -1;
73+ }
74+
75+ return generate(name);
76+}
77+
78+int GGLX86Assembler::scanline_core(const needs_t& needs, context_t const* c)
79+{
80+ int64_t duration = ggl_system_time();
81+
82+ mBlendFactorCached = 0;
83+ mBlending = 0;
84+ mMasking = 0;
85+ mAA = GGL_READ_NEEDS(P_AA, needs.p);
86+ mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
87+ mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
88+ mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
89+ mFog = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
90+ mSmooth = GGL_READ_NEEDS(SHADE, needs.n) != 0;
91+ mBuilderContext.needs = needs;
92+ mBuilderContext.c = c;
93+ mBuilderContext.Rctx = obtainReg(); // obtained dynamically when used, then recycled immediately if unused
94+ mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];
95+
96+ // ------------------------------------------------------------------------
97+
98+ decodeLogicOpNeeds(needs);
99+
100+ decodeTMUNeeds(needs, c);
101+
102+ mBlendSrc = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
103+ mBlendDst = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
104+ mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
105+ mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));
106+
107+ if (!mCbFormat.c[GGLFormat::ALPHA].h) {
108+ if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
109+ (mBlendSrc == GGL_DST_ALPHA)) {
110+ mBlendSrc = GGL_ONE;
111+ }
112+ if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
113+ (mBlendSrcA == GGL_DST_ALPHA)) {
114+ mBlendSrcA = GGL_ONE;
115+ }
116+ if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
117+ (mBlendDst == GGL_DST_ALPHA)) {
118+ mBlendDst = GGL_ONE;
119+ }
120+ if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
121+ (mBlendDstA == GGL_DST_ALPHA)) {
122+ mBlendDstA = GGL_ONE;
123+ }
124+ }
125+
126+ // if we need the framebuffer, read it now
127+ const int blending = blending_codes(mBlendSrc, mBlendDst) |
128+ blending_codes(mBlendSrcA, mBlendDstA);
129+
130+ // XXX: handle special cases, destination not modified...
131+ if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
132+ (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
133+ // Destination unmodified (beware of logic ops)
134+ } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
135+ (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
136+ // Destination is zero (beware of logic ops)
137+ }
138+
139+ int fbComponents = 0;
140+ const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
141+ for (int i=0 ; i<4 ; i++) {
142+ const int mask = 1<<i;
143+ component_info_t& info = mInfo[i];
144+ int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
145+ int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
146+ if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
147+ fs = GGL_ONE;
148+ info.masked = !!(masking & mask);
149+ info.inDest = !info.masked && mCbFormat.c[i].h &&
150+ ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
151+ if (mCbFormat.components >= GGL_LUMINANCE &&
152+ (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
153+ info.inDest = false;
154+ }
155+ info.needed = (i==GGLFormat::ALPHA) &&
156+ (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
157+ info.replaced = !!(mTextureMachine.replaced & mask);
158+ info.iterated = (!info.replaced && (info.inDest || info.needed));
159+ info.smooth = mSmooth && info.iterated;
160+ info.fog = mFog && info.inDest && (i != GGLFormat::ALPHA);
161+ info.blend = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));
162+
163+ mBlending |= (info.blend ? mask : 0);
164+ mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
165+ fbComponents |= mCbFormat.c[i].h ? mask : 0;
166+ }
167+
168+ mAllMasked = (mMasking == fbComponents);
169+ if (mAllMasked) {
170+ mDithering = 0;
171+ }
172+
173+ fragment_parts_t parts;
174+
175+ // ------------------------------------------------------------------------
176+ callee_work();
177+ // ------------------------------------------------------------------------
178+
179+ mCurSp = -12; // %ebx, %edi, %esi
180+ prepare_esp(0);
181+ build_scanline_preparation(parts, needs);
182+ recycleReg(mBuilderContext.Rctx);
183+
184+ if (registerFile().status())
185+ return registerFile().status();
186+
187+ // ------------------------------------------------------------------------
188+ label("fragment_loop");
189+ // ------------------------------------------------------------------------
190+ {
191+ Scratch regs(registerFile());
192+ int temp_reg = -1;
193+
194+ if (mDithering) {
195+ // update the dither index.
196+ temp_reg = regs.obtain();
197+ // Loading into a register and computing there should be faster than memory operations
198+ MOV_MEM_TO_REG(parts.count.offset_ebp, PhysicalReg_EBP, temp_reg);
199+ ROR(GGL_DITHER_ORDER_SHIFT, temp_reg);
200+ ADD_IMM_TO_REG(1 << (32 - GGL_DITHER_ORDER_SHIFT), temp_reg);
201+ ROR(32 - GGL_DITHER_ORDER_SHIFT, temp_reg);
202+ MOV_REG_TO_MEM(temp_reg, parts.count.offset_ebp, PhysicalReg_EBP);
203+ regs.recycle(temp_reg);
204+
205+ }
206+
207+ // XXX: could we do an early alpha-test here in some cases?
208+ // It would probably be used only with smooth-alpha and no texture
209+ // (or no alpha component in the texture).
210+
211+ // Early z-test
212+ if (mAlphaTest==GGL_ALWAYS) {
213+ build_depth_test(parts, Z_TEST|Z_WRITE);
214+ } else {
215+ // we cannot do the z-write here, because
216+ // it might be killed by the alpha-test later
217+ build_depth_test(parts, Z_TEST);
218+ }
219+
220+ { // texture coordinates
221+ Scratch scratches(registerFile());
222+
223+ // texel generation
224+ build_textures(parts, regs);
225+
226+ }
227+
228+ if ((blending & (FACTOR_DST|BLEND_DST)) ||
229+ (mMasking && !mAllMasked) ||
230+ (mLogicOp & LOGIC_OP_DST))
231+ {
232+ // blending / logic_op / masking need the framebuffer
233+ mDstPixel.setTo(regs.obtain(), &mCbFormat);
234+
235+ // load the framebuffer pixel
236+ comment("fetch color-buffer");
237+ parts.cbPtr.reg = regs.obtain();
238+ MOV_MEM_TO_REG(parts.cbPtr.offset_ebp, PhysicalReg_EBP, parts.cbPtr.reg);
239+ load(parts.cbPtr, mDstPixel);
240+ mCurSp = mCurSp - 4;
241+ mDstPixel.offset_ebp = mCurSp;
242+ MOV_REG_TO_MEM(mDstPixel.reg, mDstPixel.offset_ebp, EBP);
243+ regs.recycle(mDstPixel.reg);
244+ regs.recycle(parts.cbPtr.reg);
245+ mDstPixel.reg = -1;
246+ }
247+
248+ if (registerFile().status())
249+ return registerFile().status();
250+
251+ pixel_t pixel;
252+ int directTex = mTextureMachine.directTexture;
253+ if (directTex | parts.packed) {
254+ // note: we can't have both here
255+ // iterated color or direct texture
256+ if(directTex) {
257+ pixel.offset_ebp = parts.texel[directTex-1].offset_ebp;
258+ }
259+ else
260+ pixel.offset_ebp = parts.iterated.offset_ebp;
261+ pixel.reg = regs.obtain();
262+ MOV_MEM_TO_REG(pixel.offset_ebp, EBP, pixel.reg);
263+ //pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
264+ pixel.flags &= ~CORRUPTIBLE;
265+ } else {
266+ if (mDithering) {
267+ mBuilderContext.Rctx = regs.obtain();
268+ temp_reg = regs.obtain();
269+ const int ctxtReg = mBuilderContext.Rctx;
270+ MOV_MEM_TO_REG(8, EBP, ctxtReg);
271+ const int mask = GGL_DITHER_SIZE-1;
272+ parts.dither = reg_t(regs.obtain());
273+ MOV_MEM_TO_REG(parts.count.offset_ebp, EBP, parts.dither.reg);
274+ AND_IMM_TO_REG(mask, parts.dither.reg);
275+ ADD_REG_TO_REG(ctxtReg, parts.dither.reg);
276+ MOVZX_MEM_TO_REG(OpndSize_8, parts.dither.reg, GGL_OFFSETOF(ditherMatrix), temp_reg);
277+ MOV_REG_TO_REG(temp_reg, parts.dither.reg);
278+ mCurSp = mCurSp - 4;
279+ parts.dither.offset_ebp = mCurSp;
280+ MOV_REG_TO_MEM(parts.dither.reg, parts.dither.offset_ebp, EBP);
281+ regs.recycle(parts.dither.reg);
282+ regs.recycle(temp_reg);
283+ regs.recycle(mBuilderContext.Rctx);
284+
285+ }
286+
287+ // allocate a register for the resulting pixel
288+ pixel.setTo(regs.obtain(), &mCbFormat, FIRST);
289+
290+ build_component(pixel, parts, GGLFormat::ALPHA, regs);
291+
292+ if (mAlphaTest!=GGL_ALWAYS) {
293+ // only handle the z-write part here. We know z-test
294+ // was successful, as well as alpha-test.
295+ build_depth_test(parts, Z_WRITE);
296+ }
297+
298+ build_component(pixel, parts, GGLFormat::RED, regs);
299+ build_component(pixel, parts, GGLFormat::GREEN, regs);
300+ build_component(pixel, parts, GGLFormat::BLUE, regs);
301+
302+ pixel.flags |= CORRUPTIBLE;
303+ }
304+
305+ if (registerFile().status()) {
306+ return registerFile().status();
307+ }
308+
309+ if (pixel.reg == -1) {
310+ // be defensive here. if we're here it's probably
311+ // that this whole fragment is a no-op.
312+ pixel = mDstPixel;
313+ }
314+
315+ if (!mAllMasked) {
316+ // logic operation
317+ build_logic_op(pixel, regs);
318+
319+ // masking
320+ build_masking(pixel, regs);
321+
322+ comment("store");
323+ parts.cbPtr.reg = regs.obtain();
324+ MOV_MEM_TO_REG(parts.cbPtr.offset_ebp, EBP, parts.cbPtr.reg);
325+ store(parts.cbPtr, pixel, WRITE_BACK);
326+ MOV_REG_TO_MEM(parts.cbPtr.reg, parts.cbPtr.offset_ebp, EBP);
327+ regs.recycle(parts.cbPtr.reg);
328+ regs.recycle(pixel.reg);
329+ }
330+ }
331+
332+ if (registerFile().status())
333+ return registerFile().status();
334+
335+ // update the iterated color...
336+ if (parts.reload != 3) {
337+ build_smooth_shade(parts);
338+ }
339+
340+ // update iterated z
341+ build_iterate_z(parts);
342+
343+ // update iterated fog
344+ build_iterate_f(parts);
345+
346+ //SUB_IMM_TO_REG(1<<16, parts.count.reg);
347+ SUB_IMM_TO_MEM(1<<16, parts.count.offset_ebp, EBP);
348+
349+ JCC(Mnemonic_JNS, "fragment_loop");
350+ label("fragment_end");
351+ int update_esp_offset, shrink_esp_offset;
352+ update_esp_offset = shrink_esp_offset = -mCurSp - 12; // 12 is ebx, esi, edi
353+ update_esp(update_esp_offset);
354+ shrink_esp(shrink_esp_offset);
355+ return_work();
356+
357+ if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
358+ if (mDepthTest!=GGL_ALWAYS) {
359+ label("discard_before_textures");
360+ build_iterate_texture_coordinates(parts);
361+ }
362+ label("discard_after_textures");
363+ build_smooth_shade(parts);
364+ build_iterate_z(parts);
365+ build_iterate_f(parts);
366+ if (!mAllMasked) {
367+ //ADD_IMM_TO_REG(parts.cbPtr.size>>3, parts.cbPtr.reg);
368+ ADD_IMM_TO_MEM(parts.cbPtr.size>>3, parts.cbPtr.offset_ebp, EBP);
369+ }
370+ SUB_IMM_TO_MEM(1<<16, parts.count.offset_ebp, EBP);
371+ //SUB_IMM_TO_REG(1<<16, parts.count.reg);
372+ JCC(Mnemonic_JNS, "fragment_loop");
373+ update_esp_offset = shrink_esp_offset = -mCurSp - 12; // 12 is ebx, esi, edi
374+ update_esp(update_esp_offset);
375+ shrink_esp(shrink_esp_offset);
376+ return_work();
377+ }
378+
379+ return registerFile().status();
380+}
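
A note on the mCurSp bookkeeping that recurs throughout scanline_core(): x86-32 has only six allocatable general-purpose registers, so values the ARM backend keeps in registers are spilled to EBP-relative stack slots here. mCurSp starts at -12 (below the saved %ebx, %edi, %esi) and grows downward four bytes per slot; fragment_end then rewinds the stack via update_esp()/shrink_esp() with -mCurSp - 12. The recurring pattern, condensed (names taken from the code above; an illustrative sketch, not literal commit code):

    mCurSp = mCurSp - 4;                               // reserve a 4-byte slot
    value.offset_ebp = mCurSp;                         // remember where it lives
    MOV_REG_TO_MEM(value.reg, value.offset_ebp, EBP);  // spill
    regs.recycle(value.reg);                           // free the scarce register
    value.reg = -1;                                    // value now lives in memory
    // ... later, when the value is needed again:
    value.reg = regs.obtain();
    MOV_MEM_TO_REG(value.offset_ebp, EBP, value.reg);  // reload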
381+
382+// ---------------------------------------------------------------------------
383+
384+void GGLX86Assembler::build_scanline_preparation(
385+ fragment_parts_t& parts, const needs_t& needs)
386+{
387+ Scratch scratches(registerFile());
388+
389+ // compute count
390+ comment("compute ct (# of pixels to process)");
391+ int temp_reg;
392+ parts.count.setTo(obtainReg());
393+ int Rx = scratches.obtain();
394+ int Ry = scratches.obtain();
395+ // the only argument is +8 bytes relative to the current EBP
396+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
397+ CONTEXT_LOAD(Rx, iterators.xl);
398+ CONTEXT_LOAD(parts.count.reg, iterators.xr);
399+ CONTEXT_LOAD(Ry, iterators.y);
400+
401+ // parts.count = iterators.xr - Rx
402+ SUB_REG_TO_REG(Rx, parts.count.reg);
403+ SUB_IMM_TO_REG(1, parts.count.reg);
404+
405+ if (mDithering) {
406+ // parts.count.reg = 0xNNNNXXDD
407+ // NNNN = count-1
408+ // DD = dither offset
409+ // XX = 0xxxxxxx (x = garbage)
410+ Scratch scratches(registerFile());
411+ int tx = scratches.obtain();
412+ int ty = scratches.obtain();
413+
414+ MOV_REG_TO_REG(Rx,tx);
415+ AND_IMM_TO_REG(GGL_DITHER_MASK, tx);
416+ MOV_REG_TO_REG(Ry,ty);
417+ AND_IMM_TO_REG(GGL_DITHER_MASK, ty);
418+ SHL(GGL_DITHER_ORDER_SHIFT, ty);
419+ ADD_REG_TO_REG(ty, tx);
420+ SHL(16, parts.count.reg);
421+ OR_REG_TO_REG(tx, parts.count.reg);
422+ scratches.recycle(tx);
423+ scratches.recycle(ty);
424+ } else {
425+ // parts.count.reg = 0xNNNN0000
426+ // NNNN = count-1
427+ SHL(16, parts.count.reg);
428+ }
429+ mCurSp = mCurSp - 4;
430+ parts.count.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg
431+ MOV_REG_TO_MEM(parts.count.reg, parts.count.offset_ebp, EBP);
432+ //PUSH(parts.count.reg);
433+ recycleReg(parts.count.reg);
434+ parts.count.reg=-1;
435+ if (!mAllMasked) {
436+ // compute dst ptr
437+ comment("compute color-buffer pointer");
438+ const int cb_bits = mCbFormat.size*8;
439+ int Rs = scratches.obtain();
440+ temp_reg = scratches.obtain();
441+ CONTEXT_LOAD(Rs, state.buffers.color.stride);
442+ MOVSX_REG_TO_REG(OpndSize_16, Ry, temp_reg);
443+ MOVSX_REG_TO_REG(OpndSize_16, Rs, Rs);
444+ IMUL(temp_reg, Rs);
445+ scratches.recycle(temp_reg);
446+ ADD_REG_TO_REG(Rx, Rs);
447+
448+ parts.cbPtr.setTo(obtainReg(), cb_bits);
449+ CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data);
450+ reg_t temp_reg_t;
451+ temp_reg_t.setTo(Rs);
452+ base_offset(parts.cbPtr, parts.cbPtr, temp_reg_t);
453+
454+ mCurSp = mCurSp - 4;
455+ parts.cbPtr.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg, parts.cbPtr.reg
456+ MOV_REG_TO_MEM(parts.cbPtr.reg, parts.cbPtr.offset_ebp, EBP);
457+ //PUSH(parts.cbPtr.reg);
458+ recycleReg(parts.cbPtr.reg);
459+ parts.cbPtr.reg=-1;
460+ scratches.recycle(Rs);
461+ }
462+
463+ // init fog
464+ const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
465+ if (need_fog) {
466+ comment("compute initial fog coordinate");
467+ Scratch scratches(registerFile());
468+ int ydfdy = scratches.obtain();
469+ int dfdx = scratches.obtain();
470+ CONTEXT_LOAD(dfdx, generated_vars.dfdx);
471+ IMUL(Rx, dfdx);
472+ CONTEXT_LOAD(ydfdy, iterators.ydfdy);
473+ ADD_REG_TO_REG(ydfdy, dfdx); // Rx * dfdx + ydfdy
474+ CONTEXT_STORE(dfdx, generated_vars.f);
475+ scratches.recycle(dfdx);
476+ scratches.recycle(ydfdy);
477+ }
478+
479+ // init Z coordinate
480+ if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
481+ parts.z = reg_t(obtainReg());
482+ comment("compute initial Z coordinate");
483+ Scratch scratches(registerFile());
484+ int dzdx = scratches.obtain();
485+ int ydzdy = parts.z.reg;
486+ CONTEXT_LOAD(dzdx, generated_vars.dzdx); // 1.31 fixed-point
487+ IMUL(Rx, dzdx);
488+ CONTEXT_LOAD(ydzdy, iterators.ydzdy); // 1.31 fixed-point
489+ ADD_REG_TO_REG(dzdx, ydzdy); // parts.z.reg = Rx * dzdx + ydzdy
490+
491+ mCurSp = mCurSp - 4;
492+ parts.z.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg, parts.cbPtr.reg, parts.z.reg
493+ MOV_REG_TO_MEM(ydzdy, parts.z.offset_ebp, EBP);
494+ //PUSH(ydzdy);
495+ recycleReg(ydzdy);
496+ parts.z.reg=-1;
497+
498+ // we're going to index zbase of parts.count
499+ // the ARM version computes zbase = base + (xl-count + stride*y)*2
500+ // !!! Actually, zbase = base + (xl + stride*y)*2
501+ int Rs = dzdx;
502+ int zbase = scratches.obtain();
503+ temp_reg = zbase;
504+ CONTEXT_LOAD(Rs, state.buffers.depth.stride);
505+ MOVSX_REG_TO_REG(OpndSize_16, Rs, Rs);
506+ MOV_REG_TO_REG(Ry, temp_reg);
507+ MOVSX_REG_TO_REG(OpndSize_16, temp_reg, temp_reg);
508+ IMUL(temp_reg, Rs);
509+ ADD_REG_TO_REG(Rx, Rs);
510+ // load parts.count.reg
511+ MOV_MEM_TO_REG(parts.count.offset_ebp, EBP, temp_reg);
512+ SHR(16, temp_reg);
513+ ADD_REG_TO_REG(temp_reg, Rs);
514+ SHL(1, Rs);
515+ CONTEXT_LOAD(zbase, state.buffers.depth.data);
516+ ADD_REG_TO_REG(Rs, zbase);
517+ CONTEXT_STORE(zbase, generated_vars.zbase);
518+ scratches.recycle(zbase);
519+ scratches.recycle(dzdx);
520+ }
521+ // the registers are all used up
522+
523+ // init texture coordinates
524+ init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
525+ scratches.recycle(Ry);
526+
527+ // iterated color
528+ init_iterated_color(parts, reg_t(Rx));
529+
530+ // init coverage factor application (anti-aliasing)
531+ if (mAA) {
532+ parts.covPtr.setTo(obtainReg(), 16);
533+ CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage);
534+ SHL(1, Rx);
535+ ADD_REG_TO_REG(Rx, parts.covPtr.reg);
536+
537+ mCurSp = mCurSp - 4;
538+ parts.covPtr.offset_ebp = mCurSp;
539+ MOV_REG_TO_MEM(parts.covPtr.reg, parts.covPtr.offset_ebp, EBP);
540+ //PUSH(parts.covPtr.reg);
541+ recycleReg(parts.covPtr.reg);
542+ parts.covPtr.reg=-1;
543+ }
544+ scratches.recycle(Rx);
545+}
546+
547+// ---------------------------------------------------------------------------
548+
549+void GGLX86Assembler::build_component( pixel_t& pixel,
550+ fragment_parts_t& parts,
551+ int component,
552+ Scratch& regs)
553+{
554+ static char const * comments[] = {"alpha", "red", "green", "blue"};
555+ comment(comments[component]);
556+
557+ // local register file
558+ Scratch scratches(registerFile());
559+ const int dst_component_size = pixel.component_size(component);
560+
561+ component_t temp(-1);
562+ build_incoming_component( temp, dst_component_size,
563+ parts, component, scratches, regs);
564+
565+ if (mInfo[component].inDest) {
566+ // blending...
567+ build_blending( temp, mDstPixel, component, scratches );
568+
569+ // downshift component and rebuild pixel...
570+ downshift(pixel, component, temp, parts.dither);
571+ }
572+}
573+
574+void GGLX86Assembler::build_incoming_component(
575+ component_t& temp,
576+ int dst_size,
577+ fragment_parts_t& parts,
578+ int component,
579+ Scratch& scratches,
580+ Scratch& global_regs)
581+{
582+ const uint32_t component_mask = 1<<component;
583+
584+ // Figure out what we need for the blending stage...
585+ int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
586+ int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
587+ if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
588+ fs = GGL_ONE;
589+ }
590+
591+ // Figure out what we need to extract and for what reason
592+ const int blending = blending_codes(fs, fd);
593+
594+ // Are we actually going to blend?
595+ const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));
596+
597+ // expand the source if the destination has more bits
598+ int need_expander = false;
599+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
600+ texture_unit_t& tmu = mTextureMachine.tmu[i];
601+ if ((tmu.format_idx) &&
602+ (parts.texel[i].component_size(component) < dst_size)) {
603+ need_expander = true;
604+ }
605+ }
606+
607+ // do we need to extract this component?
608+ const bool multiTexture = mTextureMachine.activeUnits > 1;
609+ const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
610+ (isAlphaSourceNeeded());
611+ int need_extract = mInfo[component].needed;
612+ if (mInfo[component].inDest)
613+ {
614+ need_extract |= ((need_blending ?
615+ (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
616+ need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
617+ need_extract |= mInfo[component].smooth;
618+ need_extract |= mInfo[component].fog;
619+ need_extract |= mDithering;
620+ need_extract |= multiTexture;
621+ }
622+
623+ if (need_extract) {
624+ Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
625+ component_t fragment;
626+
627+ // iterated color
628+ fragment.setTo( regs.obtain(), 0, 32, CORRUPTIBLE);
629+ build_iterated_color(fragment, parts, component, regs);
630+
631+ // texture environment (decal, modulate, replace)
632+ build_texture_environment(fragment, parts, component, regs);
633+
634+ // expand the source if the destination has more bits
635+ if (need_expander && (fragment.size() < dst_size)) {
636+ // we're here only if we fetched a texel
637+ // (so we know for sure fragment is CORRUPTIBLE)
638+ //fragment is stored on the stack
639+ expand(fragment, fragment, dst_size);
640+ }
641+
642+ mCurSp = mCurSp - 4;
643+ fragment.offset_ebp = mCurSp;
644+ MOV_REG_TO_MEM(fragment.reg, fragment.offset_ebp, EBP);
645+ regs.recycle(fragment.reg);
646+
647+ // We have a few specific things to do for the alpha-channel
648+ if ((component==GGLFormat::ALPHA) &&
649+ (mInfo[component].needed || fragment.size()<dst_size))
650+ {
651+ // convert to integer_t first and make sure
652+ // we don't corrupt a needed register
653+ if (fragment.l) {
654+ //component_t incoming(fragment);
655+ // actually fragment is not corruptible
656+ //modify(fragment, regs);
657+ //MOV_REG_TO_REG(incoming.reg, fragment.reg);
658+ SHR(fragment.l, fragment.offset_ebp, EBP);
659+ fragment.h -= fragment.l;
660+ fragment.l = 0;
661+ }
662+
663+ // I haven't found any case that triggers both coverage and the following alpha test (mAlphaTest != GGL_ALWAYS)
664+ fragment.reg = regs.obtain();
665+ MOV_MEM_TO_REG(fragment.offset_ebp, EBP, fragment.reg);
666+
667+ // coverage factor application
668+ build_coverage_application(fragment, parts, regs);
669+ // alpha-test
670+ build_alpha_test(fragment, parts);
671+
672+ MOV_REG_TO_MEM(fragment.reg, fragment.offset_ebp, EBP);
673+ regs.recycle(fragment.reg);
674+
675+ if (blend_needs_alpha_source) {
676+ // We keep only 8 bits for the blending stage
677+ const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
678+
679+ if (fragment.flags & CORRUPTIBLE) {
680+ fragment.flags &= ~CORRUPTIBLE;
681+ mAlphaSource.setTo(fragment.reg,
682+ fragment.size(), fragment.flags, fragment.offset_ebp);
683+ //mCurSp = mCurSp - 4;
684+ //mAlphaSource.offset_ebp = mCurSp;
685+ if (shift) {
686+ SHR(shift, mAlphaSource.offset_ebp, EBP);
687+ }
688+ } else {
689+ // XXX: it would better to do this in build_blend_factor()
690+ // so we can avoid the extra MOV below.
691+ mAlphaSource.setTo(regs.obtain(),
692+ fragment.size(), CORRUPTIBLE);
693+ mCurSp = mCurSp - 4;
694+ mAlphaSource.offset_ebp = mCurSp;
695+ if (shift) {
696+ MOV_MEM_TO_REG(fragment.offset_ebp, EBP, mAlphaSource.reg);
697+ SHR(shift, mAlphaSource.reg);
698+ } else {
699+ MOV_MEM_TO_REG(fragment.offset_ebp, EBP, mAlphaSource.reg);
700+ }
701+ MOV_REG_TO_MEM(mAlphaSource.reg, mAlphaSource.offset_ebp, EBP);
702+ regs.recycle(mAlphaSource.reg);
703+ }
704+ mAlphaSource.s -= shift;
705+
706+ }
707+ }
708+
709+ // fog...
710+ build_fog( fragment, component, regs );
711+
712+ temp = fragment;
713+ } else {
714+ if (mInfo[component].inDest) {
715+ // extraction not needed and replace
716+ // we just select the right component
717+ if ((mTextureMachine.replaced & component_mask) == 0) {
718+ // component wasn't replaced, so use it!
719+ temp = component_t(parts.iterated, component);
720+ }
721+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
722+ const texture_unit_t& tmu = mTextureMachine.tmu[i];
723+ if ((tmu.mask & component_mask) &&
724+ ((tmu.replaced & component_mask) == 0)) {
725+ temp = component_t(parts.texel[i], component);
726+ }
727+ }
728+ }
729+ }
730+}
731+
732+bool GGLX86Assembler::isAlphaSourceNeeded() const
733+{
734+ // XXX: also needed for alpha-test
735+ const int bs = mBlendSrc;
736+ const int bd = mBlendDst;
737+ return bs==GGL_SRC_ALPHA_SATURATE ||
738+ bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
739+ bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
740+}
741+
742+// ---------------------------------------------------------------------------
743+
744+void GGLX86Assembler::build_smooth_shade(fragment_parts_t& parts)
745+{
746+ if (mSmooth && !parts.iterated_packed) {
747+ // update the iterated color in a pipelined way...
748+ comment("update iterated color");
749+ Scratch scratches(registerFile());
750+ mBuilderContext.Rctx = scratches.obtain();
751+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
752+
753+ const int reload = parts.reload;
754+ for (int i=0 ; i<4 ; i++) {
755+ if (!mInfo[i].iterated)
756+ continue;
757+
758+ int dx = parts.argb_dx[i].reg;
759+ int c = parts.argb[i].reg;
760+ dx = scratches.obtain();
761+ c = scratches.obtain();
762+ CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
763+ CONTEXT_LOAD(c, generated_vars.argb[i].c);
764+
765+ //if (reload & 1) {
766+ // c = scratches.obtain();
767+ // CONTEXT_LOAD(c, generated_vars.argb[i].c);
768+ //}
769+ //if (reload & 2) {
770+ // dx = scratches.obtain();
771+ // CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
772+ //}
773+
774+ if (mSmooth) {
775+ ADD_REG_TO_REG(dx, c);
776+ }
777+
778+ CONTEXT_STORE(c, generated_vars.argb[i].c);
779+ scratches.recycle(c);
780+ scratches.recycle(dx);
781+ //if (reload & 1) {
782+ // CONTEXT_STORE(c, generated_vars.argb[i].c);
783+ // scratches.recycle(c);
784+ //}
785+ //if (reload & 2) {
786+ // scratches.recycle(dx);
787+ //}
788+ }
789+ scratches.recycle(mBuilderContext.Rctx);
790+ }
791+}
792+
793+// ---------------------------------------------------------------------------
794+
795+void GGLX86Assembler::build_coverage_application(component_t& fragment,
796+ fragment_parts_t& parts, Scratch& regs)
797+{
798+ // here fragment.l is guaranteed to be 0
799+ if (mAA) {
800+ // coverages are 1.15 fixed-point numbers
801+ comment("coverage application");
802+
803+ component_t incoming(fragment);
804+ modify(fragment, regs);
805+
806+ Scratch scratches(registerFile());
807+ int cf = scratches.obtain();
808+ parts.covPtr.reg = scratches.obtain();
809+ MOV_MEM_TO_REG(parts.covPtr.offset_ebp, EBP, parts.covPtr.reg);
810+ MOVZX_MEM_TO_REG(OpndSize_16, parts.covPtr.reg, 2, cf); // refer to LDRH definition
811+ scratches.recycle(parts.covPtr.reg);
812+ if (fragment.h > 31) {
813+ fragment.h--;
814+
815+ int flag_push_edx = 0;
816+ int flag_reserve_edx = 0;
817+ int temp_reg2 = -1;
818+ int edx_offset_ebp = 0;
819+ if(scratches.isUsed(EDX) == 1) {
820+ if(incoming.reg != EDX && cf != EDX) {
821+ flag_push_edx = 1;
822+ mCurSp = mCurSp - 4;
823+ edx_offset_ebp = mCurSp;
824+ MOV_REG_TO_MEM(EDX, edx_offset_ebp, EBP);
825+ }
826+ }
827+ else {
828+ flag_reserve_edx = 1;
829+ scratches.reserve(EDX);
830+ }
831+ if(scratches.isUsed(EAX)) {
832+ if( cf == EAX || incoming.reg == EAX) {
833+ MOVSX_REG_TO_REG(OpndSize_16, cf, cf);
834+ if(cf == EAX)
835+ IMUL(incoming.reg);
836+ else
837+ IMUL(cf);
838+ SHL(16, EDX);
839+ SHR(16, EAX);
840+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
841+ MOV_REG_TO_REG(EDX, incoming.reg);
842+ }
843+ else {
844+ int eax_offset_ebp = 0;
845+ if(scratches.countFreeRegs() > 0) {
846+ temp_reg2 = scratches.obtain();
847+ MOV_REG_TO_REG(EAX, temp_reg2);
848+ }
849+ else {
850+ mCurSp = mCurSp - 4;
851+ eax_offset_ebp = mCurSp;
852+ MOV_REG_TO_MEM(EAX, eax_offset_ebp, EBP);
853+ }
854+ MOV_REG_TO_REG(cf, EAX);
855+ MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX);
856+ IMUL(incoming.reg);
857+ SHL(16, EDX);
858+ SHR(16, EAX);
859+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
860+ MOV_REG_TO_REG(EDX, incoming.reg);
861+ if(temp_reg2 > -1) {
862+ MOV_REG_TO_REG(temp_reg2, EAX);
863+ scratches.recycle(temp_reg2);
864+ }
865+ else {
866+ MOV_MEM_TO_REG(eax_offset_ebp, EBP, EAX);
867+ }
868+ }
869+ }
870+ else {
871+ MOV_REG_TO_REG(cf, EAX);
872+ MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX);
873+ IMUL(incoming.reg);
874+ SHL(16, EDX);
875+ SHR(16, EAX);
876+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
877+ MOV_REG_TO_REG(EDX, incoming.reg);
878+ }
879+ if(flag_push_edx == 1) {
880+ MOV_MEM_TO_REG(edx_offset_ebp, EBP, EDX);
881+ }
882+ if(flag_reserve_edx ==1)
883+ scratches.recycle(EDX);
884+
885+ MOV_REG_TO_REG(incoming.reg, fragment.reg);
886+
887+ //IMUL(cf, incoming.reg);
888+ } else {
889+ MOV_REG_TO_REG(incoming.reg, fragment.reg);
890+ SHL(1, fragment.reg);
891+
892+ int flag_push_edx = 0;
893+ int flag_reserve_edx = 0;
894+ int temp_reg2 = -1;
895+ int edx_offset_ebp = 0;
896+ if(scratches.isUsed(EDX) == 1) {
897+ if(fragment.reg != EDX && cf != EDX) {
898+ flag_push_edx = 1;
899+ mCurSp = mCurSp - 4;
900+ edx_offset_ebp = mCurSp;
901+ MOV_REG_TO_MEM(EDX, edx_offset_ebp, EBP);
902+ }
903+ }
904+ else {
905+ flag_reserve_edx = 1;
906+ scratches.reserve(EDX);
907+ }
908+ if(scratches.isUsed(EAX)) {
909+ if( cf == EAX || fragment.reg == EAX) {
910+ MOVSX_REG_TO_REG(OpndSize_16, cf, cf);
911+ if(cf == EAX)
912+ IMUL(fragment.reg);
913+ else
914+ IMUL(cf);
915+ SHL(16, EDX);
916+ SHR(16, EAX);
917+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
918+ MOV_REG_TO_REG(EDX, fragment.reg);
919+ }
920+ else {
921+ int eax_offset_ebp = 0;
922+ if(scratches.countFreeRegs() > 0) {
923+ temp_reg2 = scratches.obtain();
924+ MOV_REG_TO_REG(EAX, temp_reg2);
925+ }
926+ else {
927+ mCurSp = mCurSp - 4;
928+ eax_offset_ebp = mCurSp;
929+ MOV_REG_TO_MEM(EAX, eax_offset_ebp, EBP);
930+ }
931+ MOV_REG_TO_REG(cf, EAX);
932+ MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX);
933+ IMUL(fragment.reg);
934+ SHL(16, EDX);
935+ SHR(16, EAX);
936+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
937+ MOV_REG_TO_REG(EDX, fragment.reg);
938+ if(temp_reg2 > -1) {
939+ MOV_REG_TO_REG(temp_reg2, EAX);
940+ scratches.recycle(temp_reg2);
941+ }
942+ else {
943+ MOV_MEM_TO_REG(eax_offset_ebp, EBP, EAX);
944+ }
945+ }
946+ }
947+ else {
948+ MOV_REG_TO_REG(cf, EAX);
949+ MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX);
950+ IMUL(fragment.reg);
951+ SHL(16, EDX);
952+ SHR(16, EAX);
953+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
954+ MOV_REG_TO_REG(EDX, fragment.reg);
955+ }
956+ if(flag_push_edx == 1) {
957+ MOV_MEM_TO_REG(edx_offset_ebp, EBP, EDX);
958+ }
959+ if(flag_reserve_edx ==1)
960+ scratches.recycle(EDX);
961+
962+ //IMUL(cf, fragment.reg);
963+ }
964+ scratches.recycle(cf);
965+ }
966+}
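
Both branches of the coverage multiply above use the same idiom: the one-operand IMUL leaves the full 64-bit signed product in EDX:EAX, and the SHL(16, EDX) / SHR(16, EAX) / 16-bit MOV triple splices bits [47:16] of that product into a single register. In scalar form (a sketch of the equivalent arithmetic, not literal code from the commit):

    int64_t product = (int64_t)cf * x;           // EDX:EAX after IMUL
    int32_t result  = (int32_t)(product >> 16);  // EDX after the shift/move triple

The SHL(1, fragment.reg) taken on the second path pre-doubles the fragment so that the >>16 nets out to scaling by the 1.15 fixed-point coverage factor.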
967+
968+// ---------------------------------------------------------------------------
969+
970+void GGLX86Assembler::build_alpha_test(component_t& fragment,
971+ const fragment_parts_t& parts)
972+{
973+ if (mAlphaTest != GGL_ALWAYS) {
974+ comment("Alpha Test");
975+ Scratch scratches(registerFile());
976+ int ref = scratches.obtain();
977+ mBuilderContext.Rctx = scratches.obtain();
978+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
979+ const int shift = GGL_COLOR_BITS-fragment.size();
980+ CONTEXT_LOAD(ref, state.alpha_test.ref);
981+ scratches.recycle(mBuilderContext.Rctx);
982+ if (shift) {
983+ SHR(shift, ref);
984+ CMP_REG_TO_REG(ref, fragment.reg);
985+ } else CMP_REG_TO_REG(ref, fragment.reg);
986+ Mnemonic cc = Mnemonic_NULL;
987+ //int cc = NV;
988+ switch (mAlphaTest) {
989+ case GGL_NEVER:
990+ JMP("discard_after_textures");
991+ return;
992+ break;
993+ case GGL_LESS:
994+ cc = Mnemonic_JNL;
995+ break;
996+ case GGL_EQUAL:
997+ cc = Mnemonic_JNE;
998+ break;
999+ case GGL_LEQUAL:
1000+ cc = Mnemonic_JB;
1001+ break;
1002+ case GGL_GREATER:
1003+ cc = Mnemonic_JLE;
1004+ break;
1005+ case GGL_NOTEQUAL:
1006+ cc = Mnemonic_JE;
1007+ break;
1008+ case GGL_GEQUAL:
1009+ cc = Mnemonic_JNC;
1010+ break;
1011+ }
1012+ JCC(cc, "discard_after_textures");
1013+ //B(cc^1, "discard_after_textures");
1014+ }
1015+}
1016+
1017+// ---------------------------------------------------------------------------
1018+
1019+void GGLX86Assembler::build_depth_test(
1020+ const fragment_parts_t& parts, uint32_t mask)
1021+{
1022+ mask &= Z_TEST|Z_WRITE;
1023+ int store_flag = 0;
1024+ const needs_t& needs = mBuilderContext.needs;
1025+ const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
1026+ Scratch scratches(registerFile());
1027+
1028+ if (mDepthTest != GGL_ALWAYS || zmask) {
1029+ Mnemonic ic = Mnemonic_NULL;
1030+ switch (mDepthTest) {
1031+ case GGL_LESS:
1032+ ic = Mnemonic_JBE;
1033+ break;
1034+ case GGL_EQUAL:
1035+ ic = Mnemonic_JNE;
1036+ break;
1037+ case GGL_LEQUAL:
1038+ ic = Mnemonic_JB;
1039+ break;
1040+ case GGL_GREATER:
1041+ ic = Mnemonic_JGE;
1042+ break;
1043+ case GGL_NOTEQUAL:
1044+ ic = Mnemonic_JE;
1045+ break;
1046+ case GGL_GEQUAL:
1047+ ic = Mnemonic_JA;
1048+ break;
1049+ case GGL_NEVER:
1050+ // this never happens, because it's taken care of when
1051+ // computing the needs, but we keep it for completeness.
1052+ comment("Depth Test (NEVER)");
1053+ JMP("discard_before_textures");
1054+ return;
1055+ case GGL_ALWAYS:
1056+ // we're here because zmask is enabled
1057+ mask &= ~Z_TEST; // test always passes.
1058+ break;
1059+ }
1060+
1061+
1062+ if ((mask & Z_WRITE) && !zmask) {
1063+ mask &= ~Z_WRITE;
1064+ }
1065+
1066+ if (!mask)
1067+ return;
1068+
1069+ comment("Depth Test");
1070+
1071+ int zbase = scratches.obtain();
1072+ mBuilderContext.Rctx = scratches.obtain();
1073+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
1074+ CONTEXT_LOAD(zbase, generated_vars.zbase); // stall
1075+ scratches.recycle(mBuilderContext.Rctx);
1076+
1077+ int temp_reg1 = scratches.obtain();
1078+ int depth = scratches.obtain();
1079+ int z = parts.z.reg;
1080+ MOV_MEM_TO_REG(parts.count.offset_ebp, PhysicalReg_EBP, temp_reg1);
1081+ SHR(15, temp_reg1);
1082+ SUB_REG_TO_REG(temp_reg1, zbase);
1083+
85+ // the SHR/SUB above compute zbase = zbase - ((count >> 16) << 1)
1085+
1086+ if (mask & Z_TEST) {
1087+ MOVZX_MEM_TO_REG(OpndSize_16, zbase, 0, depth);
1088+ MOV_MEM_TO_REG(parts.z.offset_ebp, PhysicalReg_EBP, temp_reg1);
1089+ SHR(16, temp_reg1);
1090+ CMP_REG_TO_REG(temp_reg1, depth);
1091+ JCC(ic, "discard_before_textures");
1092+
1093+ }
1094+ if (mask & Z_WRITE) {
1095+ if (mask == Z_WRITE) {
1096+ // only z-write asked, cc is meaningless
1097+ store_flag = 1;
1098+ }
1099+ // actually it must be stored since the above branch is not taken
1100+ MOV_REG_TO_MEM(temp_reg1, 0, zbase, OpndSize_16);
1101+ }
1102+ scratches.recycle(temp_reg1);
1103+ scratches.recycle(zbase);
1104+ scratches.recycle(depth);
1105+ }
1106+}
1107+
1108+void GGLX86Assembler::build_iterate_z(const fragment_parts_t& parts)
1109+{
1110+ const needs_t& needs = mBuilderContext.needs;
1111+ if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
1112+ Scratch scratches(registerFile());
1113+ int dzdx = scratches.obtain();
1114+ mBuilderContext.Rctx = scratches.obtain();
1115+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
1116+ CONTEXT_LOAD(dzdx, generated_vars.dzdx); // stall
1117+ scratches.recycle(mBuilderContext.Rctx);
1118+ ADD_REG_TO_MEM(dzdx, EBP, parts.z.offset_ebp);
1119+ scratches.recycle(dzdx);
1120+ }
1121+}
1122+
1123+void GGLX86Assembler::build_iterate_f(const fragment_parts_t& parts)
1124+{
1125+ const needs_t& needs = mBuilderContext.needs;
1126+ if (GGL_READ_NEEDS(P_FOG, needs.p)) {
1127+ Scratch scratches(registerFile());
1128+ int dfdx = scratches.obtain();
1129+ int f = scratches.obtain();
1130+ mBuilderContext.Rctx = scratches.obtain();
1131+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
1132+ CONTEXT_LOAD(f, generated_vars.f);
1133+ CONTEXT_LOAD(dfdx, generated_vars.dfdx); // stall
1134+ ADD_REG_TO_REG(dfdx, f);
1135+ CONTEXT_STORE(f, generated_vars.f);
1136+ scratches.recycle(mBuilderContext.Rctx);
1137+ scratches.recycle(dfdx);
1138+ scratches.recycle(f);
1139+ }
1140+}
1141+
1142+// ---------------------------------------------------------------------------
1143+
1144+void GGLX86Assembler::build_logic_op(pixel_t& pixel, Scratch& regs)
1145+{
1146+ const needs_t& needs = mBuilderContext.needs;
1147+ const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
1148+ if (opcode == GGL_COPY)
1149+ return;
1150+
1151+ comment("logic operation");
1152+
1153+ pixel_t s(pixel);
1154+ if (!(pixel.flags & CORRUPTIBLE)) {
1155+ pixel.reg = regs.obtain();
1156+ pixel.flags |= CORRUPTIBLE;
1157+ }
1158+
1159+ pixel_t d(mDstPixel);
1160+ d.reg = regs.obtain();
1161+ MOV_MEM_TO_REG(mDstPixel.offset_ebp, EBP, d.reg);
1162+ switch(opcode) {
1163+ case GGL_CLEAR:
1164+ MOV_IMM_TO_REG(0, pixel.reg);
1165+ break;
1166+ case GGL_AND:
1167+ MOV_REG_TO_REG(d.reg, pixel.reg);
1168+ AND_REG_TO_REG(s.reg, pixel.reg);
1169+ break;
1170+ case GGL_AND_REVERSE:
1171+ MOV_REG_TO_REG(d.reg, pixel.reg);
1172+ NOT(pixel.reg);
1173+ AND_REG_TO_REG(s.reg, pixel.reg);
1174+ break;
1175+ case GGL_COPY:
1176+ break;
1177+ case GGL_AND_INVERTED:
1178+ MOV_REG_TO_REG(s.reg, pixel.reg);
1179+ NOT(pixel.reg);
1180+ AND_REG_TO_REG(d.reg, pixel.reg);
1181+ break;
1182+ case GGL_NOOP:
1183+ MOV_REG_TO_REG(d.reg, pixel.reg);
1184+ break;
1185+ case GGL_XOR:
1186+ MOV_REG_TO_REG(d.reg, pixel.reg);
1187+ XOR(s.reg, pixel.reg);
1188+ break;
1189+ case GGL_OR:
1190+ MOV_REG_TO_REG(d.reg, pixel.reg);
1191+ OR_REG_TO_REG(s.reg, pixel.reg);
1192+ break;
1193+ case GGL_NOR:
1194+ MOV_REG_TO_REG(d.reg, pixel.reg);
1195+ OR_REG_TO_REG(s.reg, pixel.reg);
1196+ NOT(pixel.reg);
1197+ break;
1198+ case GGL_EQUIV:
1199+ MOV_REG_TO_REG(d.reg, pixel.reg);
1200+ XOR(s.reg, pixel.reg);
1201+ NOT(pixel.reg);
1202+ break;
1203+ case GGL_INVERT:
1204+ MOV_REG_TO_REG(d.reg, pixel.reg);
1205+ NOT(pixel.reg);
1206+ break;
1207+ case GGL_OR_REVERSE: // s | ~d == ~(~s & d)
1208+ MOV_REG_TO_REG(s.reg, pixel.reg);
1209+ NOT(pixel.reg);
1210+ AND_REG_TO_REG(d.reg, pixel.reg);
1211+ NOT(pixel.reg);
1212+ break;
1213+ case GGL_COPY_INVERTED:
1214+ MOV_REG_TO_REG(s.reg, pixel.reg);
1215+ NOT(pixel.reg);
1216+ break;
1217+ case GGL_OR_INVERTED: // ~s | d == ~(s & ~d)
1218+ MOV_REG_TO_REG(d.reg, pixel.reg);
1219+ NOT(pixel.reg);
1220+ AND_REG_TO_REG(s.reg, pixel.reg);
1221+ NOT(pixel.reg);
1222+ break;
1223+ case GGL_NAND:
1224+ MOV_REG_TO_REG(d.reg, pixel.reg);
1225+ AND_REG_TO_REG(s.reg, pixel.reg);
1226+ NOT(pixel.reg);
1227+ break;
1228+ case GGL_SET:
1229+ MOV_IMM_TO_REG(0, pixel.reg);
1230+ NOT(pixel.reg);
1231+ break;
1232+ };
1233+ regs.recycle(d.reg);
1234+}
1235+
1236+// ---------------------------------------------------------------------------
1237+
1238+
1239+void GGLX86Assembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
1240+{
1241+ uint32_t rot;
1242+ uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
1243+ mask &= size;
1244+
1245+ if (mask == size) {
1246+ if (d != s)
1247+ MOV_REG_TO_REG(s, d);
1248+ return;
1249+ }
1250+
1251+ MOV_REG_TO_REG(s, d);
1252+ AND_IMM_TO_REG(mask, d);
1253+}
1254+
1255+void GGLX86Assembler::build_masking(pixel_t& pixel, Scratch& regs)
1256+{
1257+ if (!mMasking || mAllMasked) {
1258+ return;
1259+ }
1260+
1261+ comment("color mask");
1262+
1263+ pixel_t fb(mDstPixel);
1264+ fb.reg = regs.obtain();
1265+ MOV_MEM_TO_REG(mDstPixel.offset_ebp, EBP, fb.reg);
1266+ pixel_t s(pixel);
1267+ if (!(pixel.flags & CORRUPTIBLE)) {
1268+ pixel.reg = regs.obtain();
1269+ pixel.flags |= CORRUPTIBLE;
1270+ }
1271+
1272+ int mask = 0;
1273+ for (int i=0 ; i<4 ; i++) {
1274+ const int component_mask = 1<<i;
1275+ const int h = fb.format.c[i].h;
1276+ const int l = fb.format.c[i].l;
1277+ if (h && (!(mMasking & component_mask))) {
1278+ mask |= ((1<<(h-l))-1) << l;
1279+ }
1280+ }
1281+
1282+ // There is no need to clear the masked components of the source
1283+ // (unless we applied a logic op), because they're already zeroed
1284+ // by construction (masked components are not computed)
1285+
1286+ if (mLogicOp) {
1287+ const needs_t& needs = mBuilderContext.needs;
1288+ const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
1289+ if (opcode != GGL_CLEAR) {
1290+ // clear masked component of source
1291+ build_and_immediate(pixel.reg, s.reg, mask, fb.size());
1292+ s = pixel;
1293+ }
1294+ }
1295+
1296+ // clear non masked components of destination
1297+ build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());
1298+
1299+ // or back the channels that were masked
1300+ if (s.reg == fb.reg) {
1301+ // this is in fact a MOV
1302+ if (s.reg == pixel.reg) {
1303+ // ugh, this is in fact a nop
1304+ } else {
1305+ MOV_REG_TO_REG(fb.reg, pixel.reg);
1306+ }
1307+ } else {
1308+ MOV_REG_TO_REG(fb.reg, pixel.reg);
1309+ OR_REG_TO_REG(s.reg, pixel.reg);
1310+ }
1311+ MOV_REG_TO_MEM(fb.reg, mDstPixel.offset_ebp, EBP);
1312+}
1313+
1314+// ---------------------------------------------------------------------------
1315+
1316+void GGLX86Assembler::base_offset(pointer_t& d, pointer_t& b, const reg_t& o)
1317+{
1318+// d and b may be the same reference
1319+ Scratch scratches(registerFile());
1320+ int temp_reg = scratches.obtain();
1321+ switch (b.size) {
1322+ case 32:
1323+ MOV_REG_TO_REG(b.reg, temp_reg);
1324+ MOV_REG_TO_REG(o.reg, d.reg);
1325+ SHL(2,d.reg);
1326+ ADD_REG_TO_REG(temp_reg, d.reg);
1327+ break;
1328+ case 24:
1329+ if (d.reg == b.reg) {
1330+ MOV_REG_TO_REG(b.reg, temp_reg);
1331+ MOV_REG_TO_REG(o.reg, d.reg);
1332+ SHL(1,d.reg);
1333+ ADD_REG_TO_REG(temp_reg, d.reg);
1334+ ADD_REG_TO_REG(o.reg, d.reg);
1335+ } else {
1336+ MOV_REG_TO_REG(o.reg, temp_reg);
1337+ SHL(1,temp_reg);
1338+ MOV_REG_TO_REG(temp_reg, d.reg);
1339+ ADD_REG_TO_REG(o.reg, d.reg);
1340+ ADD_REG_TO_REG(b.reg, d.reg);
1341+ }
1342+ break;
1343+ case 16:
1344+ MOV_REG_TO_REG(b.reg, temp_reg);
1345+ MOV_REG_TO_REG(o.reg, d.reg);
1346+ SHL(1,d.reg);
1347+ ADD_REG_TO_REG(temp_reg, d.reg);
1348+ break;
1349+ case 8:
1350+ MOV_REG_TO_REG(b.reg, temp_reg);
1351+ MOV_REG_TO_REG(o.reg, d.reg);
1352+ ADD_REG_TO_REG(temp_reg, d.reg);
1353+ break;
1354+ }
1355+ scratches.recycle(temp_reg);
1356+}
1357+
1358+// ----------------------------------------------------------------------------
1359+// cheezy register allocator...
1360+// ----------------------------------------------------------------------------
1361+
1362+void X86RegisterAllocator::reset()
1363+{
1364+ mRegs.reset();
1365+}
1366+
1367+int X86RegisterAllocator::reserveReg(int reg)
1368+{
1369+ return mRegs.reserve(reg);
1370+}
1371+
1372+int X86RegisterAllocator::obtainReg()
1373+{
1374+ return mRegs.obtain();
1375+}
1376+
1377+void X86RegisterAllocator::recycleReg(int reg)
1378+{
1379+ mRegs.recycle(reg);
1380+}
1381+
1382+X86RegisterAllocator::RegisterFile& X86RegisterAllocator::registerFile()
1383+{
1384+ return mRegs;
1385+}
1386+
1387+// ----------------------------------------------------------------------------
1388+
1389+X86RegisterAllocator::RegisterFile::RegisterFile()
1390+ : mRegs(0), mTouched(0), mStatus(0)
1391+{
1392+ //reserve(PhysicalReg_EBP);
1393+ //reserve(PhysicalReg_ESP);
1394+}
1395+
1396+X86RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs)
1397+ : mRegs(rhs.mRegs), mTouched(rhs.mTouched)
1398+{
1399+}
1400+
1401+X86RegisterAllocator::RegisterFile::~RegisterFile()
1402+{
1403+}
1404+
1405+bool X86RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
1406+{
1407+ return (mRegs == rhs.mRegs);
1408+}
1409+
1410+void X86RegisterAllocator::RegisterFile::reset()
1411+{
1412+ mRegs = mTouched = mStatus = 0;
1413+}
1414+
1415+int X86RegisterAllocator::RegisterFile::reserve(int reg)
1416+{
1417+ LOG_ALWAYS_FATAL_IF(isUsed(reg),
1418+ "reserving register %d, but already in use",
1419+ reg);
1420+ if(isUsed(reg)) return -1;
1421+ mRegs |= (1<<reg);
1422+ mTouched |= mRegs;
1423+ return reg;
1424+}
1425+
1426+void X86RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
1427+{
1428+ mRegs |= regMask;
1429+ mTouched |= regMask;
1430+}
1431+
1432+int X86RegisterAllocator::RegisterFile::isUsed(int reg) const
1433+{
1434+ LOG_ALWAYS_FATAL_IF(reg>=6, "invalid register %d", reg);
1435+ return mRegs & (1<<reg);
1436+}
1437+
1438+int X86RegisterAllocator::RegisterFile::obtain()
1439+{
1440+//multiplication result is in edx:eax
1441+//ebx, ecx, edi, esi, eax, edx
1442+ const char priorityList[6] = { PhysicalReg_EBX, PhysicalReg_ECX,PhysicalReg_EDI, PhysicalReg_ESI, PhysicalReg_EAX, PhysicalReg_EDX };
1443+
1444+ const int nbreg = sizeof(priorityList);
1445+ int i, r;
1446+ for (i=0 ; i<nbreg ; i++) {
1447+ r = priorityList[i];
1448+ if (!isUsed(r)) {
1449+ break;
1450+ }
1451+ }
1452+ // this is not an error anymore because we'll try again with
1453+ // a lower optimization level.
1454+ ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
1455+ if (i >= nbreg) {
1456+ mStatus |= OUT_OF_REGISTERS;
1457+ // we return SP so we can more easily debug things
1458+ // the code will never be run anyway.
1459+ printf("pixelflinger ran out of registers\n");
1460+ return PhysicalReg_ESP;
1461+ //return -1;
1462+ }
1463+ reserve(r);
1464+ return r;
1465+}
1466+
1467+bool X86RegisterAllocator::RegisterFile::hasFreeRegs() const
1468+{
1469+ return ((mRegs & 0x3F) == 0x3F) ? false : true;
1470+}
1471+
1472+int X86RegisterAllocator::RegisterFile::countFreeRegs() const
1473+{
1474+ int f = ~mRegs & 0x3F;
1475+ // now count number of 1
1476+ f = (f & 0x5555) + ((f>>1) & 0x5555);
1477+ f = (f & 0x3333) + ((f>>2) & 0x3333);
1478+ f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
1479+ f = (f & 0x00FF) + ((f>>8) & 0x00FF);
1480+ return f;
1481+}
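
countFreeRegs() above is a branch-free population count over the six allocatable registers: each masked addition merges adjacent bit-field sums into fields of twice the width. A standalone check of the same ladder (a hypothetical test, not part of the commit):

    #include <assert.h>
    #include <stdint.h>

    static int count_free(uint32_t used) {
        int f = ~used & 0x3F;                   // mask of free registers (6 bits)
        f = (f & 0x5555) + ((f>>1) & 0x5555);   // 1-bit fields -> 2-bit sums
        f = (f & 0x3333) + ((f>>2) & 0x3333);   // 2-bit fields -> 4-bit sums
        f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);   // 4-bit fields -> 8-bit sums
        f = (f & 0x00FF) + ((f>>8) & 0x00FF);   // 8-bit fields -> final count
        return f;
    }

    int main(void) {
        assert(count_free(0x03) == 4);   // two registers in use, four free
        assert(count_free(0x3F) == 0);   // all six in use
        assert(count_free(0x00) == 6);   // none in use
        return 0;
    }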
1482+
1483+void X86RegisterAllocator::RegisterFile::recycle(int reg)
1484+{
1485+ LOG_FATAL_IF(!isUsed(reg),
1486+ "recycling unallocated register %d",
1487+ reg);
1488+ mRegs &= ~(1<<reg);
1489+}
1490+
1491+void X86RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
1492+{
1493+ LOG_FATAL_IF((mRegs & regMask)!=regMask,
1494+ "recycling unallocated registers "
1495+ "(recycle=%08x, allocated=%08x, unallocated=%08x)",
1496+ regMask, mRegs, mRegs&regMask);
1497+ mRegs &= ~regMask;
1498+}
1499+
1500+uint32_t X86RegisterAllocator::RegisterFile::touched() const
1501+{
1502+ return mTouched;
1503+}
1504+
1505+// ----------------------------------------------------------------------------
1506+
1507+}; // namespace android
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/GGLX86Assembler.h
@@ -0,0 +1,563 @@
1+/* libs/pixelflinger/codeflinger/x86/GGLX86Assembler.h
2+**
3+** Copyright 2006, The Android Open Source Project
4+**
5+** Licensed under the Apache License, Version 2.0 (the "License");
6+** you may not use this file except in compliance with the License.
7+** You may obtain a copy of the License at
8+**
9+** http://www.apache.org/licenses/LICENSE-2.0
10+**
11+** Unless required by applicable law or agreed to in writing, software
12+** distributed under the License is distributed on an "AS IS" BASIS,
13+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+** See the License for the specific language governing permissions and
15+** limitations under the License.
16+*/
17+
18+
19+#ifndef ANDROID_GGLX86ASSEMBLER_H
20+#define ANDROID_GGLX86ASSEMBLER_H
21+
22+#include <stdint.h>
23+#include <sys/types.h>
24+
25+#include <private/pixelflinger/ggl_context.h>
26+
27+#include "codeflinger/x86/X86Assembler.h"
28+
29+
30+namespace android {
31+
32+// ----------------------------------------------------------------------------
33+
34+#define CONTEXT_LOAD(REG, FIELD) \
35+ MOV_MEM_TO_REG(GGL_OFFSETOF(FIELD), mBuilderContext.Rctx, REG)
36+
37+#define CONTEXT_STORE(REG, FIELD) \
38+ MOV_REG_TO_MEM(REG, GGL_OFFSETOF(FIELD), mBuilderContext.Rctx)
39+
40+class X86RegisterAllocator
41+{
42+public:
43+ class RegisterFile;
44+
45+ RegisterFile& registerFile();
46+ int reserveReg(int reg);
47+ int obtainReg();
48+ void recycleReg(int reg);
49+ void reset();
50+
51+ class RegisterFile
52+ {
53+ public:
54+ RegisterFile();
55+ RegisterFile(const RegisterFile& rhs);
56+ ~RegisterFile();
57+
58+ void reset();
59+
60+ bool operator == (const RegisterFile& rhs) const;
61+ bool operator != (const RegisterFile& rhs) const {
62+ return !operator == (rhs);
63+ }
64+
65+ int reserve(int reg);
66+ void reserveSeveral(uint32_t regMask);
67+
68+ void recycle(int reg);
69+ void recycleSeveral(uint32_t regMask);
70+
71+ int obtain();
72+ inline int isUsed(int reg) const;
73+
74+ bool hasFreeRegs() const;
75+ int countFreeRegs() const;
76+
77+ uint32_t touched() const;
78+ inline uint32_t status() const { return mStatus; }
79+
80+ enum {
81+ OUT_OF_REGISTERS = 0x1
82+ };
83+
84+ private:
85+ uint32_t mRegs;
86+ uint32_t mTouched;
87+ uint32_t mStatus;
88+ };
89+
90+ class Scratch
91+ {
92+ public:
93+ Scratch(RegisterFile& regFile)
94+ : mRegFile(regFile), mScratch(0) {
95+ }
96+ ~Scratch() {
97+ mRegFile.recycleSeveral(mScratch);
98+ }
99+ int obtain() {
100+ int reg = mRegFile.obtain();
101+ mScratch |= 1<<reg;
102+ return reg;
103+ }
104+ void reserve(int reg) {
105+ mRegFile.reserve(reg);
106+ mScratch |= 1<<reg;
107+ }
108+ void recycle(int reg) {
109+ mRegFile.recycle(reg);
110+ mScratch &= ~(1<<reg);
111+ }
112+ bool isUsed(int reg) {
113+ return (mScratch & (1<<reg));
114+ }
115+ int countFreeRegs() {
116+ return mRegFile.countFreeRegs();
117+ }
118+ private:
119+ RegisterFile& mRegFile;
120+ uint32_t mScratch;
121+ };
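
Scratch, defined above, is a small RAII guard over the register file: every register obtained or reserved through it is recorded in mScratch, and the destructor recycles whatever is still held. Typical usage, mirroring the pattern throughout GGLX86Assembler.cpp (a sketch only):

    {
        Scratch scratches(registerFile());
        int r = scratches.obtain();   // take a free register
        // ... emit code that uses r ...
        scratches.recycle(r);         // optional early release
    }   // leaving the scope releases anything still held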
122+
123+/*
124+// currently we don't use it
125+
126+ class Spill
127+ {
128+ public:
129+ Spill(RegisterFile& regFile, X86Assembler& gen, uint32_t reglist)
130+ : mRegFile(regFile), mGen(gen), mRegList(reglist), mCount(0)
131+ {
132+ if (reglist) {
133+ int count = 0;
134+ while (reglist) {
135+ count++;
136+ reglist &= ~(1 << (31 - __builtin_clz(reglist)));
137+ }
138+ if (count == 1) {
139+ int reg = 31 - __builtin_clz(mRegList);
140+ // move to the stack
141+ } else {
142+ // move to the stack
143+ }
144+ mRegFile.recycleSeveral(mRegList);
145+ mCount = count;
146+ }
147+ }
148+ ~Spill() {
149+ if (mRegList) {
150+ if (mCount == 1) {
151+ int reg = 31 - __builtin_clz(mRegList);
152+ // move to the stack
153+ } else {
154+ }
155+ mRegFile.reserveSeveral(mRegList);
156+ }
157+ }
158+ private:
159+ RegisterFile& mRegFile;
160+ X86Assembler& mGen;
161+ uint32_t mRegList;
162+ int mCount;
163+ };
164+*/
165+
166+private:
167+ RegisterFile mRegs;
168+};
169+
170+// ----------------------------------------------------------------------------
171+
172+class GGLX86Assembler : public X86Assembler, public X86RegisterAllocator
173+{
174+public:
175+
176+ GGLX86Assembler(const sp<Assembly>& assembly);
177+ ~GGLX86Assembler();
178+
179+ char* base() const { return 0; } // XXX
180+ char* pc() const { return 0; } // XXX
181+
182+ void reset(int opt_level);
183+
184+
185+ // generate scanline code for given needs
186+ int scanline(const needs_t& needs, context_t const* c);
187+ int scanline_core(const needs_t& needs, context_t const* c);
188+
189+ enum {
190+ CLEAR_LO = 0x0001,
191+ CLEAR_HI = 0x0002,
192+ CORRUPTIBLE = 0x0004,
193+ FIRST = 0x0008
194+ };
195+
196+ enum { //load/store flags
197+ WRITE_BACK = 0x0001
198+ };
199+
200+ struct reg_t {
201+ reg_t() : reg(-1), flags(0), offset_ebp(0) {
202+ }
203+ reg_t(int r, int f=0, int offset=0)
204+ : reg(r), flags(f), offset_ebp(offset) {
205+ }
206+ void setTo(int r, int f=0, int offset=0) {
207+ reg=r; flags=f; offset_ebp=offset;
208+ }
209+ int reg;
210+ uint16_t flags;
211+ int offset_ebp;
212+ };
213+
214+ struct integer_t : public reg_t {
215+ integer_t() : reg_t(), s(0) {
216+ }
217+ integer_t(int r, int sz=32, int f=0, int offset=0)
218+ : reg_t(r, f, offset), s(sz) {
219+ }
220+ void setTo(int r, int sz=32, int f=0, int offset=0) {
221+ reg_t::setTo(r, f, offset); s=sz;
222+ }
223+ int8_t s;
224+ inline int size() const { return s; }
225+ };
226+
227+ struct pixel_t : public reg_t {
228+ pixel_t() : reg_t() {
229+ memset(&format, 0, sizeof(GGLFormat));
230+ }
231+ pixel_t(int r, const GGLFormat* fmt, int f=0, int offset=0)
232+ : reg_t(r, f, offset), format(*fmt) {
233+ }
234+ void setTo(int r, const GGLFormat* fmt, int f=0, int offset=0) {
235+ reg_t::setTo(r, f, offset); format = *fmt;
236+ }
237+ GGLFormat format;
238+ inline int hi(int c) const { return format.c[c].h; }
239+ inline int low(int c) const { return format.c[c].l; }
240+ inline int mask(int c) const { return ((1<<size(c))-1) << low(c); }
241+ inline int size() const { return format.size*8; }
242+ inline int size(int c) const { return component_size(c); }
243+ inline int component_size(int c) const { return hi(c) - low(c); }
244+ };
245+
246+ struct component_t : public reg_t {
247+ component_t() : reg_t(), h(0), l(0) {
248+ }
249+ component_t(int r, int f=0, int offset=0)
250+ : reg_t(r, f, offset), h(0), l(0) {
251+ }
252+ component_t(int r, int lo, int hi, int f=0, int offset=0)
253+ : reg_t(r, f, offset), h(hi), l(lo) {
254+ }
255+ explicit component_t(const integer_t& rhs)
256+ : reg_t(rhs.reg, rhs.flags, rhs.offset_ebp), h(rhs.s), l(0) {
257+ }
258+ explicit component_t(const pixel_t& rhs, int component) {
259+ setTo( rhs.reg,
260+ rhs.format.c[component].l,
261+ rhs.format.c[component].h,
262+ rhs.flags|CLEAR_LO|CLEAR_HI, rhs.offset_ebp);
263+ }
264+ void setTo(int r, int lo=0, int hi=0, int f=0, int offset=0) {
265+ reg_t::setTo(r, f, offset); h=hi; l=lo;
266+ }
267+ int8_t h;
268+ int8_t l;
269+ inline int size() const { return h-l; }
270+ };
271+
272+ struct pointer_t : public reg_t {
273+ pointer_t() : reg_t(), size(0) {
274+ }
275+ pointer_t(int r, int s, int f=0, int offset=0)
276+ : reg_t(r, f, offset), size(s) {
277+ }
278+ void setTo(int r, int s, int f=0, int offset=0) {
279+ reg_t::setTo(r, f, offset); size=s;
280+ }
281+ int8_t size;
282+ };
283+
284+
285+private:
286+ struct tex_coord_t {
287+ reg_t s;
288+ reg_t t;
289+ pointer_t ptr;
290+ };
291+
292+ struct fragment_parts_t {
293+ uint32_t packed : 1;
294+ uint32_t reload : 2;
295+ uint32_t iterated_packed : 1;
296+ pixel_t iterated;
297+ pointer_t cbPtr;
298+ pointer_t covPtr;
299+ reg_t count;
300+ reg_t argb[4];
301+ reg_t argb_dx[4];
302+ reg_t z;
303+ reg_t dither;
304+ pixel_t texel[GGL_TEXTURE_UNIT_COUNT];
305+ tex_coord_t coords[GGL_TEXTURE_UNIT_COUNT];
306+ };
307+
308+ struct texture_unit_t {
309+ int format_idx;
310+ GGLFormat format;
311+ int bits;
312+ int swrap;
313+ int twrap;
314+ int env;
315+ int pot;
316+ int linear;
317+ uint8_t mask;
318+ uint8_t replaced;
319+ };
320+
321+ struct texture_machine_t {
322+ texture_unit_t tmu[GGL_TEXTURE_UNIT_COUNT];
323+ uint8_t mask;
324+ uint8_t replaced;
325+ uint8_t directTexture;
326+ uint8_t activeUnits;
327+ };
328+
329+ struct component_info_t {
330+ bool masked : 1;
331+ bool inDest : 1;
332+ bool needed : 1;
333+ bool replaced : 1;
334+ bool iterated : 1;
335+ bool smooth : 1;
336+ bool blend : 1;
337+ bool fog : 1;
338+ };
339+
340+ struct builder_context_t {
341+ context_t const* c;
342+ needs_t needs;
343+ int Rctx;
344+ };
345+
346+ template <typename T>
347+ void modify(T& r, Scratch& regs)
348+ {
349+ if (!(r.flags & CORRUPTIBLE)) {
350+ r.reg = regs.obtain();
351+ r.flags |= CORRUPTIBLE;
352+ }
353+ }
354+
355+ // helpers
356+ void base_offset(pointer_t& d, pointer_t& b, const reg_t& o);
357+
358+ // texture environment
359+ void modulate( component_t& dest,
360+ const component_t& incoming,
361+ const pixel_t& texel, int component);
362+
363+ void decal( component_t& dest,
364+ const component_t& incoming,
365+ const pixel_t& texel, int component);
366+
367+ void blend( component_t& dest,
368+ const component_t& incoming,
369+ const pixel_t& texel, int component, int tmu);
370+
371+ void add( component_t& dest,
372+ const component_t& incoming,
373+ const pixel_t& texel, int component);
374+
375+ // load/store stuff
376+ void store(const pointer_t& addr, const pixel_t& src, uint32_t flags=0);
377+ void load(pointer_t& addr, const pixel_t& dest, uint32_t flags=0);
378+
379+ void extract(integer_t& d, const pixel_t& s, int component);
380+ void extract(component_t& d, const pixel_t& s, int component);
381+ void extract(integer_t& d, int s, int h, int l, int bits=32);
382+ void expand(integer_t& d, const integer_t& s, int dbits);
383+ void expand(integer_t& d, const component_t& s, int dbits);
384+ void expand(component_t& d, const component_t& s, int dbits);
385+ void downshift(pixel_t& d, int component, component_t s, reg_t& dither);
386+
387+
388+ void mul_factor( component_t& d,
389+ const integer_t& v,
390+ const integer_t& f, Scratch& scratches);
391+
392+ void mul_factor_add( component_t& d,
393+ const integer_t& v,
394+ const integer_t& f,
395+ const component_t& a);
396+
397+ void component_add( component_t& d,
398+ const integer_t& dst,
399+ const integer_t& src);
400+
401+ void component_sat( const component_t& v, const int temp_reg);
402+
403+
404+ void build_scanline_preparation(fragment_parts_t& parts,
405+ const needs_t& needs);
406+
407+ void build_smooth_shade(fragment_parts_t& parts);
408+
409+ void build_component( pixel_t& pixel,
410+ fragment_parts_t& parts,
411+ int component,
412+ Scratch& global_scratches);
413+
414+ void build_incoming_component(
415+ component_t& temp,
416+ int dst_size,
417+ fragment_parts_t& parts,
418+ int component,
419+ Scratch& scratches,
420+ Scratch& global_scratches);
421+
422+ void init_iterated_color(fragment_parts_t& parts, const reg_t& x);
423+
424+ void build_iterated_color( component_t& fragment,
425+ fragment_parts_t& parts,
426+ int component,
427+ Scratch& regs);
428+
429+ void decodeLogicOpNeeds(const needs_t& needs);
430+
431+ void decodeTMUNeeds(const needs_t& needs, context_t const* c);
432+
433+ void init_textures( tex_coord_t* coords,
434+ const reg_t& x,
435+ const reg_t& y);
436+
437+ void build_textures( fragment_parts_t& parts,
438+ Scratch& regs);
439+
440+ void filter8( const fragment_parts_t& parts,
441+ pixel_t& texel, const texture_unit_t& tmu,
442+ reg_t reg_U, reg_t reg_V, pointer_t& txPtr,
443+ int FRAC_BITS, Scratch& scratches);
444+
445+ void filter16( const fragment_parts_t& parts,
446+ pixel_t& texel, const texture_unit_t& tmu,
447+ reg_t reg_U, reg_t reg_V, pointer_t& txPtr,
448+ int FRAC_BITS, Scratch& scratches);
449+
450+ void filter24( const fragment_parts_t& parts,
451+ pixel_t& texel, const texture_unit_t& tmu,
452+ int U, int V, pointer_t& txPtr,
453+ int FRAC_BITS);
454+
455+ void filter32( const fragment_parts_t& parts,
456+ pixel_t& texel, const texture_unit_t& tmu,
457+ reg_t reg_U, reg_t reg_V, pointer_t& txPtr,
458+ int FRAC_BITS, Scratch& scratches);
459+
460+ void build_texture_environment( component_t& fragment,
461+ fragment_parts_t& parts,
462+ int component,
463+ Scratch& regs);
464+
465+ void wrapping( int d,
466+ int coord, int size,
467+ int tx_wrap, int tx_linear, Scratch& scratches);
468+
469+ void build_fog( component_t& temp,
470+ int component,
471+ Scratch& parent_scratches);
472+
473+ void build_blending( component_t& in_out,
474+ pixel_t& pixel,
475+ int component,
476+ Scratch& parent_scratches);
477+
478+ void build_blend_factor(
479+ integer_t& factor, int f, int component,
480+ const pixel_t& dst_pixel,
481+ integer_t& fragment,
482+ integer_t& fb,
483+ Scratch& scratches);
484+
485+ void build_blendFOneMinusF( component_t& temp,
486+ const integer_t& factor,
487+ const integer_t& fragment,
488+ const integer_t& fb);
489+
490+ void build_blendOneMinusFF( component_t& temp,
491+ const integer_t& factor,
492+ const integer_t& fragment,
493+ const integer_t& fb);
494+
495+ void build_coverage_application(component_t& fragment,
496+ fragment_parts_t& parts,
497+ Scratch& regs);
498+
499+ void build_alpha_test(component_t& fragment, const fragment_parts_t& parts);
500+
501+ enum { Z_TEST=1, Z_WRITE=2 };
502+ void build_depth_test(const fragment_parts_t& parts, uint32_t mask);
503+ void build_iterate_z(const fragment_parts_t& parts);
504+ void build_iterate_f(const fragment_parts_t& parts);
505+ void build_iterate_texture_coordinates(const fragment_parts_t& parts);
506+
507+ void build_logic_op(pixel_t& pixel, Scratch& regs);
508+
509+ void build_masking(pixel_t& pixel, Scratch& regs);
510+
511+ void build_and_immediate(int d, int s, uint32_t mask, int bits);
512+
513+ bool isAlphaSourceNeeded() const;
514+
515+ enum {
516+ FACTOR_SRC=1, FACTOR_DST=2, BLEND_SRC=4, BLEND_DST=8
517+ };
518+
519+ enum {
520+ LOGIC_OP=1, LOGIC_OP_SRC=2, LOGIC_OP_DST=4
521+ };
522+
523+ static int blending_codes(int fs, int fd);
524+
525+ builder_context_t mBuilderContext;
526+ texture_machine_t mTextureMachine;
527+ component_info_t mInfo[4];
528+ int mBlending;
529+ int mMasking;
530+ int mAllMasked;
531+ int mLogicOp;
532+ int mAlphaTest;
533+ int mAA;
534+ int mDithering;
535+ int mDepthTest;
536+
537+ int mSmooth;
538+ int mFog;
539+ pixel_t mDstPixel;
540+
541+ GGLFormat mCbFormat;
542+
543+ int mBlendFactorCached;
544+ integer_t mAlphaSource;
545+
546+ int mBaseRegister;
547+
548+ int mBlendSrc;
549+ int mBlendDst;
550+ int mBlendSrcA;
551+ int mBlendDstA;
552+
553+ int mOptLevel;
554+
555+ // stack-frame bookkeeping: tracks how far esp has been stretched so it can be shrunk back
556+ int mCurSp;
557+};
558+
559+// ----------------------------------------------------------------------------
560+
561+}; // namespace android
562+
563+#endif // ANDROID_GGLX86ASSEMBLER_H
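Throughout the class above, reg_t and its subclasses carry not just a register index but also a home slot relative to EBP (offset_ebp): x86 has far fewer general-purpose registers than the ARM code generator assumes, so values are routinely spilled and reloaded. The h/l fields of component_t and the GGLFormat inside pixel_t describe which bits of the register are meaningful. A stand-alone illustration of that bit-range arithmetic, using the green component of RGB565 (l = 5, h = 11) as an assumed example:

    #include <cstdio>

    int main() {
        const int l = 5, h = 11;                  // green bit range in RGB565
        const int size = h - l;                   // component_size() == 6
        const int mask = ((1 << size) - 1) << l;  // pixel_t::mask()
        printf("green: size=%d mask=0x%04x\n", size, mask); // size=6 mask=0x07e0
        return 0;
    }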
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/X86Assembler.cpp
@@ -0,0 +1,618 @@
1+/* libs/pixelflinger/codeflinger/x86/X86Assembler.cpp
2+**
3+** Copyright 2006, The Android Open Source Project
4+**
5+** Licensed under the Apache License, Version 2.0 (the "License");
6+** you may not use this file except in compliance with the License.
7+** You may obtain a copy of the License at
8+**
9+** http://www.apache.org/licenses/LICENSE-2.0
10+**
11+** Unless required by applicable law or agreed to in writing, software
12+** distributed under the License is distributed on an "AS IS" BASIS,
13+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+** See the License for the specific language governing permissions and
15+** limitations under the License.
16+*/
17+
18+#define LOG_TAG "X86Assembler"
19+
20+#include <stdio.h>
21+#include <stdlib.h>
22+#include <cutils/log.h>
23+#include <cutils/properties.h>
24+#include <string.h>
25+
26+#if defined(WITH_LIB_HARDWARE)
27+#include <hardware_legacy/qemu_tracing.h>
28+#endif
29+
30+#include <private/pixelflinger/ggl_context.h>
31+
32+#include "codeflinger/CodeCache.h"
33+#include "codeflinger/x86/X86Assembler.h"
34+
35+// ----------------------------------------------------------------------------
36+
37+namespace android {
38+
39+// ----------------------------------------------------------------------------
40+
41+X86Assembler::X86Assembler(const sp<Assembly>& assembly)
42+ : mAssembly(assembly)
43+{
44+ mBase = mStream = (char *)assembly->base();
45+ mDuration = ggl_system_time();
46+#if defined(WITH_LIB_HARDWARE)
47+ mQemuTracing = true;
48+#endif
49+}
50+
51+X86Assembler::~X86Assembler()
52+{
53+}
54+
55+char* X86Assembler::pc() const
56+{
57+ return mStream;
58+}
59+
60+char* X86Assembler::base() const
61+{
62+ return mBase;
63+}
64+
65+void X86Assembler::reset()
66+{
67+ mBase = mStream = (char *)mAssembly->base();
68+ mBranchTargets.clear();
69+ mLabels.clear();
70+ mLabelsInverseMapping.clear();
71+ mComments.clear();
72+}
73+
74+// ----------------------------------------------------------------------------
75+
76+void X86Assembler::disassemble(const char* name)
77+{
78+ if (name) {
79+ printf("%s:\n", name);
80+ }
81+ size_t count = pc()-base();
82+ unsigned insLength;
83+ unsigned insSize;
84+ char* curStream = (char*)base();
85+ while (count>0) {
86+ ssize_t label = mLabelsInverseMapping.indexOfKey(curStream);
87+ if (label >= 0) {
88+ printf("%s:\n", mLabelsInverseMapping.valueAt(label));
89+ }
90+ ssize_t comment = mComments.indexOfKey(curStream);
91+ if (comment >= 0) {
92+ printf("; %s\n", mComments.valueAt(comment));
93+ }
94+ insLength = decodeThenPrint(curStream);
95+ curStream = curStream + insLength;
96+ count = count - insLength;
97+ }
98+}
99+
100+void X86Assembler::comment(const char* string)
101+{
102+ mComments.add(mStream, string);
103+}
104+
105+void X86Assembler::label(const char* theLabel)
106+{
107+ mLabels.add(theLabel, mStream);
108+ mLabelsInverseMapping.add(mStream, theLabel);
109+}
110+
111+// emit a conditional jump to a label; the displacement is patched in generate()
112+void X86Assembler::JCC(Mnemonic cc, const char* label) {
113+ switch (cc) {
114+ case Mnemonic_JO:
115+ encoder_imm(Mnemonic_JO, OpndSize_32, 0/*imm*/, mStream);
116+ break;
117+ case Mnemonic_JNO:
118+ encoder_imm(Mnemonic_JNO, OpndSize_32, 0/*imm*/, mStream);
119+ break;
120+ case Mnemonic_JB:
121+ encoder_imm(Mnemonic_JB, OpndSize_32, 0/*imm*/, mStream);
122+ break;
123+ case Mnemonic_JNB:
124+ encoder_imm(Mnemonic_JNB, OpndSize_32, 0/*imm*/, mStream);
125+ break;
126+ case Mnemonic_JZ:
127+ encoder_imm(Mnemonic_JZ, OpndSize_32, 0/*imm*/, mStream);
128+ break;
129+ case Mnemonic_JNZ:
130+ encoder_imm(Mnemonic_JNZ, OpndSize_32, 0/*imm*/, mStream);
131+ break;
132+ case Mnemonic_JBE:
133+ encoder_imm(Mnemonic_JBE, OpndSize_32, 0/*imm*/, mStream);
134+ break;
135+ case Mnemonic_JNBE:
136+ encoder_imm(Mnemonic_JNBE, OpndSize_32, 0/*imm*/, mStream);
137+ break;
138+ case Mnemonic_JS:
139+ encoder_imm(Mnemonic_JS, OpndSize_32, 0/*imm*/, mStream);
140+ break;
141+ case Mnemonic_JNS:
142+ encoder_imm(Mnemonic_JNS, OpndSize_32, 0/*imm*/, mStream);
143+ break;
144+ case Mnemonic_JP:
145+ encoder_imm(Mnemonic_JP, OpndSize_32, 0/*imm*/, mStream);
146+ break;
147+ case Mnemonic_JNP:
148+ encoder_imm(Mnemonic_JNP, OpndSize_32, 0/*imm*/, mStream);
149+ break;
150+ case Mnemonic_JL:
151+ encoder_imm(Mnemonic_JL, OpndSize_32, 0/*imm*/, mStream);
152+ break;
153+ case Mnemonic_JNL:
154+ encoder_imm(Mnemonic_JNL, OpndSize_32, 0/*imm*/, mStream);
155+ break;
156+ case Mnemonic_JLE:
157+ encoder_imm(Mnemonic_JLE, OpndSize_32, 0/*imm*/, mStream);
158+ break;
159+ case Mnemonic_JNLE:
160+ encoder_imm(Mnemonic_JNLE, OpndSize_32, 0/*imm*/, mStream);
161+ break;
162+ default :
163+ printf("the condition is not supported.\n");
164+ return;
165+ }
166+ mStreamNext = mStream + encoder_get_inst_size(mStream);
167+ // the encoded offset is relative to the instruction following the jump
168+ mBranchTargets.add(branch_target_t(label, mStream, mStreamNext));
169+ mStream = mStreamNext;
170+}
171+
172+void X86Assembler::JMP(const char* label) {
173+ encoder_imm(Mnemonic_JMP, OpndSize_32, 0/*imm*/, mStream);
174+ mStreamNext = mStream + encoder_get_inst_size(mStream);
175+ mBranchTargets.add(branch_target_t(label, mStream, mStreamNext));
176+ mStream = mStreamNext;
177+}
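JCC() and JMP() emit the jump with a zero 32-bit displacement, record the label together with the jump's PC and the following instruction's PC, and leave the real displacement to be patched by generate(). A minimal stand-alone model of that fixup, assuming the 5-byte jmp rel32 encoding (the real code asks encoder_get_inst_size() instead of hard-coding sizes):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
        uint8_t code[16] = {0};
        code[0] = 0xE9;                           // jmp rel32, displacement 0 for now
        uint8_t* next_pc = &code[0] + 5;          // first byte after the jump
        uint8_t* target  = &code[12];             // where label() later lands
        int32_t  off     = int32_t(target - next_pc);
        std::memcpy(&code[1], &off, sizeof off);  // the encoder_update_imm() step
        printf("patched rel32 = %d\n", off);      // prints 7
        return 0;
    }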
178+
179+void X86Assembler::prepare_esp(int old_offset)
180+{
181+ mStreamUpdate = mStream;
182+ SUB_IMM_TO_REG(old_offset, ESP);
183+}
184+
185+void X86Assembler::update_esp(int new_offset)
186+{
187+ encoder_update_imm_rm(new_offset, mStreamUpdate);
188+}
189+
190+void X86Assembler::shrink_esp(int shrink_offset)
191+{
192+ ADD_IMM_TO_REG(shrink_offset, ESP);
193+}
194+
195+void X86Assembler::callee_work()
196+{
197+ // set up the frame and save the callee-saved registers (EBX, ESI, EDI):
198+ /*
199+ push %ebp
200+ mov %esp,%ebp
201+ push %ebx
202+ push %esi
203+ push %edi
204+ */
205+ PUSH(EBP);
206+ MOV_REG_TO_REG(ESP, EBP);
207+ PUSH(EBX);
208+ PUSH(ESI);
209+ PUSH(EDI);
210+}
211+
212+void X86Assembler::return_work()
213+{
214+// pop %edi
215+// pop %esi
216+// pop %ebx
217+// pop %ebp
218+// ret
219+// (esp must already have been restored via shrink_esp; no movl %ebp,%esp is emitted)
220+// ret is equivalent to:
221+// pop %eax // the return address
222+// jmp *%eax
223+ POP(EDI);
224+ POP(ESI);
225+ POP(EBX);
226+ POP(EBP);
227+ encoder_return(mStream);
228+ mStream = mStream + encoder_get_inst_size(mStream);
229+}
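callee_work() and return_work() bracket the generated scanline with a conventional cdecl prologue/epilogue; since return_work() pops straight into EBP without a movl %ebp,%esp, the frame must already have been released with shrink_esp(). prepare_esp()/update_esp() exist because the final frame size is only known after the body has been emitted. A usage sketch of the expected sequence (an assumption; the real call sites are in the scanline generator, and 64 is a made-up frame size):

    void emit_frame(X86Assembler& as) {
        as.callee_work();   // push %ebp; mov %esp,%ebp; push %ebx/%esi/%edi
        as.prepare_esp(0);  // sub $0,%esp -- placeholder, remembered in mStreamUpdate
        // ... emit the body; spill slots grow the frame ...
        as.update_esp(64);  // patch the sub's immediate to the real frame size
        as.shrink_esp(64);  // add $64,%esp: release the frame
        as.return_work();   // pop %edi/%esi/%ebx/%ebp; ret
    }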
230+
231+int X86Assembler::generate(const char* name)
232+{
233+ // fixup all the branches
234+ size_t count = mBranchTargets.size();
235+ while (count--) {
236+ const branch_target_t& bt = mBranchTargets[count];
237+ char* target_pc = mLabels.valueFor(bt.label);
238+ LOG_ALWAYS_FATAL_IF(!target_pc,
239+ "error resolving branch targets, target_pc is null");
240+ // the encoded offset is relative to the instruction following the jump
241+ int32_t offset = int32_t(target_pc - bt.next_pc);
242+ encoder_update_imm(offset, bt.pc);
243+ }
244+
245+ mAssembly->resize((int)(pc()-base()));
246+
247+ // the instruction cache is flushed by CodeCache
248+ const int64_t duration = ggl_system_time() - mDuration;
249+ const char * const format = "generated %s (%d bytes) at [%p:%p] in %lld ns\n";
250+ ALOGI(format, name, int(pc()-base()), base(), pc(), duration);
251+
252+#if defined(WITH_LIB_HARDWARE)
253+ if (__builtin_expect(mQemuTracing, 0)) {
254+ int err = qemu_add_mapping(uintptr_t(base()), name);
255+ mQemuTracing = (err >= 0);
256+ }
257+#endif
258+
259+ char value[PROPERTY_VALUE_MAX];
260+ property_get("debug.pf.disasm", value, "0");
261+ if (atoi(value) != 0) {
262+ printf(format, name, int(pc()-base()), base(), pc(), duration);
263+ disassemble(name);
264+ }
265+
266+ return NO_ERROR;
267+}
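Because generate() reads the debug.pf.disasm system property on every call, the disassembly dump can be toggled at run time without rebuilding, e.g. with adb shell setprop debug.pf.disasm 1.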
268+
269+char* X86Assembler::pcForLabel(const char* label)
270+{
271+ return mLabels.valueFor(label);
272+}
273+
274+// ----------------------------------------------------------------------------
275+
276+void X86Assembler::PUSH(int reg) {
277+ encoder_reg(Mnemonic_PUSH, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
278+ mStream = mStream + encoder_get_inst_size(mStream);
279+}
280+
281+void X86Assembler::POP(int reg) {
282+ encoder_reg(Mnemonic_POP, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
283+ mStream = mStream + encoder_get_inst_size(mStream);
284+}
285+
286+//arithmetic
287+void X86Assembler::ADD_REG_TO_REG(int src, int dst) {
288+ encoder_reg_reg(Mnemonic_ADD, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
289+ mStream = mStream + encoder_get_inst_size(mStream);
290+}
291+
292+void X86Assembler::ADD_IMM_TO_REG(int imm, int dst) {
293+ encoder_imm_reg(Mnemonic_ADD, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
294+ mStream = mStream + encoder_get_inst_size(mStream);
295+}
296+
297+void X86Assembler::ADD_IMM_TO_MEM(int imm, int disp, int dst) {
298+ encoder_imm_mem(Mnemonic_ADD, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream);
299+ mStream = mStream + encoder_get_inst_size(mStream);
300+}
301+
302+void X86Assembler::ADD_MEM_TO_REG(int base_reg, int disp, int dst) {
303+ encoder_mem_reg(Mnemonic_ADD, OpndSize_32, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/,LowOpndRegType_gp, mStream);
304+ mStream = mStream + encoder_get_inst_size(mStream);
305+}
306+
307+void X86Assembler::ADD_REG_TO_MEM(int src, int base_reg, int disp) {
308+ encoder_reg_mem(Mnemonic_ADD, OpndSize_32, src, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream);
309+ mStream = mStream + encoder_get_inst_size(mStream);
310+}
311+
312+void X86Assembler::SUB_REG_TO_REG(int src, int dst) {
313+ encoder_reg_reg(Mnemonic_SUB, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
314+ mStream = mStream + encoder_get_inst_size(mStream);
315+}
316+
317+void X86Assembler::SUB_IMM_TO_REG(int imm, int dst) {
318+ encoder_imm_reg(Mnemonic_SUB, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
319+ mStream = mStream + encoder_get_inst_size(mStream);
320+}
321+
322+void X86Assembler::SUB_IMM_TO_MEM(int imm, int disp, int dst) {
323+ encoder_imm_mem(Mnemonic_SUB, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream);
324+ mStream = mStream + encoder_get_inst_size(mStream);
325+}
326+
327+void X86Assembler::SUB_REG_TO_MEM(int src, int base_reg, int disp) {
328+ encoder_reg_mem(Mnemonic_SUB, OpndSize_32, src, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream);
329+ mStream = mStream + encoder_get_inst_size(mStream);
330+}
331+
332+//test
333+void X86Assembler::TEST_REG_TO_REG(int src, int dst, OpndSize size) {
334+ encoder_reg_reg(Mnemonic_TEST, size, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
335+ mStream = mStream + encoder_get_inst_size(mStream);
336+}
337+
338+//compare
339+void X86Assembler::CMP_REG_TO_REG(int src, int dst, OpndSize size) {
340+ encoder_reg_reg(Mnemonic_CMP, size, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
341+ mStream = mStream + encoder_get_inst_size(mStream);
342+}
343+
344+void X86Assembler::CMP_IMM_TO_REG(int imm, int dst) {
345+ encoder_imm_reg(Mnemonic_CMP, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
346+ mStream = mStream + encoder_get_inst_size(mStream);
347+}
348+
349+void X86Assembler::CMP_MEM_TO_REG(int base_reg, int disp, int dst, OpndSize size) {
350+ encoder_mem_reg(Mnemonic_CMP, size, disp, base_reg, 0/*isBasePhysical*/,
351+ dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
352+ mStream = mStream + encoder_get_inst_size(mStream);
353+}
354+
355+void X86Assembler::CMP_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size)
356+{
357+ encoder_reg_mem(Mnemonic_CMP, size, reg, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream);
358+ mStream = mStream + encoder_get_inst_size(mStream);
359+}
360+
361+//logical
362+void X86Assembler::AND_REG_TO_REG(int src, int dst) {
363+ encoder_reg_reg(Mnemonic_AND, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
364+ mStream = mStream + encoder_get_inst_size(mStream);
365+}
366+
367+void X86Assembler::AND_IMM_TO_REG(int imm, int dst) {
368+ encoder_imm_reg(Mnemonic_AND, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
369+ mStream = mStream + encoder_get_inst_size(mStream);
370+}
371+
372+void X86Assembler::OR_REG_TO_REG(int src, int dst) {
373+ encoder_reg_reg(Mnemonic_OR, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
374+ mStream = mStream + encoder_get_inst_size(mStream);
375+}
376+
377+void X86Assembler::XOR(int src, int dst) {
378+ encoder_reg_reg(Mnemonic_XOR, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
379+ mStream = mStream + encoder_get_inst_size(mStream);
380+}
381+
382+void X86Assembler::OR_IMM_TO_REG(int imm, int dst) {
383+ encoder_imm_reg(Mnemonic_OR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
384+ mStream = mStream + encoder_get_inst_size(mStream);
385+}
386+
387+void X86Assembler::NOT(int dst) {
388+ encoder_reg(Mnemonic_NOT, OpndSize_32, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
389+ mStream = mStream + encoder_get_inst_size(mStream);
390+}
391+
392+void X86Assembler::NEG(int dst) {
393+ encoder_reg(Mnemonic_NEG, OpndSize_32, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
394+ mStream = mStream + encoder_get_inst_size(mStream);
395+}
396+//shift
397+void X86Assembler::SHL(int imm, int dst) {
398+ encoder_imm_reg(Mnemonic_SHL, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
399+ mStream = mStream + encoder_get_inst_size(mStream);
400+}
401+
402+void X86Assembler::SHL(int imm, int disp, int dst) {
403+ encoder_imm_mem(Mnemonic_SHL, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream);
404+ mStream = mStream + encoder_get_inst_size(mStream);
405+}
406+
407+void X86Assembler::SHR(int imm, int dst) {
408+ encoder_imm_reg(Mnemonic_SHR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
409+ mStream = mStream + encoder_get_inst_size(mStream);
410+}
411+
412+void X86Assembler::SHR(int imm, int disp, int dst) {
413+ encoder_imm_mem(Mnemonic_SHR, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream);
414+ mStream = mStream + encoder_get_inst_size(mStream);
415+}
416+
417+void X86Assembler::SAR(int imm, int dst) {
418+ encoder_imm_reg(Mnemonic_SAR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
419+ mStream = mStream + encoder_get_inst_size(mStream);
420+}
421+
422+void X86Assembler::ROR(const int imm, int dst) {
423+ encoder_imm_reg(Mnemonic_ROR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
424+ mStream = mStream + encoder_get_inst_size(mStream);
425+}
426+
427+void X86Assembler::ROR(int imm, int disp, int dst) {
428+ encoder_imm_mem(Mnemonic_ROR, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream);
429+ mStream = mStream + encoder_get_inst_size(mStream);
430+}
431+// sign extension
432+void X86Assembler::MOVSX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst) {
433+ encoder_moves_mem_to_reg(size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, mStream);
434+ mStream = mStream + encoder_get_inst_size(mStream);
435+}
436+
437+void X86Assembler::MOVSX_REG_TO_REG(OpndSize size, int src, int dst) {
438+ encoder_moves_reg_to_reg(size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
439+ mStream = mStream + encoder_get_inst_size(mStream);
440+}
441+// zero extension
442+void X86Assembler::MOVZX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst) {
443+ encoder_movez_mem_to_reg(size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, mStream);
444+ mStream = mStream + encoder_get_inst_size(mStream);
445+}
446+
447+void X86Assembler::MOVZX_REG_TO_REG(OpndSize size, int src, int dst) {
448+ encoder_movez_reg_to_reg(size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
449+ mStream = mStream + encoder_get_inst_size(mStream);
450+}
451+
452+// multiply...
453+// one-operand forms use EAX as the implicit source; the result goes to EDX:EAX
454+void X86Assembler::IMUL(int reg) {
455+ encoder_reg(Mnemonic_IMUL, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
456+ mStream = mStream + encoder_get_inst_size(mStream);
457+}
458+
459+void X86Assembler::IMUL(int src, int dst) {
460+ encoder_reg_reg(Mnemonic_IMUL, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
461+ mStream = mStream + encoder_get_inst_size(mStream);
462+}
463+
464+void X86Assembler::MUL(int reg) {
465+ encoder_reg(Mnemonic_MUL, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
466+ mStream = mStream + encoder_get_inst_size(mStream);
467+}
468+
469+
470+// data transfer...
471+void X86Assembler::MOV_IMM_TO_REG(int32_t imm, int dst) {
472+ encoder_imm_reg(Mnemonic_MOV, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
473+ mStream = mStream + encoder_get_inst_size(mStream);
474+}
475+
476+void X86Assembler::MOV_REG_TO_REG(int src, int dst, OpndSize size)
477+{
478+ if(src == dst) return;
479+ encoder_reg_reg(Mnemonic_MOV, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
480+ mStream = mStream + encoder_get_inst_size(mStream);
481+}
482+
483+void X86Assembler::MOV_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size)
484+{
485+ encoder_reg_mem(Mnemonic_MOV, size, reg, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream);
486+ mStream = mStream + encoder_get_inst_size(mStream);
487+}
488+
489+void X86Assembler::MOV_MEM_TO_REG(int disp, int base_reg, int reg, OpndSize size)
490+{
491+ encoder_mem_reg(Mnemonic_MOV, size, disp, base_reg, 0/*isBasePhysical*/,
492+ reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
493+ mStream = mStream + encoder_get_inst_size(mStream);
494+}
495+
496+void X86Assembler::MOV_MEM_SCALE_TO_REG(int base_reg, int index_reg, int scale, int reg, OpndSize size)
497+{
498+ encoder_mem_scale_reg(Mnemonic_MOV, size, base_reg, 0/*isBasePhysical*/, index_reg, 0/*isIndexPhysical*/, scale, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
499+ mStream = mStream + encoder_get_inst_size(mStream);
500+}
501+// the conditional move
502+void X86Assembler::CMOV_REG_TO_REG(Mnemonic cc, int src, int dst, OpndSize size)
503+{
504+ switch (cc) {
505+ case Mnemonic_CMOVO:
506+ encoder_reg_reg(Mnemonic_CMOVO, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
507+ break;
508+ case Mnemonic_CMOVNO:
509+ encoder_reg_reg(Mnemonic_CMOVNO, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
510+ break;
511+ case Mnemonic_CMOVB:
512+ encoder_reg_reg(Mnemonic_CMOVB, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
513+ break;
514+ case Mnemonic_CMOVNB:
515+ encoder_reg_reg(Mnemonic_CMOVNB, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
516+ break;
517+ case Mnemonic_CMOVZ:
518+ encoder_reg_reg(Mnemonic_CMOVZ, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
519+ break;
520+ case Mnemonic_CMOVNZ:
521+ encoder_reg_reg(Mnemonic_CMOVNZ, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
522+ break;
523+ case Mnemonic_CMOVBE:
524+ encoder_reg_reg(Mnemonic_CMOVBE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
525+ break;
526+ case Mnemonic_CMOVNBE:
527+ encoder_reg_reg(Mnemonic_CMOVNBE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
528+ break;
529+ case Mnemonic_CMOVS:
530+ encoder_reg_reg(Mnemonic_CMOVS, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
531+ break;
532+ case Mnemonic_CMOVNS:
533+ encoder_reg_reg(Mnemonic_CMOVNS, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
534+ break;
535+ case Mnemonic_CMOVP:
536+ encoder_reg_reg(Mnemonic_CMOVP, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
537+ break;
538+ case Mnemonic_CMOVNP:
539+ encoder_reg_reg(Mnemonic_CMOVNP, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
540+ break;
541+ case Mnemonic_CMOVL:
542+ encoder_reg_reg(Mnemonic_CMOVL, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
543+ break;
544+ case Mnemonic_CMOVNL:
545+ encoder_reg_reg(Mnemonic_CMOVNL, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
546+ break;
547+ case Mnemonic_CMOVLE:
548+ encoder_reg_reg(Mnemonic_CMOVLE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
549+ break;
550+ case Mnemonic_CMOVNLE:
551+ encoder_reg_reg(Mnemonic_CMOVNLE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
552+ break;
553+ default :
554+ printf("the condition is not supported.\n");
555+ return;
556+ }
557+ mStream = mStream + encoder_get_inst_size(mStream);
558+}
559+
560+void X86Assembler::CMOV_MEM_TO_REG(Mnemonic cc, int disp, int base_reg, int dst, OpndSize size)
561+{
562+ switch (cc) {
563+ case Mnemonic_CMOVO:
564+ encoder_mem_reg(Mnemonic_CMOVO, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
565+ break;
566+ case Mnemonic_CMOVNO:
567+ encoder_mem_reg(Mnemonic_CMOVNO, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
568+ break;
569+ case Mnemonic_CMOVB:
570+ encoder_mem_reg(Mnemonic_CMOVB, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
571+ break;
572+ case Mnemonic_CMOVNB:
573+ encoder_mem_reg(Mnemonic_CMOVNB, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
574+ break;
575+ case Mnemonic_CMOVZ:
576+ encoder_mem_reg(Mnemonic_CMOVZ, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
577+ break;
578+ case Mnemonic_CMOVNZ:
579+ encoder_mem_reg(Mnemonic_CMOVNZ, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
580+ break;
581+ case Mnemonic_CMOVBE:
582+ encoder_mem_reg(Mnemonic_CMOVBE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
583+ break;
584+ case Mnemonic_CMOVNBE:
585+ encoder_mem_reg(Mnemonic_CMOVNBE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
586+ break;
587+ case Mnemonic_CMOVS:
588+ encoder_mem_reg(Mnemonic_CMOVS, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
589+ break;
590+ case Mnemonic_CMOVNS:
591+ encoder_mem_reg(Mnemonic_CMOVNS, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
592+ break;
593+ case Mnemonic_CMOVP:
594+ encoder_mem_reg(Mnemonic_CMOVP, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
595+ break;
596+ case Mnemonic_CMOVNP:
597+ encoder_mem_reg(Mnemonic_CMOVNP, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
598+ break;
599+ case Mnemonic_CMOVL:
600+ encoder_mem_reg(Mnemonic_CMOVL, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
601+ break;
602+ case Mnemonic_CMOVNL:
603+ encoder_mem_reg(Mnemonic_CMOVNL, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
604+ break;
605+ case Mnemonic_CMOVLE:
606+ encoder_mem_reg(Mnemonic_CMOVLE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
607+ break;
608+ case Mnemonic_CMOVNLE:
609+ encoder_mem_reg(Mnemonic_CMOVNLE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
610+ break;
611+ default :
612+ printf("the condition is not supported.\n");
613+ return;
614+ }
615+ mStream = mStream + encoder_get_inst_size(mStream);
616+}
617+
618+}; // namespace android
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/X86Assembler.h
@@ -0,0 +1,163 @@
1+/* libs/pixelflinger/codeflinger/x86/X86Assembler.h
2+**
3+** Copyright 2006, The Android Open Source Project
4+**
5+** Licensed under the Apache License, Version 2.0 (the "License");
6+** you may not use this file except in compliance with the License.
7+** You may obtain a copy of the License at
8+**
9+** http://www.apache.org/licenses/LICENSE-2.0
10+**
11+** Unless required by applicable law or agreed to in writing, software
12+** distributed under the License is distributed on an "AS IS" BASIS,
13+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+** See the License for the specific language governing permissions and
15+** limitations under the License.
16+*/
17+
18+#ifndef ANDROID_X86ASSEMBLER_H
19+#define ANDROID_X86ASSEMBLER_H
20+
21+#include <stdint.h>
22+#include <sys/types.h>
23+
24+#include <utils/Vector.h>
25+#include <utils/KeyedVector.h>
26+
27+#include "codeflinger/tinyutils/smartpointer.h"
28+#include "codeflinger/CodeCache.h"
29+#include "enc_wrapper.h"
30+
31+namespace android {
32+
33+// ----------------------------------------------------------------------------
34+
35+class X86Assembler
36+{
37+public:
38+
39+ enum {
40+ EAX = PhysicalReg_EAX, EBX = PhysicalReg_EBX, ECX = PhysicalReg_ECX,
41+ EDX = PhysicalReg_EDX, EDI = PhysicalReg_EDI, ESI = PhysicalReg_ESI,
42+ ESP = PhysicalReg_ESP, EBP = PhysicalReg_EBP
43+ };
44+
45+ X86Assembler(const sp<Assembly>& assembly);
46+ ~X86Assembler();
47+
48+ char* base() const;
49+ char* pc() const;
50+
51+
52+ void disassemble(const char* name);
53+
54+ // ------------------------------------------------------------------------
55+ // X86AssemblerInterface...
56+ // ------------------------------------------------------------------------
57+
58+ void reset();
59+
60+ int generate(const char* name);
61+
62+ void comment(const char* string);
63+
64+ void label(const char* theLabel);
65+
66+ void JCC(Mnemonic cc, const char* label);
67+
68+ void JMP(const char* label);
69+
70+ void prepare_esp(int old_offset);
71+
72+ void update_esp(int new_offset);
73+
74+ void shrink_esp(int shrink_offset);
75+
76+ void callee_work();
77+
78+ void return_work();
79+
80+ char* pcForLabel(const char* label);
81+
82+ void PUSH(int reg);
83+
84+ void POP(int reg);
85+
86+ void ADD_REG_TO_REG(int src, int dst);
87+ void ADD_IMM_TO_REG(int imm, int dst);
88+ void ADD_IMM_TO_MEM(int imm, int disp, int dst);
89+ void ADD_MEM_TO_REG(int base_reg, int disp, int dst);
90+ void ADD_REG_TO_MEM(int src, int base_reg, int disp);
91+ void SUB_REG_TO_REG(int src, int dst);
92+ void SUB_IMM_TO_REG(int imm, int dst);
93+ void SUB_IMM_TO_MEM(int imm, int disp, int dst);
94+ void SUB_REG_TO_MEM(int src, int base_reg, int disp);
95+
96+ void TEST_REG_TO_REG(int src, int dst, OpndSize size=OpndSize_32);
97+ void CMP_REG_TO_REG(int src, int dst, OpndSize size=OpndSize_32);
98+ void CMP_MEM_TO_REG(int base_reg, int disp, int dst, OpndSize size=OpndSize_32);
99+ void CMP_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size=OpndSize_32);
100+ void CMP_IMM_TO_REG(int imm, int dst);
101+
102+ void AND_REG_TO_REG(int src, int dst);
103+ void AND_IMM_TO_REG(int imm, int dst);
104+ void OR_REG_TO_REG(int src, int dst);
105+ void XOR(int src, int dst);
106+ void OR_IMM_TO_REG(int imm, int dst);
107+ void NOT(int dst);
108+ void NEG(int dst);
109+ void SHL(int imm, int dst);
110+ void SHL(int imm, int disp, int dst);
111+ void SHR(int imm, int dst);
112+ void SHR(int imm, int disp, int dst);
113+ void SAR(int imm, int dst);
114+ void ROR(const int imm, int dst);
115+ void ROR(int imm, int disp, int dst);
116+ void IMUL(int reg);
117+ void IMUL(int src, int dst);
118+ void MUL(int reg);
119+
120+ void MOVSX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst);
121+ void MOVSX_REG_TO_REG(OpndSize size, int src, int dst);
122+ void MOVZX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst);
123+ void MOVZX_REG_TO_REG(OpndSize size, int src, int dst);
124+ void MOV_IMM_TO_REG(int32_t imm, int dst);
125+ void MOV_REG_TO_REG(int src, int dst, OpndSize size=OpndSize_32);
126+ void MOV_MEM_TO_REG(int disp, int base_reg, int reg, OpndSize size=OpndSize_32);
127+ void MOV_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size=OpndSize_32);
128+ void MOV_MEM_SCALE_TO_REG(int base_reg, int index_reg, int scale, int reg, OpndSize size=OpndSize_32);
129+ void CMOV_REG_TO_REG(Mnemonic cc, int src, int dst, OpndSize size=OpndSize_32);
130+ void CMOV_MEM_TO_REG(Mnemonic cc, int disp, int base_reg, int dst, OpndSize size=OpndSize_32);
131+
132+
133+ sp<Assembly> mAssembly;
134+ char* mBase;
135+ char* mStream;
136+ //branch target offset is relative to the next instruction
137+ char* mStreamNext;
139+ // position of the esp-adjusting instruction, patched by update_esp()
139+ char* mStreamUpdate;
140+
141+ int64_t mDuration;
142+#if defined(WITH_LIB_HARDWARE)
143+ bool mQemuTracing;
144+#endif
145+
146+ struct branch_target_t {
147+ inline branch_target_t() : label(0), pc(0), next_pc(0) { }
148+ inline branch_target_t(const char* l, char* p, char* next_p)
149+ : label(l), pc(p), next_pc(next_p) { }
150+ const char* label;
151+ char* pc;
152+ char* next_pc;
153+ };
154+
155+ Vector<branch_target_t> mBranchTargets;
156+ KeyedVector< const char*, char* > mLabels;
157+ KeyedVector< char*, const char* > mLabelsInverseMapping;
158+ KeyedVector< char*, const char* > mComments;
159+};
160+
161+}; // namespace android
162+
163+#endif //ANDROID_X86ASSEMBLER_H
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/blending.cpp
@@ -0,0 +1,974 @@
1+/* libs/pixelflinger/codeflinger/x86/blending.cpp
2+**
3+** Copyright 2006, The Android Open Source Project
4+**
5+** Licensed under the Apache License, Version 2.0 (the "License");
6+** you may not use this file except in compliance with the License.
7+** You may obtain a copy of the License at
8+**
9+** http://www.apache.org/licenses/LICENSE-2.0
10+**
11+** Unless required by applicable law or agreed to in writing, software
12+** distributed under the License is distributed on an "AS IS" BASIS,
13+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+** See the License for the specific language governing permissions and
15+** limitations under the License.
16+*/
17+
18+#include <assert.h>
19+#include <stdint.h>
20+#include <stdlib.h>
21+#include <stdio.h>
22+#include <sys/types.h>
23+
24+#include <cutils/log.h>
25+
26+#include "codeflinger/x86/GGLX86Assembler.h"
27+
28+
29+namespace android {
30+
31+void GGLX86Assembler::build_fog(
32+ component_t& temp, // incoming fragment / output
33+ int component,
34+ Scratch& regs)
35+{
36+ if (mInfo[component].fog) {
37+ Scratch scratches(registerFile());
38+ comment("fog");
39+
40+ temp.reg = scratches.obtain();
41+ MOV_MEM_TO_REG(temp.offset_ebp, EBP, temp.reg);
42+ integer_t fragment(temp.reg, temp.h, temp.flags, temp.offset_ebp);
43+ if (!(temp.flags & CORRUPTIBLE)) {
44+ temp.reg = regs.obtain();
45+ temp.flags |= CORRUPTIBLE;
46+ }
47+
48+ integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
49+ mBuilderContext.Rctx = scratches.obtain();
50+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
51+ MOVZX_MEM_TO_REG(OpndSize_8, mBuilderContext.Rctx, GGL_OFFSETOF(state.fog.color[component]), fogColor.reg);
52+
53+ integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
54+ CONTEXT_LOAD(factor.reg, generated_vars.f);
55+ scratches.recycle(mBuilderContext.Rctx);
56+
57+ // clamp fog factor (TODO: see if there is a way to guarantee
58+ // we won't overflow when setting the iterators)
59+ int temp_reg = scratches.obtain();
60+ MOV_REG_TO_REG(factor.reg, temp_reg);
61+ SAR(31, temp_reg);
62+ NOT(temp_reg);
63+ AND_REG_TO_REG(temp_reg, factor.reg);
64+ MOV_IMM_TO_REG(0x10000, temp_reg);
65+ CMP_IMM_TO_REG(0x10000, factor.reg);
66+ CMOV_REG_TO_REG(Mnemonic_CMOVAE, temp_reg, factor.reg);
67+ scratches.recycle(temp_reg);
68+
69+ // we will reuse factor.reg
70+ build_blendFOneMinusF(temp, factor, fragment, fogColor);
71+ MOV_REG_TO_MEM(temp.reg, temp.offset_ebp, EBP);
72+ scratches.recycle(temp.reg);
73+ }
74+}
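The clamp emitted above is branch-free: SAR/NOT/AND forces negative factors to zero, then CMP/CMOVAE caps the result at 1.0 in 16.16 fixed point. An equivalent stand-alone C sketch (assuming arithmetic right shift of signed ints, which the generated SAR guarantees):

    #include <cstdint>
    #include <cstdio>

    static int32_t clamp_fog(int32_t f) {
        f &= ~(f >> 31);             // SAR 31; NOT; AND: negative factors become 0
        if ((uint32_t)f >= 0x10000)  // CMP 0x10000; CMOVAE
            f = 0x10000;             // 0x10000 is 1.0 in 16.16 fixed point
        return f;
    }

    int main() {
        printf("%x %x %x\n", clamp_fog(-5), clamp_fog(0x8000), clamp_fog(0x20000));
        // prints: 0 8000 10000
        return 0;
    }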
75+
76+void GGLX86Assembler::build_blending(
77+ component_t& temp, // incoming fragment / output
78+ pixel_t& pixel, // framebuffer
79+ int component,
80+ Scratch& regs)
81+{
82+ if (!mInfo[component].blend)
83+ return;
84+
85+ int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
86+ int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
87+ if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
88+ fs = GGL_ONE;
89+ const int blending = blending_codes(fs, fd);
90+ if (!temp.size()) {
91+ // here, blending will produce something which doesn't depend on
92+ // that component (e.g. GL_ZERO:GL_*), so the register has not been
93+ // allocated yet. It will never be used as a source.
94+ //temp = component_t(regs.obtain(), CORRUPTIBLE, temp_offset_ebp);
95+ temp.reg = regs.obtain();
96+ temp.flags = CORRUPTIBLE;
97+ temp.h = temp.l = 0;
98+ } else {
99+ temp.reg = regs.obtain();
100+ }
101+ MOV_MEM_TO_REG(temp.offset_ebp, EBP, temp.reg);
102+ // we are doing real blending...
103+ // fb: extracted dst
104+ // fragment: extracted src
105+ // temp: component_t(fragment) and result
106+
107+ // scoped register allocator
108+ Scratch scratches(registerFile());
109+ comment("blending");
110+
111+ // we can optimize these cases a bit...
112+ // (1) saturation is not needed
113+ // (2) we can use only one multiply instead of 2
114+ // (3) we can reduce the register pressure
115+ // R = S*f + D*(1-f) = (S-D)*f + D
116+ // R = S*(1-f) + D*f = (D-S)*f + S
117+
118+ const bool same_factor_opt1 =
119+ (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
120+ (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
121+ (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
122+ (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);
123+
124+ const bool same_factor_opt2 =
125+ (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
126+ (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
127+ (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
128+ (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);
129+
130+
131+ // XXX: we could also optimize these cases:
132+ // R = S*f + D*f = (S+D)*f
133+ // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
134+ // R = S*D + D*S = 2*S*D
135+
136+
137+ pixel.reg = scratches.obtain();
138+ MOV_MEM_TO_REG(pixel.offset_ebp, EBP, pixel.reg);
139+ // see if we need to extract 'component' from the destination (fb)
140+ integer_t fb;
141+ if (blending & (BLEND_DST|FACTOR_DST)) {
142+ fb.setTo(scratches.obtain(), 32);
143+ extract(fb, pixel, component);
144+ if (mDithering) {
145+ // XXX: maybe what we should do instead is simply
146+ // expand fb -or- fragment to the larger of the two
147+ if (fb.size() < temp.size()) {
148+ // for now we expand 'fb' to min(fragment, 8)
149+ int new_size = temp.size() < 8 ? temp.size() : 8;
150+ expand(fb, fb, new_size);
151+ }
152+ }
153+ }
154+
155+ // convert input fragment to integer_t
156+ if (temp.l && (temp.flags & CORRUPTIBLE)) {
157+ SHR(temp.l, temp.reg);
158+ temp.h -= temp.l;
159+ temp.l = 0;
160+ }
161+ integer_t fragment(temp.reg, temp.size(), temp.flags, temp.offset_ebp);
162+
163+ // if not done yet, convert input fragment to integer_t
164+ if (temp.l) {
165+ // here we know temp is not CORRUPTIBLE
166+ fragment.reg = scratches.obtain();
167+ MOV_REG_TO_REG(temp.reg, fragment.reg);
168+ SHR(temp.l, fragment.reg);
169+ fragment.flags |= CORRUPTIBLE;
170+ }
171+
172+ if (!(temp.flags & CORRUPTIBLE)) {
173+ // temp is not corruptible, but since it's the destination it
174+ // will be modified, so we need to allocate a new register.
175+ temp.reg = regs.obtain();
176+ temp.flags &= ~CORRUPTIBLE;
177+ fragment.flags &= ~CORRUPTIBLE;
178+ }
179+
180+ if ((blending & BLEND_SRC) && !same_factor_opt1) {
181+ // source (fragment) is needed for the blending stage
182+ // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
183+ fragment.flags &= ~CORRUPTIBLE;
184+ }
185+
186+
187+ if (same_factor_opt1) {
188+ // R = S*f + D*(1-f) = (S-D)*f + D
189+ integer_t factor;
190+ build_blend_factor(factor, fs,
191+ component, pixel, fragment, fb, scratches);
192+ // fb is always corruptible from this point
193+ fb.flags |= CORRUPTIBLE;
194+ // factor is reused by mul_factor_add in build_blendFOneMinusF, unless build_blend_factor aliased factor.reg to fragment.reg (== temp.reg) or to fb.reg
195+ if(factor.reg == fragment.reg || factor.reg == fb.reg)
196+ MOV_REG_TO_REG(factor.reg, pixel.reg);
197+ else
198+ scratches.recycle(pixel.reg);
199+ build_blendFOneMinusF(temp, factor, fragment, fb);
200+ if(factor.reg == fragment.reg || factor.reg == fb.reg) {
201+ MOV_REG_TO_REG(pixel.reg, factor.reg);
202+ scratches.recycle(pixel.reg);
203+ }
204+ scratches.recycle(fb.reg);
205+ //scratches.recycle(factor.reg);
206+ } else if (same_factor_opt2) {
207+ // R = S*(1-f) + D*f = (D-S)*f + S
208+ integer_t factor;
209+ // fb is always corruptible here
210+ fb.flags |= CORRUPTIBLE;
211+ build_blend_factor(factor, fd,
212+ component, pixel, fragment, fb, scratches);
213+ //we will reuse factor in mul_factor_add of build_blendFOneMinusFF, unless factor.reg == fragment.reg == temp.reg or factor.reg == fb.reg in build_blend_factor
214+ if(factor.reg == fragment.reg || factor.reg == fb.reg)
215+ MOV_REG_TO_REG(factor.reg, pixel.reg);
216+ else
217+ scratches.recycle(pixel.reg);
218+ build_blendOneMinusFF(temp, factor, fragment, fb);
219+ if(factor.reg == fragment.reg || factor.reg == fb.reg) {
220+ MOV_REG_TO_REG(pixel.reg, factor.reg);
221+ scratches.recycle(pixel.reg);
222+ }
223+ scratches.recycle(fb.reg);
224+ } else {
225+ integer_t src_factor;
226+ integer_t dst_factor;
227+
228+ // if destination (fb) is not needed for the blending stage,
229+ // then it can be marked as CORRUPTIBLE
230+ if (!(blending & BLEND_DST)) {
231+ fb.flags |= CORRUPTIBLE;
232+ }
233+
234+ // XXX: try to mark some registers as CORRUPTIBLE
235+ // in most case we could make those corruptible
236+ // when we're processing the last component
237+ // but not always, for instance
238+ // when fragment is constant and not reloaded
239+ // when fb is needed for logic-ops or masking
240+ // when a register is aliased (for instance with mAlphaSource)
241+
242+ // blend away...
243+ if (fs==GGL_ZERO) {
244+ if (fd==GGL_ZERO) { // R = 0
245+ // already taken care of
246+ } else if (fd==GGL_ONE) { // R = D
247+ // already taken care of
248+ } else { // R = D*fd
249+ // compute fd
250+ build_blend_factor(dst_factor, fd,
251+ component, pixel, fragment, fb, scratches);
252+ scratches.recycle(pixel.reg);
253+ mul_factor(temp, fb, dst_factor, regs);
254+ scratches.recycle(fb.reg);
255+ }
256+ } else if (fs==GGL_ONE) {
257+ int temp_reg;
258+ if (fd==GGL_ZERO) { // R = S
259+ // NOP, taken care of
260+ } else if (fd==GGL_ONE) { // R = S + D
261+ component_add(temp, fb, fragment); // args order matters
262+ temp_reg = scratches.obtain();
263+ component_sat(temp, temp_reg);
264+ scratches.recycle(temp_reg);
265+ } else { // R = S + D*fd
266+ // compute fd
267+ build_blend_factor(dst_factor, fd,
268+ component, pixel, fragment, fb, scratches);
269+ // dst_factor may be clobbered by mul_factor_add, unless build_blend_factor aliased it to fragment.reg (== temp.reg) or to fb.reg
270+ if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg)
271+ MOV_REG_TO_REG(dst_factor.reg, pixel.reg);
272+ else
273+ scratches.recycle(pixel.reg);
274+ mul_factor_add(temp, fb, dst_factor, component_t(fragment));
275+ if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg) {
276+ MOV_REG_TO_REG(pixel.reg, dst_factor.reg);
277+ scratches.recycle(pixel.reg);
278+ }
279+ temp_reg = fb.reg;
280+ component_sat(temp, temp_reg);
281+ scratches.recycle(fb.reg);
282+ }
283+ } else {
284+ // compute fs
285+ int temp_reg;
286+ build_blend_factor(src_factor, fs,
287+ component, pixel, fragment, fb, scratches);
288+ if (fd==GGL_ZERO) { // R = S*fs
289+ mul_factor(temp, fragment, src_factor, regs);
290+ if (scratches.isUsed(src_factor.reg))
291+ scratches.recycle(src_factor.reg);
292+ } else if (fd==GGL_ONE) { // R = S*fs + D
293+ // src_factor may be clobbered by mul_factor_add, unless build_blend_factor aliased it to fragment.reg (== temp.reg) or to fb.reg
294+ if(src_factor.reg == fragment.reg || src_factor.reg == fb.reg)
295+ MOV_REG_TO_REG(src_factor.reg, pixel.reg);
296+ else
297+ scratches.recycle(pixel.reg);
298+ mul_factor_add(temp, fragment, src_factor, component_t(fb));
299+ if(src_factor.reg == fragment.reg || src_factor.reg == fb.reg) {
300+ MOV_REG_TO_REG(pixel.reg, src_factor.reg);
301+ scratches.recycle(pixel.reg);
302+ }
303+ temp_reg = fb.reg;
304+ component_sat(temp, temp_reg);
305+ scratches.recycle(fb.reg);
306+ } else { // R = S*fs + D*fd
307+ mul_factor(temp, fragment, src_factor, regs);
308+ if (scratches.isUsed(src_factor.reg))
309+ scratches.recycle(src_factor.reg);
310+ // compute fd
311+ build_blend_factor(dst_factor, fd,
312+ component, pixel, fragment, fb, scratches);
313+ // dst_factor may be clobbered by mul_factor_add, unless build_blend_factor aliased it to fragment.reg (== temp.reg) or to fb.reg
314+ if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg)
315+ MOV_REG_TO_REG(dst_factor.reg, pixel.reg);
316+ else
317+ scratches.recycle(pixel.reg);
318+ mul_factor_add(temp, fb, dst_factor, temp);
319+ if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg) {
320+ MOV_REG_TO_REG(pixel.reg, dst_factor.reg);
321+ scratches.recycle(pixel.reg);
322+ }
323+ if (!same_factor_opt1 && !same_factor_opt2) {
324+ temp_reg = fb.reg;
325+ component_sat(temp, temp_reg);
326+ }
327+ scratches.recycle(fb.reg);
328+ }
329+ if(scratches.isUsed(pixel.reg))
330+ scratches.recycle(pixel.reg);
331+ }
332+ }
333+ // temp is modified, but it will be used immediately in downshift
334+ //printf("temp.offset_ebp: %d \n", temp.offset_ebp);
335+ // the case below is triggered on CDK when running surfaceflinger
336+ if(temp.offset_ebp == mAlphaSource.offset_ebp) {
337+ mCurSp = mCurSp - 4;
338+ temp.offset_ebp = mCurSp;
339+ }
340+ // the r, g, b values must be stored back; otherwise the colors rendered by globaltime are incorrect.
341+ MOV_REG_TO_MEM(temp.reg, temp.offset_ebp, EBP);
342+ regs.recycle(temp.reg);
343+
344+ // now we can be corrupted (it's the dest)
345+ temp.flags |= CORRUPTIBLE;
346+}
347+
348+void GGLX86Assembler::build_blend_factor(
349+ integer_t& factor, int f, int component,
350+ const pixel_t& dst_pixel,
351+ integer_t& fragment,
352+ integer_t& fb,
353+ Scratch& scratches)
354+{
355+ integer_t src_alpha(fragment);
356+
357+ // src_factor/dst_factor won't be used after blending,
358+ // so it's fine to mark them as CORRUPTIBLE (if not aliased)
359+ factor.flags |= CORRUPTIBLE;
360+ int temp_reg;
361+ switch(f) {
362+ case GGL_ONE_MINUS_SRC_ALPHA:
363+ case GGL_SRC_ALPHA:
364+ if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
365+ // we're processing alpha, so we already have
366+ // src-alpha in fragment, and we need src-alpha just this time.
367+ } else {
368+ // alpha-src will be needed for other components
369+ factor = mAlphaSource;
370+ factor.flags &= ~CORRUPTIBLE;
371+ factor.reg = scratches.obtain();
372+ //printf("mAlphaSource.offset_ebp: %d \n", mAlphaSource.offset_ebp);
373+ //printf("fragment.offset_ebp: %d \n", fragment.offset_ebp);
374+ //printf("factor.offset_ebp: %d \n", factor.offset_ebp);
375+ MOV_MEM_TO_REG(mAlphaSource.offset_ebp, EBP, factor.reg);
376+ if (!mBlendFactorCached || mBlendFactorCached==f) {
377+ src_alpha = mAlphaSource;
378+ // we already computed the blend factor before, nothing to do.
379+ if (mBlendFactorCached)
380+ return;
381+ // this is the first time, make sure to compute the blend
382+ // factor properly.
383+ mBlendFactorCached = f;
384+ break;
385+ } else {
386+ // we have a cached alpha blend factor, but we want another one;
387+ // this should really never happen because, by construction,
388+ // the source and destination blend factors cannot use
389+ // both ALPHA *and* ONE_MINUS_ALPHA (the blending stage
390+ // relies on the f/(1-f) optimization).
391+
392+ // for completeness, we handle this case anyway. Since there
393+ // are only 2 choices, this means we want "the other one":
394+ // (1-factor)
395+ //factor = mAlphaSource;
396+ //factor.flags &= ~CORRUPTIBLE;
397+ NEG(factor.reg);
398+ ADD_IMM_TO_REG((1<<factor.s), factor.reg);
399+ MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP);
400+ mBlendFactorCached = f;
401+ return;
402+ }
403+ }
404+ // fall-through...
405+ case GGL_ONE_MINUS_DST_COLOR:
406+ case GGL_DST_COLOR:
407+ case GGL_ONE_MINUS_SRC_COLOR:
408+ case GGL_SRC_COLOR:
409+ case GGL_ONE_MINUS_DST_ALPHA:
410+ case GGL_DST_ALPHA:
411+ case GGL_SRC_ALPHA_SATURATE:
412+ // help us find out what register we can use for the blend-factor
413+ // CORRUPTIBLE registers are chosen first, or a new one is allocated.
414+ if (fragment.flags & CORRUPTIBLE) {
415+ factor.setTo(fragment.reg, 32, CORRUPTIBLE, fragment.offset_ebp);
416+ fragment.flags &= ~CORRUPTIBLE;
417+ } else if (fb.flags & CORRUPTIBLE) {
418+ factor.setTo(fb.reg, 32, CORRUPTIBLE, fb.offset_ebp);
419+ fb.flags &= ~CORRUPTIBLE;
420+ } else {
421+ factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
422+ mCurSp = mCurSp - 4;
423+ factor.offset_ebp = mCurSp;
424+ }
425+ break;
426+ }
427+
428+ // XXX: doesn't work if size==1
429+
430+ switch(f) {
431+ case GGL_ONE_MINUS_DST_COLOR:
432+ case GGL_DST_COLOR:
433+ factor.s = fb.s;
434+ MOV_REG_TO_REG(fb.reg, factor.reg);
435+ SHR(fb.s-1, factor.reg);
436+ ADD_REG_TO_REG(fb.reg, factor.reg);
437+ break;
438+ case GGL_ONE_MINUS_SRC_COLOR:
439+ case GGL_SRC_COLOR:
440+ factor.s = fragment.s;
441+ temp_reg = scratches.obtain();
442+ MOV_REG_TO_REG(fragment.reg, temp_reg);
443+ SHR(fragment.s-1, fragment.reg);
444+ ADD_REG_TO_REG(temp_reg, fragment.reg);
445+ scratches.recycle(temp_reg);
446+ break;
447+ case GGL_ONE_MINUS_SRC_ALPHA:
448+ case GGL_SRC_ALPHA:
449+ factor.s = src_alpha.s;
450+ if (mBlendFactorCached == f) {
451+ // src_alpha == factor == mAlphaSource, so we need a temp reg
452+ if(scratches.countFreeRegs()) {
453+ temp_reg = scratches.obtain();
454+ MOV_REG_TO_REG(factor.reg, temp_reg);
455+ SHR(src_alpha.s-1, factor.reg);
456+ ADD_REG_TO_REG(temp_reg, factor.reg);
457+ scratches.recycle(temp_reg);
458+ }
459+ else {
460+ SHR(src_alpha.s-1, factor.offset_ebp, EBP);
461+ ADD_MEM_TO_REG(EBP, factor.offset_ebp, factor.reg);
462+ }
463+ }
464+ else
465+ {
466+ MOV_REG_TO_REG(src_alpha.reg, factor.reg);
467+ SHR(src_alpha.s-1, factor.reg);
468+ ADD_REG_TO_REG(src_alpha.reg, factor.reg);
469+ }
470+ // we will store factor in the next switch for GGL_ONE_MINUS_SRC_ALPHA
471+ if(f == GGL_SRC_ALPHA)
472+ MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP);
473+ break;
474+ case GGL_ONE_MINUS_DST_ALPHA:
475+ case GGL_DST_ALPHA:
476+ // XXX: should be precomputed
477+ extract(factor, dst_pixel, GGLFormat::ALPHA);
478+ temp_reg = scratches.obtain();
479+ MOV_REG_TO_REG(factor.reg, temp_reg);
480+ SHR(factor.s-1, factor.reg);
481+ ADD_REG_TO_REG(temp_reg, factor.reg);
482+ scratches.recycle(temp_reg);
483+ break;
484+ case GGL_SRC_ALPHA_SATURATE:
485+ // XXX: should be precomputed
486+ // XXX: f = min(As, 1-Ad)
487+ // btw, we're guaranteed that Ad's size is <= 8, because
488+ // it's extracted from the framebuffer
489+ break;
490+ }
491+
492+ switch(f) {
493+ case GGL_ONE_MINUS_DST_COLOR:
494+ case GGL_ONE_MINUS_SRC_COLOR:
495+ case GGL_ONE_MINUS_DST_ALPHA:
496+ case GGL_ONE_MINUS_SRC_ALPHA:
497+ NEG(factor.reg);
498+ ADD_IMM_TO_REG(1<<factor.s, factor.reg);
499+ MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP);
500+ }
501+
502+ // don't need more than 8-bits for the blend factor
503+ // and this will prevent overflows in the multiplies later
504+ if (factor.s > 8) {
505+ SHR(factor.s-8, factor.reg);
506+ factor.s = 8;
507+ if(f == GGL_ONE_MINUS_SRC_ALPHA || f == GGL_SRC_ALPHA)
508+ MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP);
509+ }
510+ // the case below is triggered on CDK when running surfaceflinger
511+ if(fragment.offset_ebp == mAlphaSource.offset_ebp)
512+ MOV_REG_TO_REG(factor.reg, fragment.reg);
513+}
514+
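// Worked example: for classic GL transparency, fs = GGL_SRC_ALPHA and
// fd = GGL_ONE_MINUS_SRC_ALPHA. The first switch yields FACTOR_SRC|BLEND_SRC,
// the second adds BLEND_DST, so the caller knows both the source and
// destination terms take part in the blend and that the factor is derived
// from the source (its alpha).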
515+int GGLX86Assembler::blending_codes(int fs, int fd)
516+{
517+ int blending = 0;
518+ switch(fs) {
519+ case GGL_ONE:
520+ blending |= BLEND_SRC;
521+ break;
522+
523+ case GGL_ONE_MINUS_DST_COLOR:
524+ case GGL_DST_COLOR:
525+ blending |= FACTOR_DST|BLEND_SRC;
526+ break;
527+ case GGL_ONE_MINUS_DST_ALPHA:
528+ case GGL_DST_ALPHA:
529+ // no need to extract 'component' from the destination
530+ // for the blend factor, because we need ALPHA only.
531+ blending |= BLEND_SRC;
532+ break;
533+
534+ case GGL_ONE_MINUS_SRC_COLOR:
535+ case GGL_SRC_COLOR:
536+ blending |= FACTOR_SRC|BLEND_SRC;
537+ break;
538+ case GGL_ONE_MINUS_SRC_ALPHA:
539+ case GGL_SRC_ALPHA:
540+ case GGL_SRC_ALPHA_SATURATE:
541+ blending |= FACTOR_SRC|BLEND_SRC;
542+ break;
543+ }
544+ switch(fd) {
545+ case GGL_ONE:
546+ blending |= BLEND_DST;
547+ break;
548+
549+ case GGL_ONE_MINUS_DST_COLOR:
550+ case GGL_DST_COLOR:
551+ blending |= FACTOR_DST|BLEND_DST;
552+ break;
553+ case GGL_ONE_MINUS_DST_ALPHA:
554+ case GGL_DST_ALPHA:
555+ blending |= FACTOR_DST|BLEND_DST;
556+ break;
557+
558+ case GGL_ONE_MINUS_SRC_COLOR:
559+ case GGL_SRC_COLOR:
560+ blending |= FACTOR_SRC|BLEND_DST;
561+ break;
562+ case GGL_ONE_MINUS_SRC_ALPHA:
563+ case GGL_SRC_ALPHA:
564+ // no need to extract 'component' from the source
565+ // for the blend factor, because we need ALPHA only.
566+ blending |= BLEND_DST;
567+ break;
568+ }
569+ return blending;
570+}
571+
572+// ---------------------------------------------------------------------------
573+
574+void GGLX86Assembler::build_blendFOneMinusF(
575+ component_t& temp,
576+ const integer_t& factor,
577+ const integer_t& fragment,
578+ const integer_t& fb)
579+{
580+ // R = S*f + D*(1-f) = (S-D)*f + D
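    // (Expanding: (S-D)*f + D = S*f - D*f + D = S*f + D*(1-f); the rewritten
    // form needs one multiply-accumulate instead of two multiplies. A
    // hypothetical scalar equivalent, assuming an 8-bit factor f:
    //     r = d + (((s - d) * f) >> 8)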
581+ // compute S-D
582+ Scratch scratches(registerFile());
583+ integer_t diff(fragment.flags & CORRUPTIBLE ?
584+ fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
585+ const int shift = fragment.size() - fb.size();
586+ if (shift>0) {
587+ MOV_REG_TO_REG(fragment.reg, diff.reg);
588+ SHR(shift, diff.reg);
589+ SUB_REG_TO_REG(fb.reg, diff.reg);
590+ } else if (shift<0) {
591+ MOV_REG_TO_REG(fragment.reg, diff.reg);
592+ SHL(-shift, diff.reg);
593+ SUB_REG_TO_REG(fb.reg, diff.reg);
594+ } else {
595+ MOV_REG_TO_REG(fragment.reg, diff.reg);
596+ SUB_REG_TO_REG(fb.reg, diff.reg);
597+ }
598+ mul_factor_add(temp, diff, factor, component_t(fb));
599+ if(!(fragment.flags & CORRUPTIBLE))
600+ scratches.recycle(diff.reg);
601+}
602+
603+void GGLX86Assembler::build_blendOneMinusFF(
604+ component_t& temp,
605+ const integer_t& factor,
606+ const integer_t& fragment,
607+ const integer_t& fb)
608+{
609+ // R = S*f + D*(1-f) = (S-D)*f + D
610+ Scratch scratches(registerFile());
611+ // compute D-S
612+ integer_t diff(fb.flags & CORRUPTIBLE ?
613+ fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
614+ const int shift = fragment.size() - fb.size();
615+ if (shift>0) {
616+ SHR(shift, fragment.reg);
617+ MOV_REG_TO_REG(fb.reg, diff.reg);
618+ SUB_REG_TO_REG(fragment.reg, diff.reg);
619+ }
620+ else if (shift<0) {
621+ SHR(-shift, fragment.reg);
622+ MOV_REG_TO_REG(fb.reg, diff.reg);
623+ SUB_REG_TO_REG(fragment.reg, diff.reg);
624+ }
625+ else {
626+ MOV_REG_TO_REG(fb.reg, diff.reg);
627+ SUB_REG_TO_REG(fragment.reg, diff.reg);
628+ }
629+
630+ mul_factor_add(temp, diff, factor, component_t(fragment));
631+    if(!(fb.flags & CORRUPTIBLE))
632+ scratches.recycle(diff.reg);
633+}
634+
635+// ---------------------------------------------------------------------------
636+
637+void GGLX86Assembler::mul_factor( component_t& d,
638+ const integer_t& v,
639+ const integer_t& f, Scratch& scratches)
640+{
641+ // f can be changed
642+ //
643+ int vs = v.size();
644+ int fs = f.size();
645+ int ms = vs+fs;
646+
647+ // XXX: we could have special cases for 1 bit mul
648+
649+    // All the code below selects the best multiply instruction
650+    // w.r.t. the operand sizes. We take advantage of the fact
651+    // that 16-bit multiplies allow a free 16-bit shift.
652+    // The trick is to make sure we keep at least 8 bits
653+    // per component (which is enough for an 8-bit display).
654+
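    // The xy* selectors below appear to mirror the ARM SMUL<x><y> halfword
    // multiplies (B = bottom 16 bits, T = top 16 bits), and smulw mirrors
    // SMULWB (32x16 multiply keeping bits [47:16] of the result); on x86
    // they are emulated with MOVSX + IMUL and, for smulw, by reassembling
    // EDX:EAX shifted right by 16.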
655+ int xy = -1;
656+ int vshift = 0;
657+ int fshift = 0;
658+ int smulw = 0;
659+
660+ int xyBB = 0;
661+ int xyTB = 1;
662+ int xyTT = 2;
663+ int xyBT = 3;
664+ if (vs<16) {
665+ if (fs<16) {
666+ xy = xyBB;
667+ } else if (GGL_BETWEEN(fs, 24, 31)) {
668+ ms -= 16;
669+ xy = xyTB;
670+ } else {
671+ // eg: 15 * 18 -> 15 * 15
672+ fshift = fs - 15;
673+ ms -= fshift;
674+ xy = xyBB;
675+ }
676+ } else if (GGL_BETWEEN(vs, 24, 31)) {
677+ if (fs<16) {
678+ ms -= 16;
679+ xy = xyTB;
680+ } else if (GGL_BETWEEN(fs, 24, 31)) {
681+ ms -= 32;
682+ xy = xyTT;
683+ } else {
684+ // eg: 24 * 18 -> 8 * 18
685+ fshift = fs - 15;
686+ ms -= 16 + fshift;
687+ xy = xyTB;
688+ }
689+ } else {
690+ if (fs<16) {
691+ // eg: 18 * 15 -> 15 * 15
692+ vshift = vs - 15;
693+ ms -= vshift;
694+ xy = xyBB;
695+ } else if (GGL_BETWEEN(fs, 24, 31)) {
696+ // eg: 18 * 24 -> 15 * 8
697+ vshift = vs - 15;
698+ ms -= 16 + vshift;
699+ xy = xyBT;
700+ } else {
701+ // eg: 18 * 18 -> (15 * 18)>>16
702+ fshift = fs - 15;
703+ ms -= 16 + fshift;
704+ //xy = yB; //XXX SMULWB
705+ smulw = 1;
706+ }
707+ }
708+
709+ ALOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
710+
711+ int vreg = v.reg;
712+ int freg = f.reg;
713+ if (vshift) {
714+ MOV_REG_TO_REG(vreg, d.reg);
715+ SHR(vshift, d.reg);
716+ vreg = d.reg;
717+ }
718+    if (fshift) {
719+        // f is allowed to change (see above), so shift the factor in place;
720+        // routing it through d.reg would be clobbered by the MOV just below.
721+        SHR(fshift, freg);
722+    }
723+ MOV_REG_TO_REG(vreg, d.reg);
724+ if (smulw) {
725+ int flag_push_edx = 0;
726+ int flag_reserve_edx = 0;
727+ int temp_reg2 = -1;
728+ int edx_offset_ebp = 0;
729+ if(scratches.isUsed(EDX) == 1) {
730+ if(d.reg != EDX) {
731+ flag_push_edx = 1;
732+ mCurSp = mCurSp - 4;
733+ edx_offset_ebp = mCurSp;
734+ MOV_REG_TO_MEM(EDX, edx_offset_ebp, EBP);
735+ //PUSH(EDX);
736+ }
737+ }
738+ else {
739+ flag_reserve_edx = 1;
740+ scratches.reserve(EDX);
741+ }
742+ if(scratches.isUsed(EAX)) {
743+ if( freg == EAX || d.reg == EAX) {
744+ MOVSX_REG_TO_REG(OpndSize_16, freg, freg);
745+ if(freg == EAX)
746+ IMUL(d.reg);
747+ else
748+ IMUL(freg);
749+ SHL(16, EDX);
750+ SHR(16, EAX);
751+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
752+ MOV_REG_TO_REG(EDX, d.reg);
753+ }
754+ else {
755+ int eax_offset_ebp = 0;
756+ if(scratches.countFreeRegs() > 0) {
757+ temp_reg2 = scratches.obtain();
758+ MOV_REG_TO_REG(EAX, temp_reg2);
759+ }
760+ else {
761+ mCurSp = mCurSp - 4;
762+ eax_offset_ebp = mCurSp;
763+ MOV_REG_TO_MEM(EAX, eax_offset_ebp, EBP);
764+ //PUSH(EAX);
765+ }
766+ MOV_REG_TO_REG(freg, EAX);
767+ MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX);
768+ IMUL(d.reg);
769+ SHL(16, EDX);
770+ SHR(16, EAX);
771+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
772+ MOV_REG_TO_REG(EDX, d.reg);
773+ if(temp_reg2 > -1) {
774+ MOV_REG_TO_REG(temp_reg2, EAX);
775+ scratches.recycle(temp_reg2);
776+ }
777+ else {
778+ MOV_MEM_TO_REG(eax_offset_ebp, EBP, EAX);
779+ //POP(EAX);
780+ }
781+ }
782+ }
783+ else {
784+ MOV_REG_TO_REG(freg, EAX);
785+ MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX);
786+ IMUL(d.reg);
787+ SHL(16, EDX);
788+ SHR(16, EAX);
789+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
790+ MOV_REG_TO_REG(EDX, d.reg);
791+ }
792+ if(flag_push_edx == 1) {
793+ MOV_MEM_TO_REG(edx_offset_ebp, EBP, EDX);
794+ //POP(EDX);
795+ }
796+ if(flag_reserve_edx ==1)
797+ scratches.recycle(EDX);
798+ }
799+ else {
800+ if(xy == xyBB) {
801+ MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg);
802+ MOVSX_REG_TO_REG(OpndSize_16, freg, freg);
803+ IMUL(freg, d.reg);
804+ }
805+ else if(xy == xyTB) {
806+ SHR(16, d.reg);
807+ MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg);
808+ MOVSX_REG_TO_REG(OpndSize_16, freg, freg);
809+ IMUL(freg, d.reg);
810+ }
811+ else if(xy == xyBT) {
812+ MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg);
813+ SHR(16, freg);
814+ MOVSX_REG_TO_REG(OpndSize_16, freg, freg);
815+ IMUL(freg, d.reg);
816+ }
817+ else if(xy == xyTT) {
818+ SHR(16, d.reg);
819+ MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg);
820+ SHR(16, freg);
821+ MOVSX_REG_TO_REG(OpndSize_16, freg, freg);
822+ IMUL(freg, d.reg);
823+ }
824+ }
825+
826+
827+ d.h = ms;
828+ if (mDithering) {
829+ d.l = 0;
830+ } else {
831+ d.l = fs;
832+ d.flags |= CLEAR_LO;
833+ }
834+}
835+
836+void GGLX86Assembler::mul_factor_add( component_t& d,
837+ const integer_t& v,
838+ const integer_t& f,
839+ const component_t& a)
840+{
841+ // XXX: we could have special cases for 1 bit mul
842+ Scratch scratches(registerFile());
843+
844+ int vs = v.size();
845+ int fs = f.size();
846+ int as = a.h;
847+ int ms = vs+fs;
848+
849+ ALOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
850+
851+ integer_t add(a.reg, a.h, a.flags, a.offset_ebp);
852+
853+
854+ // 'a' is a component_t but it is guaranteed to have
855+ // its high bits set to 0. However in the dithering case,
856+ // we can't get away with truncating the potentially bad bits
857+ // so extraction is needed.
858+
859+ if ((mDithering) && (a.size() < ms)) {
860+ // we need to expand a
861+ if (!(a.flags & CORRUPTIBLE)) {
862+ // ... but it's not corruptible, so we need to pick a
863+ // temporary register.
864+            // Try to use the destination register first (it's likely
865+ // to be usable, unless it aliases an input).
866+ if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
867+ add.reg = d.reg;
868+ } else {
869+ add.reg = scratches.obtain();
870+ }
871+ }
872+ expand(add, a, ms); // extracts and expands
873+ as = ms;
874+ }
875+
876+ if (ms == as) {
877+ MOV_REG_TO_REG(v.reg, d.reg);
878+ if (vs<16 && fs<16) {
879+ MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg);
880+ MOVSX_REG_TO_REG(OpndSize_16, f.reg, f.reg);
881+ IMUL(f.reg, d.reg);
882+ }
883+ else
884+ IMUL(f.reg, d.reg);
885+ ADD_REG_TO_REG(add.reg, d.reg);
886+ } else {
887+ //int temp = d.reg;
888+ //if (temp == add.reg) {
889+ // // the mul will modify add.reg, we need an intermediary reg
890+ // if (v.flags & CORRUPTIBLE) temp = v.reg;
891+ // else if (f.flags & CORRUPTIBLE) temp = f.reg;
892+ // else temp = scratches.obtain();
893+ //}
894+
895+        // d.reg below may clobber the "temp" result, so we use a new register
896+ int temp_reg;
897+ int v_offset_ebp = 0;
898+ if(scratches.countFreeRegs() == 0) {
899+ temp_reg = v.reg;
900+ mCurSp = mCurSp - 4;
901+ v_offset_ebp = mCurSp;
902+ MOV_REG_TO_MEM(v.reg, v_offset_ebp, EBP);
903+ }
904+ else {
905+ temp_reg = scratches.obtain();
906+ MOV_REG_TO_REG(v.reg, temp_reg);
907+ }
908+ if (vs<16 && fs<16) {
909+ MOVSX_REG_TO_REG(OpndSize_16, temp_reg, temp_reg);
910+ MOVSX_REG_TO_REG(OpndSize_16, f.reg, f.reg);
911+ IMUL(f.reg, temp_reg);
912+ }
913+ else
914+ IMUL(f.reg, temp_reg);
915+
916+ if (ms>as) {
917+ MOV_REG_TO_REG(add.reg, d.reg);
918+ SHL(ms-as, d.reg);
919+ ADD_REG_TO_REG(temp_reg, d.reg);
920+ } else if (ms<as) {
921+ // not sure if we should expand the mul instead?
922+ MOV_REG_TO_REG(add.reg, d.reg);
923+ SHL(as-ms, d.reg);
924+ ADD_REG_TO_REG(temp_reg, d.reg);
925+ }
926+ if(temp_reg == v.reg)
927+ MOV_MEM_TO_REG(v_offset_ebp, EBP, v.reg);
928+ else
929+ scratches.recycle(temp_reg);
930+ }
931+
932+ d.h = ms;
933+ if (mDithering) {
934+ d.l = a.l;
935+ } else {
936+ d.l = fs>a.l ? fs : a.l;
937+ d.flags |= CLEAR_LO;
938+ }
939+}
940+
941+void GGLX86Assembler::component_add(component_t& d,
942+ const integer_t& dst, const integer_t& src)
943+{
944+    // here we're guaranteed that src.size() >= dst.size() (fragment >= fb)
945+ const int shift = src.size() - dst.size();
946+ if (!shift) {
947+ MOV_REG_TO_REG(src.reg, d.reg);
948+ ADD_REG_TO_REG(dst.reg, d.reg);
949+ } else {
950+ MOV_REG_TO_REG(dst.reg, d.reg);
951+ SHL(shift, d.reg);
952+ ADD_REG_TO_REG(src.reg, d.reg);
953+ }
954+
955+ d.h = src.size();
956+ if (mDithering) {
957+ d.l = 0;
958+ } else {
959+ d.l = shift;
960+ d.flags |= CLEAR_LO;
961+ }
962+}
963+
964+void GGLX86Assembler::component_sat(const component_t& v, const int temp_reg)
965+{
966+ const int32_t one = ((1<<v.size())-1)<<v.l;
967+ MOV_IMM_TO_REG(one, temp_reg);
968+ CMP_IMM_TO_REG(1<<v.h, v.reg);
969+ CMOV_REG_TO_REG(Mnemonic_CMOVAE, temp_reg, v.reg);
970+}
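// e.g. for a component with h = 16, l = 8 (size 8): one = 0xFF00 and the
// CMP/CMOVAE pair clamps any value >= 0x10000 to 0xFF00, i.e. it saturates
// the component at its maximum instead of letting it wrap.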
971+
972+// ----------------------------------------------------------------------------
973+
974+}; // namespace android
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/Android.mk
@@ -0,0 +1,30 @@
1+#
2+# Copyright (C) 2015 The Android-x86 Open Source Project
3+#
4+# Licensed under the Apache License, Version 2.0 (the "License");
5+# you may not use this file except in compliance with the License.
6+# You may obtain a copy of the License at
7+#
8+# http://www.apache.org/licenses/LICENSE-2.0
9+#
10+# Unless required by applicable law or agreed to in writing, software
11+# distributed under the License is distributed on an "AS IS" BASIS,
12+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+# See the License for the specific language governing permissions and
14+# limitations under the License.
15+#
16+
17+LOCAL_PATH := $(call my-dir)
18+
19+enc_src_files := \
20+ dec_base.cpp \
21+ enc_base.cpp \
22+ enc_tabl.cpp \
23+ enc_wrapper.cpp
24+
25+include $(CLEAR_VARS)
26+LOCAL_SRC_FILES := $(enc_src_files)
27+LOCAL_MODULE := libenc
28+LOCAL_MODULE_TAGS := optional
29+LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)
30+include $(BUILD_STATIC_LIBRARY)
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/README.txt
@@ -0,0 +1,21 @@
1+Original source from Apache Harmony 5.0M15 (r991518 from 2010-09-01) at
2+http://harmony.apache.org/.
3+
4+The following files are from drlvm/vm/port/src/encoder/ia32_em64t.
5+
6+ dec_base.cpp
7+ dec_base.h
8+ enc_base.cpp
9+ enc_base.h
10+ enc_defs.h
11+ enc_prvt.h
12+ enc_tabl.cpp
13+ encoder.cpp
14+ encoder.h
15+ encoder.inl
16+
17+The following files are derived partially from the original Apache
18+Harmony files.
19+
20+ enc_defs_ext.h -- derived from enc_defs.h
21+ enc_wrapper.h -- derived from encoder.h
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/dec_base.cpp
@@ -0,0 +1,541 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+
21+/**
22+ * @file
23+ * @brief Main decoding (disassembling) routines implementation.
24+ */
25+
26+#include "dec_base.h"
27+#include "enc_prvt.h"
28+#include <stdio.h>
29+//#include "open/common.h"
30+
31+bool DecoderBase::is_prefix(const unsigned char * bytes)
32+{
33+ unsigned char b0 = *bytes;
34+ unsigned char b1 = *(bytes+1);
35+ if (b0 == 0xF0) { // LOCK
36+ return true;
37+ }
38+ if (b0==0xF2 || b0==0xF3) { // REPNZ/REPZ prefixes
39+ if (b1 == 0x0F) { // .... but may be a part of SIMD opcode
40+ return false;
41+ }
42+ return true;
43+ }
44+    if (b0 == 0x2E || b0 == 0x36 || b0==0x3E || b0==0x26 || b0==0x64 || b0==0x65) {
45+ // branch hints, segment prefixes
46+ return true;
47+ }
48+ if (b0==0x66) { // operand-size prefix
49+ if (b1 == 0x0F) { // .... but may be a part of SIMD opcode
50+ return false;
51+ }
52+        return false; // XXX: 0x66 is currently treated as part of the opcode, not as a prefix
53+ }
54+ if (b0==0x67) { // address size prefix
55+ return true;
56+ }
57+ return false;
58+}
59+
60+// Returns prefix count from 0 to 4, or ((unsigned int)-1) on error
61+unsigned int DecoderBase::fill_prefs(const unsigned char * bytes, Inst * pinst)
62+{
63+ const unsigned char * my_bytes = bytes;
64+
65+ while( 1 )
66+ {
67+ unsigned char by1 = *my_bytes;
68+ unsigned char by2 = *(my_bytes + 1);
69+ Inst::PrefGroups where;
70+
71+ switch( by1 )
72+ {
73+ case InstPrefix_REPNE:
74+ case InstPrefix_REP:
75+ {
76+ if( 0x0F == by2)
77+ {
78+ return pinst->prefc;
79+ }
80+ }
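            // fall through: REP/REPNE are Group1 prefixes, like LOCK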
81+ case InstPrefix_LOCK:
82+ {
83+ where = Inst::Group1;
84+ break;
85+ }
86+ case InstPrefix_CS:
87+ case InstPrefix_SS:
88+ case InstPrefix_DS:
89+ case InstPrefix_ES:
90+ case InstPrefix_FS:
91+ case InstPrefix_GS:
92+// case InstPrefix_HintTaken: the same as CS override
93+// case InstPrefix_HintNotTaken: the same as DS override
94+ {
95+ where = Inst::Group2;
96+ break;
97+ }
98+ case InstPrefix_OpndSize:
99+ {
100+//NOTE: this prefix does not work for JMP Sz16: the opcode is 0x66 0xe9,
101+// but 0x66 is treated here as a prefix, so try_mn would match the code
102+// starting at 0xe9 and find JMP Sz32 ...
103+//HACK: assume 0x66 is the last prefix and return unconditionally
104+// (this also covers the 0x0F/SIMD case, so Group3 is never recorded)
105+            return pinst->prefc;
111+ }
112+ case InstPrefix_AddrSize:
113+ {
114+ where = Inst::Group4;
115+ break;
116+ }
117+ default:
118+ {
119+ return pinst->prefc;
120+ }
121+ }
122+ // Assertions are not allowed here.
123+        // Error situations should result in returning an error status.
124+ if (InstPrefix_Null != pinst->pref[where]) //only one prefix in each group
125+ return (unsigned int)-1;
126+
127+ pinst->pref[where] = (InstPrefix)by1;
128+
129+ if (pinst->prefc >= 4) //no more than 4 prefixes
130+ return (unsigned int)-1;
131+
132+ pinst->prefc++;
133+ ++my_bytes;
134+ }
135+}
136+
137+
138+
139+unsigned DecoderBase::decode(const void * addr, Inst * pinst)
140+{
141+ Inst tmp;
142+
143+ //assert( *(unsigned char*)addr != 0x66);
144+
145+ const unsigned char * bytes = (unsigned char*)addr;
146+
147+ // Load up to 4 prefixes
148+ // for each Mnemonic
149+ unsigned int pref_count = fill_prefs(bytes, &tmp);
150+
151+ if (pref_count == (unsigned int)-1) // Wrong prefix sequence, or >4 prefixes
152+ return 0; // Error
153+
154+ bytes += pref_count;
155+
156+ // for each opcodedesc
157+ // if (raw_len == 0) memcmp(, raw_len)
158+ // else check the mixed state which is one of the following:
159+ // /digit /i /rw /rd /rb
160+
161+ bool found = false;
162+ const unsigned char * saveBytes = bytes;
163+ for (unsigned mn=1; mn<Mnemonic_Count; mn++) {
164+ bytes = saveBytes;
165+ found=try_mn((Mnemonic)mn, &bytes, &tmp);
166+ if (found) {
167+ tmp.mn = (Mnemonic)mn;
168+ break;
169+ }
170+ }
171+ if (!found) {
172+ // Unknown opcode
173+ return 0;
174+ }
175+ tmp.size = (unsigned)(bytes-(const unsigned char*)addr);
176+ if (pinst) {
177+ *pinst = tmp;
178+ }
179+ return tmp.size;
180+}
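// Minimal usage sketch (code_ptr stands for any caller-supplied byte pointer):
//     Inst inst;
//     unsigned len = DecoderBase::decode(code_ptr, &inst);
//     // len == 0 means an unknown opcode or a bad prefix sequence;
//     // otherwise inst.mn, inst.size, inst.argc and inst.operands[] are valid.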
181+
182+#ifdef _EM64T_
183+#define EXTEND_REG(reg, flag) \
184+ ((NULL == rex || 0 == rex->flag) ? reg : (reg + 8))
185+#else
186+#define EXTEND_REG(reg, flag) (reg)
187+#endif
188+
189+// rex appears to be unused unless _EM64T_ is enabled
190+bool DecoderBase::decode_aux(const EncoderBase::OpcodeDesc& odesc, unsigned aux,
191+ const unsigned char ** pbuf, Inst * pinst
192+#ifdef _EM64T_
193+ , const Rex UNREF *rex
194+#endif
195+ )
196+{
197+ OpcodeByteKind kind = (OpcodeByteKind)(aux & OpcodeByteKind_KindMask);
198+ unsigned byte = (aux & OpcodeByteKind_OpcodeMask);
199+ unsigned data_byte = **pbuf;
200+ EncoderBase::Operand& opnd = pinst->operands[pinst->argc];
201+ const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc];
202+
203+ switch (kind) {
204+ case OpcodeByteKind_SlashR:
205+ {
206+ RegName reg;
207+ OpndKind okind;
208+ const ModRM& modrm = *(ModRM*)*pbuf;
209+ if (opndDesc.kind & OpndKind_Mem) { // 1st operand is memory
210+#ifdef _EM64T_
211+ decodeModRM(odesc, pbuf, pinst, rex);
212+#else
213+ decodeModRM(odesc, pbuf, pinst);
214+#endif
215+ ++pinst->argc;
216+ const EncoderBase::OpndDesc& opndDesc2 = odesc.opnds[pinst->argc];
217+ okind = ((opndDesc2.kind & OpndKind_XMMReg) || opndDesc2.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
218+ EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc];
219+ reg = getRegName(okind, opndDesc2.size, EXTEND_REG(modrm.reg, r));
220+ regOpnd = EncoderBase::Operand(reg);
221+ } else { // 2nd operand is memory
222+ okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
223+ EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc];
224+ reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.reg, r));
225+ regOpnd = EncoderBase::Operand(reg);
226+ ++pinst->argc;
227+#ifdef _EM64T_
228+ decodeModRM(odesc, pbuf, pinst, rex);
229+#else
230+ decodeModRM(odesc, pbuf, pinst);
231+#endif
232+ }
233+ ++pinst->argc;
234+ }
235+ return true;
236+ case OpcodeByteKind_rb:
237+ case OpcodeByteKind_rw:
238+ case OpcodeByteKind_rd:
239+ {
240+ // Gregory -
241+        // We don't fully parse the register here because, for current
242+        // needs, the disassembler doesn't have to parse all operands.
243+ unsigned regid = data_byte - byte;
244+ if (regid>7) {
245+ return false;
246+ }
247+ OpndSize opnd_size;
248+ switch(kind)
249+ {
250+ case OpcodeByteKind_rb:
251+ {
252+ opnd_size = OpndSize_8;
253+ break;
254+ }
255+ case OpcodeByteKind_rw:
256+ {
257+ opnd_size = OpndSize_16;
258+ break;
259+ }
260+ case OpcodeByteKind_rd:
261+ {
262+ opnd_size = OpndSize_32;
263+ break;
264+ }
265+ default:
266+ opnd_size = OpndSize_32; // so there is no compiler warning
267+ assert( false );
268+ }
269+ opnd = EncoderBase::Operand( getRegName(OpndKind_GPReg, opnd_size, regid) );
270+
271+ ++pinst->argc;
272+ ++*pbuf;
273+ return true;
274+ }
275+ case OpcodeByteKind_cb:
276+ {
277+ char offset = *(char*)*pbuf;
278+ *pbuf += 1;
279+ opnd = EncoderBase::Operand(offset);
280+ ++pinst->argc;
281+ //pinst->direct_addr = (void*)(pinst->offset + *pbuf);
282+ }
283+ return true;
284+ case OpcodeByteKind_cw:
285+ // not an error, but not expected in current env
286+ // Android x86
287+ {
288+ short offset = *(short*)*pbuf;
289+ *pbuf += 2;
290+ opnd = EncoderBase::Operand(offset);
291+ ++pinst->argc;
292+ }
293+ return true;
294+ //return false;
295+ case OpcodeByteKind_cd:
296+ {
297+ int offset = *(int*)*pbuf;
298+ *pbuf += 4;
299+ opnd = EncoderBase::Operand(offset);
300+ ++pinst->argc;
301+ }
302+ return true;
303+ case OpcodeByteKind_SlashNum:
304+ {
305+ const ModRM& modrm = *(ModRM*)*pbuf;
306+ if (modrm.reg != byte) {
307+ return false;
308+ }
309+ decodeModRM(odesc, pbuf, pinst
310+#ifdef _EM64T_
311+ , rex
312+#endif
313+ );
314+ ++pinst->argc;
315+ }
316+ return true;
317+ case OpcodeByteKind_ib:
318+ {
319+ char ival = *(char*)*pbuf;
320+ opnd = EncoderBase::Operand(ival);
321+ ++pinst->argc;
322+ *pbuf += 1;
323+ }
324+ return true;
325+ case OpcodeByteKind_iw:
326+ {
327+ short ival = *(short*)*pbuf;
328+ opnd = EncoderBase::Operand(ival);
329+ ++pinst->argc;
330+ *pbuf += 2;
331+ }
332+ return true;
333+ case OpcodeByteKind_id:
334+ {
335+ int ival = *(int*)*pbuf;
336+ opnd = EncoderBase::Operand(ival);
337+ ++pinst->argc;
338+ *pbuf += 4;
339+ }
340+ return true;
341+#ifdef _EM64T_
342+ case OpcodeByteKind_io:
343+ {
344+ long long int ival = *(long long int*)*pbuf;
345+ opnd = EncoderBase::Operand(OpndSize_64, ival);
346+ ++pinst->argc;
347+ *pbuf += 8;
348+ }
349+ return true;
350+#endif
351+ case OpcodeByteKind_plus_i:
352+ {
353+ unsigned regid = data_byte - byte;
354+ if (regid>7) {
355+ return false;
356+ }
357+ ++*pbuf;
358+ return true;
359+ }
360+    case OpcodeByteKind_ZeroOpcodeByte: // can't be here
361+ return false;
362+ default:
363+        // unknown kind? how come?
364+ break;
365+ }
366+ return false;
367+}
368+
369+bool DecoderBase::try_mn(Mnemonic mn, const unsigned char ** pbuf, Inst * pinst) {
370+ const unsigned char * save_pbuf = *pbuf;
371+ EncoderBase::OpcodeDesc * opcodes = EncoderBase::opcodes[mn];
372+
373+ for (unsigned i=0; !opcodes[i].last; i++) {
374+ const EncoderBase::OpcodeDesc& odesc = opcodes[i];
375+ char *opcode_ptr = const_cast<char *>(odesc.opcode);
376+ int opcode_len = odesc.opcode_len;
377+#ifdef _EM64T_
378+ Rex *prex = NULL;
379+ Rex rex;
380+#endif
381+
382+ *pbuf = save_pbuf;
383+#ifdef _EM64T_
384+ // Match REX prefixes
385+ unsigned char rex_byte = (*pbuf)[0];
386+ if ((rex_byte & 0xf0) == 0x40)
387+ {
388+ if ((rex_byte & 0x08) != 0)
389+ {
390+ // Have REX.W
391+ if (opcode_len > 0 && opcode_ptr[0] == 0x48)
392+ {
393+ // Have REX.W in opcode. All mnemonics that allow
394+ // REX.W have to have specified it in opcode,
395+ // otherwise it is not allowed
396+ rex = *(Rex *)*pbuf;
397+ prex = &rex;
398+ (*pbuf)++;
399+ opcode_ptr++;
400+ opcode_len--;
401+ }
402+ }
403+ else
404+ {
405+ // No REX.W, so it doesn't have to be in opcode. We
406+ // have REX.B, REX.X, REX.R or their combination, but
407+ // not in opcode, they may extend any part of the
408+ // instruction
409+ rex = *(Rex *)*pbuf;
410+ prex = &rex;
411+ (*pbuf)++;
412+ }
413+ }
414+#endif
415+ if (opcode_len != 0) {
416+ if (memcmp(*pbuf, opcode_ptr, opcode_len)) {
417+ continue;
418+ }
419+ *pbuf += opcode_len;
420+ }
421+ if (odesc.aux0 != 0) {
422+
423+ if (!decode_aux(odesc, odesc.aux0, pbuf, pinst
424+#ifdef _EM64T_
425+ , prex
426+#endif
427+ )) {
428+ continue;
429+ }
430+ if (odesc.aux1 != 0) {
431+ if (!decode_aux(odesc, odesc.aux1, pbuf, pinst
432+#ifdef _EM64T_
433+ , prex
434+#endif
435+ )) {
436+ continue;
437+ }
438+ }
439+ pinst->odesc = &opcodes[i];
440+ return true;
441+ }
442+ else {
443+ // Can't have empty opcode
444+ assert(opcode_len != 0);
445+ pinst->odesc = &opcodes[i];
446+ return true;
447+ }
448+ }
449+ return false;
450+}
451+
452+bool DecoderBase::decodeModRM(const EncoderBase::OpcodeDesc& odesc,
453+ const unsigned char ** pbuf, Inst * pinst
454+#ifdef _EM64T_
455+ , const Rex *rex
456+#endif
457+ )
458+{
459+ EncoderBase::Operand& opnd = pinst->operands[pinst->argc];
460+ const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc];
461+
462+ //XXX debug ///assert(0x66 != *(*pbuf-2));
463+ const ModRM& modrm = *(ModRM*)*pbuf;
464+ *pbuf += 1;
465+
466+ RegName base = RegName_Null;
467+ RegName index = RegName_Null;
468+ int disp = 0;
469+ unsigned scale = 0;
470+
471+ // On x86_64 all mnemonics that allow REX.W have REX.W in opcode.
472+ // Therefore REX.W is simply ignored, and opndDesc.size is used
473+
474+ if (modrm.mod == 3) {
475+ // we have only modrm. no sib, no disp.
476+ // Android x86: Use XMMReg for 64b operand.
477+ OpndKind okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size == OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
478+ RegName reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.rm, b));
479+ opnd = EncoderBase::Operand(reg);
480+ return true;
481+ }
482+    //Android x86: m16, m32, m64 denote a word/doubleword/quadword operand in memory;
483+    //base and index registers must be 32 bits!
484+ const SIB& sib = *(SIB*)*pbuf;
485+ // check whether we have a sib
486+ if (modrm.rm == 4) {
487+ // yes, we have SIB
488+ *pbuf += 1;
489+ if (sib.index != 4) {
490+ index = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.index, x)); //Android x86: OpndDesc.size
491+ } else {
492+ // (sib.index == 4) => no index
493+ //%esp can't be sib.index
494+ }
495+
496+ // scale = sib.scale == 0 ? 0 : (1<<sib.scale);
497+ // scale = (1<<sib.scale);
498+ scale = (index == RegName_Null) ? 0 : (1<<sib.scale);
499+
500+ if (sib.base != 5 || modrm.mod != 0) {
501+ base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.base, b)); //Android x86: OpndDesc.size
502+ } else {
503+ // (sib.base == 5 && modrm.mod == 0) => no base
504+ }
505+ }
506+ else {
507+ if (modrm.mod != 0 || modrm.rm != 5) {
508+ base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(modrm.rm, b)); //Android x86: OpndDesc.size
509+ }
510+ else {
511+ // mod=0 && rm == 5 => only disp32
512+ }
513+ }
514+
515+ //update disp and pbuf
516+ if (modrm.mod == 2) {
517+ // have disp32
518+ disp = *(int*)*pbuf;
519+ *pbuf += 4;
520+ }
521+ else if (modrm.mod == 1) {
522+ // have disp8
523+ disp = *(char*)*pbuf;
524+ *pbuf += 1;
525+ }
526+ else {
527+ assert(modrm.mod == 0);
528+ if (modrm.rm == 5) {
529+ // have disp32 w/o sib
530+ disp = *(int*)*pbuf;
531+ *pbuf += 4;
532+ }
533+ else if (modrm.rm == 4 && sib.base == 5) {
534+            // have disp32 with SIB (sib.base == 5)
535+ disp = *(int*)*pbuf;
536+ *pbuf += 4;
537+ }
538+ }
539+ opnd = EncoderBase::Operand(opndDesc.size, base, index, scale, disp);
540+ return true;
541+}
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/dec_base.h
@@ -0,0 +1,135 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+
21+/**
22+ * @file
23+ * @brief Main decoding (disassembling) routines and structures.
24+ *
25+ * @note Quick and rough implementation, subject for a change.
26+ * @note Quick and rough implementation, subject to change.
27+
28+#ifndef __DEC_BASE_H_INCLUDED__
29+#define __DEC_BASE_H_INCLUDED__
30+
31+
32+#include "enc_base.h"
33+#include "enc_prvt.h"
34+
35+#ifdef ENCODER_ISOLATE
36+using namespace enc_ia32;
37+#endif
38+
39+#define IF_CONDITIONAL (0x00000000)
40+#define IF_SYMMETRIC (0x00000000)
41+#define IF_BRANCH (0x00000000)
42+
43+struct Inst {
44+ Inst() {
45+ mn = Mnemonic_Null;
46+ prefc = 0;
47+ size = 0;
48+ flags = 0;
49+ //offset = 0;
50+ //direct_addr = NULL;
51+ argc = 0;
52+ for(int i = 0; i < 4; ++i)
53+ {
54+ pref[i] = InstPrefix_Null;
55+ }
56+ }
57+ /**
58+     * Mnemonic of the instruction.
59+ */
60+ Mnemonic mn;
61+ /**
62+     * Enumeration of indexes into the pref array.
63+ */
64+ enum PrefGroups
65+ {
66+ Group1 = 0,
67+ Group2,
68+ Group3,
69+ Group4
70+ };
71+ /**
72+ * Number of prefixes (1 byte each).
73+ */
74+ unsigned int prefc;
75+ /**
76+ * Instruction prefixes. Prefix should be placed here according to its group.
77+ */
78+ InstPrefix pref[4];
79+ /**
80+ * Size, in bytes, of the instruction.
81+ */
82+ unsigned size;
83+ /**
84+ * Flags of the instruction.
85+ * @see MF_
86+ */
87+ unsigned flags;
88+ /**
89+ * An offset of target address, in case of 'CALL offset',
90+ * 'JMP/Jcc offset'.
91+ */
92+ //int offset;
93+ /**
94+ * Direct address of the target (on Intel64/IA-32 is 'instruction IP' +
95+ * 'instruction length' + offset).
96+ */
97+ //void * direct_addr;
98+ /**
99+ * Number of arguments of the instruction.
100+ */
101+ unsigned argc;
102+ //
103+ EncoderBase::Operand operands[3];
104+ //
105+ const EncoderBase::OpcodeDesc * odesc;
106+};
107+
108+inline bool is_jcc(Mnemonic mn)
109+{
110+ return Mnemonic_JO <= mn && mn<=Mnemonic_JG;
111+}
112+
113+class DecoderBase {
114+public:
115+ static unsigned decode(const void * addr, Inst * pinst);
116+private:
117+ static bool decodeModRM(const EncoderBase::OpcodeDesc& odesc,
118+ const unsigned char ** pbuf, Inst * pinst
119+#ifdef _EM64T_
120+ , const Rex *rex
121+#endif
122+ );
123+ static bool decode_aux(const EncoderBase::OpcodeDesc& odesc,
124+ unsigned aux, const unsigned char ** pbuf,
125+ Inst * pinst
126+#ifdef _EM64T_
127+ , const Rex *rex
128+#endif
129+ );
130+ static bool try_mn(Mnemonic mn, const unsigned char ** pbuf, Inst * pinst);
131+ static unsigned int fill_prefs( const unsigned char * bytes, Inst * pinst);
132+ static bool is_prefix(const unsigned char * bytes);
133+};
134+
135+#endif // ~ __DEC_BASE_H_INCLUDED__
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_base.cpp
@@ -0,0 +1,1137 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+#include "enc_base.h"
21+//#include <climits>
22+#include <string.h>
23+#define USE_ENCODER_DEFINES
24+#include "enc_prvt.h"
25+#include <stdio.h>
26+
27+//#define JET_PROTO
28+
29+#ifdef JET_PROTO
30+#include "dec_base.h"
31+#include "jvmti_dasm.h"
32+#endif
33+
34+ENCODER_NAMESPACE_START
35+
36+/**
37+ * @file
38+ * @brief Main encoding routines and structures.
39+ */
40+
41+#ifndef _WIN32
42+ #define strcmpi strcasecmp
43+#endif
44+
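// static-initializer trick: forces buildTable() to run once at load time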
45+int EncoderBase::dummy = EncoderBase::buildTable();
46+
47+const unsigned char EncoderBase::size_hash[OpndSize_64+1] = {
48+ //
49+ 0xFF, // OpndSize_Null = 0,
50+ 3, // OpndSize_8 = 0x1,
51+ 2, // OpndSize_16 = 0x2,
52+ 0xFF, // 0x3
53+ 1, // OpndSize_32 = 0x4,
54+ 0xFF, // 0x5
55+ 0xFF, // 0x6
56+ 0xFF, // 0x7
57+ 0, // OpndSize_64 = 0x8,
58+ //
59+};
60+
61+const unsigned char EncoderBase::kind_hash[OpndKind_Mem+1] = {
62+ //
63+ //gp reg -> 000 = 0
64+ //memory -> 001 = 1
65+ //immediate -> 010 = 2
66+ //xmm reg -> 011 = 3
67+ //segment regs -> 100 = 4
68+ //fp reg -> 101 = 5
69+ //mmx reg -> 110 = 6
70+ //
71+ 0xFF, // 0 OpndKind_Null=0,
72+ 0<<2, // 1 OpndKind_GPReg =
73+ // OpndKind_MinRegKind=0x1,
74+ 4<<2, // 2 OpndKind_SReg=0x2,
75+
76+#ifdef _HAVE_MMX_
77+ 6<<2, // 3
78+#else
79+ 0xFF, // 3
80+#endif
81+
82+ 5<<2, // 4 OpndKind_FPReg=0x4,
83+ 0xFF, 0xFF, 0xFF, // 5, 6, 7
84+ 3<<2, // OpndKind_XMMReg=0x8,
85+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 9, 0xA, 0xB, 0xC, 0xD,
86+ // 0xE, 0xF
87+ 0xFF, // OpndKind_MaxRegKind =
88+ // OpndKind_StatusReg =
89+ // OpndKind_OtherReg=0x10,
90+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x11-0x18
91+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x19-0x1F
92+ 2<<2, // OpndKind_Immediate=0x20,
93+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x21-0x28
94+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x29-0x30
95+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x31-0x38
96+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x39-0x3F
97+ 1<<2, // OpndKind_Memory=0x40
98+};
99+
100+char * EncoderBase::curRelOpnd[3];
101+
102+char* EncoderBase::encode_aux(char* stream, unsigned aux,
103+ const Operands& opnds, const OpcodeDesc * odesc,
104+ unsigned * pargsCount, Rex * prex)
105+{
106+ const unsigned byte = aux;
107+ OpcodeByteKind kind = (OpcodeByteKind)(byte & OpcodeByteKind_KindMask);
108+    // The '>>' here is to force the switch to be table-based instead of
109+    // a set of CMP+Jcc.
110+ if (*pargsCount >= COUNTOF(opnds)) {
111+ assert(false);
112+ return stream;
113+ }
114+ switch(kind>>8) {
115+ case OpcodeByteKind_SlashR>>8:
116+ // /r - Indicates that the ModR/M byte of the instruction contains
117+ // both a register operand and an r/m operand.
118+ {
119+ assert(opnds.count() > 1);
120+ // not true anymore for MOVQ xmm<->r
121+ //assert((odesc->opnds[0].kind & OpndKind_Mem) ||
122+ // (odesc->opnds[1].kind & OpndKind_Mem));
123+ unsigned memidx = odesc->opnds[0].kind & OpndKind_Mem ? 0 : 1;
124+ unsigned regidx = memidx == 0 ? 1 : 0;
125+ memidx += *pargsCount;
126+ regidx += *pargsCount;
127+ ModRM& modrm = *(ModRM*)stream;
128+ if (memidx >= COUNTOF(opnds) || regidx >= COUNTOF(opnds)) {
129+ assert(false);
130+ break;
131+ }
132+ if (opnds[memidx].is_mem()) {
133+ stream = encodeModRM(stream, opnds, memidx, odesc, prex);
134+ }
135+ else {
136+ modrm.mod = 3; // 11
137+ modrm.rm = getHWRegIndex(opnds[memidx].reg());
138+#ifdef _EM64T_
139+ if (opnds[memidx].need_rex() && needs_rex_r(opnds[memidx].reg())) {
140+ prex->b = 1;
141+ }
142+#endif
143+ ++stream;
144+ }
145+ modrm.reg = getHWRegIndex(opnds[regidx].reg());
146+#ifdef _EM64T_
147+ if (opnds[regidx].need_rex() && needs_rex_r(opnds[regidx].reg())) {
148+ prex->r = 1;
149+ }
150+#endif
151+ *pargsCount += 2;
152+ }
153+ break;
154+ case OpcodeByteKind_SlashNum>>8:
155+ // /digit - A digit between 0 and 7 indicates that the
156+ // ModR/M byte of the instruction uses only the r/m
157+ // (register or memory) operand. The reg field contains
158+ // the digit that provides an extension to the instruction's
159+ // opcode.
160+ {
161+ const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask);
162+ assert(lowByte <= 7);
163+ ModRM& modrm = *(ModRM*)stream;
164+ unsigned idx = *pargsCount;
165+ assert(opnds[idx].is_mem() || opnds[idx].is_reg());
166+ if (opnds[idx].is_mem()) {
167+ stream = encodeModRM(stream, opnds, idx, odesc, prex);
168+ }
169+ else {
170+ modrm.mod = 3; // 11
171+ modrm.rm = getHWRegIndex(opnds[idx].reg());
172+#ifdef _EM64T_
173+ if (opnds[idx].need_rex() && needs_rex_r(opnds[idx].reg())) {
174+ prex->b = 1;
175+ }
176+#endif
177+ ++stream;
178+ }
179+ modrm.reg = (char)lowByte;
180+ *pargsCount += 1;
181+ }
182+ break;
183+ case OpcodeByteKind_plus_i>>8:
184+ // +i - A number used in floating-point instructions when one
185+ // of the operands is ST(i) from the FPU register stack. The
186+ // number i (which can range from 0 to 7) is added to the
187+ // hexadecimal byte given at the left of the plus sign to form
188+ // a single opcode byte.
189+ {
190+ unsigned idx = *pargsCount;
191+ const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask);
192+ *stream = (char)lowByte + getHWRegIndex(opnds[idx].reg());
193+ ++stream;
194+ *pargsCount += 1;
195+ }
196+ break;
197+ case OpcodeByteKind_ib>>8:
198+ case OpcodeByteKind_iw>>8:
199+ case OpcodeByteKind_id>>8:
200+#ifdef _EM64T_
201+ case OpcodeByteKind_io>>8:
202+#endif //_EM64T_
203+ // ib, iw, id - A 1-byte (ib), 2-byte (iw), or 4-byte (id)
204+ // immediate operand to the instruction that follows the
205+ // opcode, ModR/M bytes or scale-indexing bytes. The opcode
206+ // determines if the operand is a signed value. All words
207+ // and double words are given with the low-order byte first.
208+ {
209+ unsigned idx = *pargsCount;
210+ *pargsCount += 1;
211+ assert(opnds[idx].is_imm());
212+ if (kind == OpcodeByteKind_ib) {
213+ *(unsigned char*)stream = (unsigned char)opnds[idx].imm();
214+ curRelOpnd[idx] = stream;
215+ stream += 1;
216+ }
217+ else if (kind == OpcodeByteKind_iw) {
218+ *(unsigned short*)stream = (unsigned short)opnds[idx].imm();
219+ curRelOpnd[idx] = stream;
220+ stream += 2;
221+ }
222+ else if (kind == OpcodeByteKind_id) {
223+ *(unsigned*)stream = (unsigned)opnds[idx].imm();
224+ curRelOpnd[idx] = stream;
225+ stream += 4;
226+ }
227+#ifdef _EM64T_
228+ else {
229+ assert(kind == OpcodeByteKind_io);
230+ *(long long*)stream = (long long)opnds[idx].imm();
231+ curRelOpnd[idx] = stream;
232+ stream += 8;
233+ }
234+#else
235+ else {
236+ assert(false);
237+ }
238+#endif
239+ }
240+ break;
241+ case OpcodeByteKind_cb>>8:
242+ assert(opnds[*pargsCount].is_imm());
243+ *(unsigned char*)stream = (unsigned char)opnds[*pargsCount].imm();
244+ curRelOpnd[*pargsCount]= stream;
245+ stream += 1;
246+ *pargsCount += 1;
247+ break;
248+ case OpcodeByteKind_cw>>8:
249+ assert(opnds[*pargsCount].is_imm());
250+ *(unsigned short*)stream = (unsigned short)opnds[*pargsCount].imm();
251+ curRelOpnd[*pargsCount]= stream;
252+ stream += 2;
253+ *pargsCount += 1;
254+ break;
255+ case OpcodeByteKind_cd>>8:
256+ assert(opnds[*pargsCount].is_imm());
257+ *(unsigned*)stream = (unsigned)opnds[*pargsCount].imm();
258+ curRelOpnd[*pargsCount]= stream;
259+ stream += 4;
260+ *pargsCount += 1;
261+ break;
262+ //OpcodeByteKind_cp = 0x0B00,
263+ //OpcodeByteKind_co = 0x0C00,
264+ //OpcodeByteKind_ct = 0x0D00,
265+ case OpcodeByteKind_rb>>8:
266+ case OpcodeByteKind_rw>>8:
267+ case OpcodeByteKind_rd>>8:
268+ // +rb, +rw, +rd - A register code, from 0 through 7,
269+ // added to the hexadecimal byte given at the left of
270+ // the plus sign to form a single opcode byte.
271+ assert(opnds.count() > 0);
272+ assert(opnds[*pargsCount].is_reg());
273+ {
274+ const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask);
275+ *(unsigned char*)stream = (unsigned char)lowByte +
276+ getHWRegIndex(opnds[*pargsCount].reg());
277+#ifdef _EM64T_
278+ if (opnds[*pargsCount].need_rex() && needs_rex_r(opnds[*pargsCount].reg())) {
279+ prex->b = 1;
280+ }
281+#endif
282+ ++stream;
283+ *pargsCount += 1;
284+ }
285+ break;
286+ default:
287+ assert(false);
288+ break;
289+ }
290+ return stream;
291+}
292+
293+char * EncoderBase::encode(char * stream, Mnemonic mn, const Operands& opnds)
294+{
295+#ifdef _DEBUG
296+ if (opnds.count() > 0) {
297+ if (opnds[0].is_mem()) {
298+ assert(getRegKind(opnds[0].base()) != OpndKind_SReg);
299+ }
300+ else if (opnds.count() >1 && opnds[1].is_mem()) {
301+ assert(getRegKind(opnds[1].base()) != OpndKind_SReg);
302+ }
303+ }
304+#endif
305+
306+#ifdef JET_PROTO
307+ char* saveStream = stream;
308+#endif
309+
310+ const OpcodeDesc * odesc = lookup(mn, opnds);
311+#if !defined(_EM64T_)
312+ bool copy_opcode = true;
313+ Rex *prex = NULL;
314+#else
315+    // We need REX if:
316+    //  - any register used as an operand, or in the address form, is one of the new extended registers, or
317+    //  - it is explicitly specified by the opcode.
318+ // So, if we don't have REX in opcode but need_rex, then set rex here
319+ // otherwise, wait until opcode is set, and then update REX
320+
321+ bool copy_opcode = true;
322+ unsigned char _1st = odesc->opcode[0];
323+
324+ Rex *prex = (Rex*)stream;
325+    if (opnds.need_rex() &&
326+        ((_1st == 0x66) || ((_1st == 0xF2 || _1st == 0xF3) && odesc->opcode[1] == 0x0F))) {
327+ // Special processing
328+ //
329+ copy_opcode = false;
330+ //
331+ *(unsigned char*)stream = _1st;
332+ ++stream;
333+ //
334+ prex = (Rex*)stream;
335+ prex->dummy = 4;
336+ prex->w = 0;
337+ prex->b = 0;
338+ prex->x = 0;
339+ prex->r = 0;
340+ ++stream;
341+ //
342+ memcpy(stream, &odesc->opcode[1], odesc->opcode_len-1);
343+ stream += odesc->opcode_len-1;
344+ }
345+ else if (_1st != 0x48 && opnds.need_rex()) {
346+ prex = (Rex*)stream;
347+ prex->dummy = 4;
348+ prex->w = 0;
349+ prex->b = 0;
350+ prex->x = 0;
351+ prex->r = 0;
352+ ++stream;
353+ }
354+#endif // ifndef EM64T
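    // Resulting byte order for, e.g., a 0x66-prefixed SSE instruction that
    // needs REX: 66 <REX> 0F xx ...; the mandatory prefix is emitted first
    // because REX must immediately precede the opcode bytes.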
355+
356+ if (copy_opcode) {
357+ if (odesc->opcode_len==1) {
358+ unsigned char *dest = (unsigned char *) (stream);
359+ unsigned char *src = (unsigned char *) (& (odesc->opcode));
360+ *dest = *src;
361+ }
362+ else if (odesc->opcode_len==2) {
363+ short *dest = (short *) (stream);
364+ void *ptr = (void *) (& (odesc->opcode));
365+ short *src = (short *) (ptr);
366+ *dest = *src;
367+ }
368+ else if (odesc->opcode_len==3) {
369+ unsigned short *dest = (unsigned short *) (stream);
370+ void *ptr = (void *) (& (odesc->opcode));
371+ unsigned short *src = (unsigned short *) (ptr);
372+ *dest = *src;
373+
374+ //Now handle the last part
375+ unsigned char *dest2 = (unsigned char *) (stream + 2);
376+ *dest2 = odesc->opcode[2];
377+ }
378+ else if (odesc->opcode_len==4) {
379+ unsigned int *dest = (unsigned int *) (stream);
380+ void *ptr = (void *) (& (odesc->opcode));
381+ unsigned int *src = (unsigned int *) (ptr);
382+ *dest = *src;
383+ }
384+ stream += odesc->opcode_len;
385+ }
386+
387+ unsigned argsCount = odesc->first_opnd;
388+
389+ if (odesc->aux0) {
390+ stream = encode_aux(stream, odesc->aux0, opnds, odesc, &argsCount, prex);
391+ if (odesc->aux1) {
392+ stream = encode_aux(stream, odesc->aux1, opnds, odesc, &argsCount, prex);
393+ }
394+ }
395+#ifdef JET_PROTO
396+ //saveStream
397+ Inst inst;
398+ unsigned len = DecoderBase::decode(saveStream, &inst);
399+ assert(inst.mn == mn);
400+ assert(len == (unsigned)(stream-saveStream));
401+ if (mn == Mnemonic_CALL || mn == Mnemonic_JMP ||
402+ Mnemonic_RET == mn ||
403+ (Mnemonic_JO<=mn && mn<=Mnemonic_JG)) {
404+ assert(inst.argc == opnds.count());
405+
406+ InstructionDisassembler idi(saveStream);
407+
408+ for (unsigned i=0; i<inst.argc; i++) {
409+ const EncoderBase::Operand& original = opnds[i];
410+ const EncoderBase::Operand& decoded = inst.operands[i];
411+ assert(original.kind() == decoded.kind());
412+ assert(original.size() == decoded.size());
413+ if (original.is_imm()) {
414+ assert(original.imm() == decoded.imm());
415+ assert(idi.get_opnd(0).kind == InstructionDisassembler::Kind_Imm);
416+ if (mn == Mnemonic_CALL) {
417+ assert(idi.get_type() == InstructionDisassembler::RELATIVE_CALL);
418+ }
419+ else if (mn == Mnemonic_JMP) {
420+ assert(idi.get_type() == InstructionDisassembler::RELATIVE_JUMP);
421+ }
422+ else if (mn == Mnemonic_RET) {
423+ assert(idi.get_type() == InstructionDisassembler::RET);
424+ }
425+ else {
426+ assert(idi.get_type() == InstructionDisassembler::RELATIVE_COND_JUMP);
427+ }
428+ }
429+ else if (original.is_mem()) {
430+ assert(original.base() == decoded.base());
431+ assert(original.index() == decoded.index());
432+ assert(original.scale() == decoded.scale());
433+ assert(original.disp() == decoded.disp());
434+ assert(idi.get_opnd(0).kind == InstructionDisassembler::Kind_Mem);
435+ if (mn == Mnemonic_CALL) {
436+ assert(idi.get_type() == InstructionDisassembler::INDIRECT_CALL);
437+ }
438+ else if (mn == Mnemonic_JMP) {
439+ assert(idi.get_type() == InstructionDisassembler::INDIRECT_JUMP);
440+ }
441+ else {
442+ assert(false);
443+ }
444+ }
445+ else {
446+ assert(original.is_reg());
447+ assert(original.reg() == decoded.reg());
448+ assert(idi.get_opnd(0).kind == InstructionDisassembler::Kind_Reg);
449+ if (mn == Mnemonic_CALL) {
450+ assert(idi.get_type() == InstructionDisassembler::INDIRECT_CALL);
451+ }
452+ else if (mn == Mnemonic_JMP) {
453+ assert(idi.get_type() == InstructionDisassembler::INDIRECT_JUMP);
454+ }
455+ else {
456+ assert(false);
457+ }
458+ }
459+ }
460+
461+ Inst inst2;
462+ len = DecoderBase::decode(saveStream, &inst2);
463+ }
464+
465+ // if(idi.get_length_with_prefix() != (int)len) {
466+ //__asm { int 3 };
467+ // }
468+#endif
469+
470+ return stream;
471+}
472+
473+char* EncoderBase::encodeModRM(char* stream, const Operands& opnds,
474+ unsigned idx, const OpcodeDesc * odesc,
475+ Rex * prex)
476+{
477+ const Operand& op = opnds[idx];
478+ assert(op.is_mem());
479+ assert(idx < COUNTOF(curRelOpnd));
480+ ModRM& modrm = *(ModRM*)stream;
481+ ++stream;
482+ SIB& sib = *(SIB*)stream;
483+
484+    // We need a SIB byte if:
485+    //   - we have index & scale (nb: an index without a base and without
486+    //     a scale is treated as an error)
487+    //   - the base is EBP without a disp (then we use a fake disp8)
488+    //   - the base is ESP (nb: ESP can't be an index)
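    // Worked byte sequences for the cases handled below (reg field = EAX):
    //   MOV EAX, [EBP]       -> 8B 45 00      (mod=01 rm=101, fake disp8 = 0)
    //   MOV EAX, [ESP+8]     -> 8B 44 24 08   (rm=100 => SIB 24: base=ESP, no index)
    //   MOV EAX, [EBX+ECX*4] -> 8B 04 8B      (SIB: scale=10, index=ECX, base=EBX)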
489+
490+ RegName base = op.base();
491+ // only disp ?..
492+ if (base == RegName_Null && op.index() == RegName_Null) {
493+ assert(op.scale() == 0); // 'scale!=0' has no meaning without index
494+ // ... yes - only have disp
495+    // On EM64T, plain [disp] addressing means 'RIP-based'; we
496+    // must use a SIB to encode the 'DS-based' form
497+#ifdef _EM64T_
498+ modrm.mod = 0; // 00 - ..
499+ modrm.rm = 4; // 100 - have SIB
500+
501+ sib.base = 5; // 101 - none
502+ sib.index = 4; // 100 - none
503+ sib.scale = 0; //
504+ ++stream; // bypass SIB
505+#else
506+ // ignore disp_fits8, always use disp32.
507+ modrm.mod = 0;
508+ modrm.rm = 5;
509+#endif
510+ *(unsigned*)stream = (unsigned)op.disp();
511+ curRelOpnd[idx]= stream;
512+ stream += 4;
513+ return stream;
514+ }
515+
516+    // <climits> errors out when targeting compal; define conservative bounds locally
517+#define CHAR_MIN (-127)
518+#define CHAR_MAX 127
519+ const bool disp_fits8 = CHAR_MIN <= op.disp() && op.disp() <= CHAR_MAX;
520+ /*&& op.base() != RegName_Null - just checked above*/
521+ if (op.index() == RegName_Null && getHWRegIndex(op.base()) != getHWRegIndex(REG_STACK)) {
522+ assert(op.scale() == 0); // 'scale!=0' has no meaning without index
523+        // ... luckily no SIB, only a base and maybe a disp
524+
525+ // EBP base is a special case. Need to use [EBP] + disp8 form
526+ if (op.disp() == 0 && getHWRegIndex(op.base()) != getHWRegIndex(RegName_EBP)) {
527+            modrm.mod = 0; // mod=00, no disp at all
528+ }
529+ else if (disp_fits8) {
530+ modrm.mod = 1; // mod=01, use disp8
531+ *(unsigned char*)stream = (unsigned char)op.disp();
532+ curRelOpnd[idx]= stream;
533+ ++stream;
534+ }
535+ else {
536+ modrm.mod = 2; // mod=10, use disp32
537+ *(unsigned*)stream = (unsigned)op.disp();
538+ curRelOpnd[idx]= stream;
539+ stream += 4;
540+ }
541+ modrm.rm = getHWRegIndex(op.base());
542+ if (is_em64t_extra_reg(op.base())) {
543+ prex->b = 1;
544+ }
545+ return stream;
546+ }
547+
548+ // cool, we do have SIB.
549+ ++stream; // bypass SIB in stream
550+
551+    // {E|R}SP cannot be a scaled index; however, R12, which has the same index in ModRM, can
552+ assert(op.index() == RegName_Null || !equals(op.index(), REG_STACK));
553+
554+ // Only GPRegs can be encoded in the SIB
555+ assert(op.base() == RegName_Null ||
556+ getRegKind(op.base()) == OpndKind_GPReg);
557+ assert(op.index() == RegName_Null ||
558+ getRegKind(op.index()) == OpndKind_GPReg);
559+
560+ modrm.rm = 4; // r/m = 100, means 'we have SIB here'
561+ if (op.base() == RegName_Null) {
562+        // no base.
563+        // (op.index() != RegName_Null was already checked by the first if() above)
565+
566+ modrm.mod = 0; // mod=00 - here it means 'no base, but disp32'
567+ sib.base = 5; // 101 with mod=00 ^^^
568+
569+ // encode at least fake disp32 to avoid having [base=ebp]
570+ *(unsigned*)stream = op.disp();
571+ curRelOpnd[idx]= stream;
572+ stream += 4;
573+
574+ unsigned sc = op.scale();
575+ if (sc == 1 || sc==0) { sib.scale = 0; } // SS=00
576+ else if (sc == 2) { sib.scale = 1; } // SS=01
577+ else if (sc == 4) { sib.scale = 2; } // SS=10
578+ else if (sc == 8) { sib.scale = 3; } // SS=11
579+ sib.index = getHWRegIndex(op.index());
580+ if (is_em64t_extra_reg(op.index())) {
581+ prex->x = 1;
582+ }
583+
584+ return stream;
585+ }
586+
587+ if (op.disp() == 0 && getHWRegIndex(op.base()) != getHWRegIndex(RegName_EBP)) {
588+ modrm.mod = 0; // mod=00, no disp
589+ }
590+ else if (disp_fits8) {
591+ modrm.mod = 1; // mod=01, use disp8
592+ *(unsigned char*)stream = (unsigned char)op.disp();
593+ curRelOpnd[idx]= stream;
594+ stream += 1;
595+ }
596+ else {
597+ modrm.mod = 2; // mod=10, use disp32
598+ *(unsigned*)stream = (unsigned)op.disp();
599+ curRelOpnd[idx]= stream;
600+ stream += 4;
601+ }
602+
603+ if (op.index() == RegName_Null) {
604+ assert(op.scale() == 0); // 'scale!=0' has no meaning without index
605+ // the only reason we're here without index, is that we have {E|R}SP
606+ // or R12 as a base. Another possible reason - EBP without a disp -
607+ // is handled above by adding a fake disp8
608+#ifdef _EM64T_
609+ assert(op.base() != RegName_Null && (equals(op.base(), REG_STACK) ||
610+ equals(op.base(), RegName_R12)));
611+#else // _EM64T_
612+ assert(op.base() != RegName_Null && equals(op.base(), REG_STACK));
613+#endif //_EM64T_
614+ sib.scale = 0; // SS = 00
615+ sib.index = 4; // SS + index=100 means 'no index'
616+ }
617+ else {
618+ unsigned sc = op.scale();
619+ if (sc == 1 || sc==0) { sib.scale = 0; } // SS=00
620+ else if (sc == 2) { sib.scale = 1; } // SS=01
621+ else if (sc == 4) { sib.scale = 2; } // SS=10
622+ else if (sc == 8) { sib.scale = 3; } // SS=11
623+ sib.index = getHWRegIndex(op.index());
624+ if (is_em64t_extra_reg(op.index())) {
625+ prex->x = 1;
626+ }
627+ // not an error by itself, but the usage of [index*1] instead
628+ // of [base] is discouraged
629+ assert(op.base() != RegName_Null || op.scale() != 1);
630+ }
631+ sib.base = getHWRegIndex(op.base());
632+ if (is_em64t_extra_reg(op.base())) {
633+ prex->b = 1;
634+ }
635+ return stream;
636+}
637+
638+char * EncoderBase::nops(char * stream, unsigned howMany)
639+{
640+ // Recommended multi-byte NOPs from the Intel architecture manual
641+ static const unsigned char nops[10][9] = {
642+ { 0, }, // 0, this line is dummy and not used in the loop below
643+ { 0x90, }, // 1-byte NOP
644+ { 0x66, 0x90, }, // 2
645+ { 0x0F, 0x1F, 0x00, }, // 3
646+ { 0x0F, 0x1F, 0x40, 0x00, }, // 4
647+ { 0x0F, 0x1F, 0x44, 0x00, 0x00, }, // 5
648+ { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00, }, // 6
649+ { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00, }, // 7
650+ { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, }, // 8
651+ { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }, // 9-byte NOP
652+ };
653+
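    // e.g. nops(stream, 12) emits the 9-byte NOP followed by the 3-byte one.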
654+ // Start from delivering the longest possible NOPs, then proceed with shorter ones
655+ for (unsigned nopSize=9; nopSize!=0; nopSize--) {
656+ while(howMany>=nopSize) {
657+ const unsigned char* nopBytes = nops[nopSize];
658+ for (unsigned i=0; i<nopSize; i++) {
659+ stream[i] = nopBytes[i];
660+ }
661+ stream += nopSize;
662+ howMany -= nopSize;
663+ }
664+ }
665+ char* end = stream + howMany;
666+ return end;
667+}
668+
669+char * EncoderBase::prefix(char* stream, InstPrefix pref)
670+{
671+ if (pref== InstPrefix_Null) {
672+ // nothing to do
673+ return stream;
674+ }
675+ *stream = (char)pref;
676+ return stream + 1;
677+}
678+
679+
680+/**
681+ *
682+ */
683+bool EncoderBase::extAllowed(OpndExt opndExt, OpndExt instExt) {
684+ if (instExt == opndExt || instExt == OpndExt_Any || opndExt == OpndExt_Any) {
685+ return true;
686+ }
687+    //asm("int3");
688+    assert(0);
689+ return false;
690+}
691+
692+static bool try_match(const EncoderBase::OpcodeDesc& odesc,
693+ const EncoderBase::Operands& opnds, bool strict) {
694+
695+ assert(odesc.roles.count == opnds.count());
696+
697+ for(unsigned j=0; j<odesc.roles.count; j++) {
698+ // - the location must match exactly
699+ if ((odesc.opnds[j].kind & opnds[j].kind()) != opnds[j].kind()) {
700+ return false;
701+ }
702+ if (strict) {
703+ // the size must match exactly
704+ if (odesc.opnds[j].size != opnds[j].size()) {
705+ return false;
706+ }
707+ }
708+ else {
709+            // must match only for def operands; don't care about use ones.
710+            // In situations like 'MOV r32, imm8' vs 'MOV r32, imm32',
711+            // the destination operand defines the overall size.
712+ if (EncoderBase::getOpndRoles(odesc.roles, j) & OpndRole_Def) {
713+ if (odesc.opnds[j].size != opnds[j].size()) {
714+ return false;
715+ }
716+ }
717+ }
718+ }
719+ return true;
720+}
721+
722+//
723+//Subhash implementation - may be useful in case of many misses during the fast
724+//opcode lookup.
725+//
726+
727+#ifdef ENCODER_USE_SUBHASH
728+static unsigned subHash[32];
729+
730+static unsigned find(Mnemonic mn, unsigned hash)
731+{
732+ unsigned key = hash % COUNTOF(subHash);
733+ unsigned pack = subHash[key];
734+ unsigned _hash = pack & 0xFFFF;
735+ if (_hash != hash) {
736+ stat.miss(mn);
737+ return EncoderBase::NOHASH;
738+ }
739+ unsigned _mn = (pack >> 24)&0xFF;
740+    if (_mn != (unsigned)mn) {
741+ stat.miss(mn);
742+ return EncoderBase::NOHASH;
743+ }
744+ unsigned idx = (pack >> 16) & 0xFF;
745+ stat.hit(mn);
746+ return idx;
747+}
748+
749+static void put(Mnemonic mn, unsigned hash, unsigned idx)
750+{
751+ unsigned pack = hash | (idx<<16) | (mn << 24);
752+ unsigned key = hash % COUNTOF(subHash);
753+ subHash[key] = pack;
754+}
755+#endif
756+
757+const EncoderBase::OpcodeDesc *
758+EncoderBase::lookup(Mnemonic mn, const Operands& opnds)
759+{
760+ const unsigned hash = opnds.hash();
761+ unsigned opcodeIndex = opcodesHashMap[mn][hash];
762+#ifdef ENCODER_USE_SUBHASH
763+ if (opcodeIndex == NOHASH) {
764+ opcodeIndex = find(mn, hash);
765+ }
766+#endif
767+
768+ if (opcodeIndex == NOHASH) {
769+        // fast path didn't work; look it up sequentially
770+ const OpcodeDesc * odesc = opcodes[mn];
771+ int idx = -1;
772+ bool found = false;
773+ for (idx=0; !odesc[idx].last; idx++) {
774+ const OpcodeDesc& opcode = odesc[idx];
775+ if (opcode.platf == OpcodeInfo::decoder) {
776+ continue;
777+ }
778+ if (opcode.roles.count != opnds.count()) {
779+ continue;
780+ }
781+ if (try_match(opcode, opnds, true)) {
782+ found = true;
783+ break;
784+ }
785+ }
786+ if (!found) {
787+ for (idx=0; !odesc[idx].last; idx++) {
788+ const OpcodeDesc& opcode = odesc[idx];
789+ if (opcode.platf == OpcodeInfo::decoder) {
790+ continue;
791+ }
792+ if (opcode.roles.count != opnds.count()) {
793+ continue;
794+ }
795+ if (try_match(opcode, opnds, false)) {
796+ found = true;
797+ break;
798+ }
799+ }
800+ }
801+ assert(found);
802+ opcodeIndex = idx;
803+#ifdef ENCODER_USE_SUBHASH
804+ put(mn, hash, opcodeIndex);
805+#endif
806+ }
807+ assert(opcodeIndex != NOHASH);
808+ const OpcodeDesc * odesc = &opcodes[mn][opcodeIndex];
809+ assert(!odesc->last);
810+ assert(odesc->roles.count == opnds.count());
811+ assert(odesc->platf != OpcodeInfo::decoder);
812+#if !defined(_EM64T_)
813+ // tuning was done for IA32 only, so no size restriction on EM64T
814+ //assert(sizeof(OpcodeDesc)==128);
815+#endif
816+ return odesc;
817+}
818+
819+char* EncoderBase::getOpndLocation(int index) {
820+ assert(index < 3);
821+ return curRelOpnd[index];
822+}
823+
824+
825+Mnemonic EncoderBase::str2mnemonic(const char * mn_name)
826+{
827+ for (unsigned m = 1; m<Mnemonic_Count; m++) {
828+ if (!strcmpi(mnemonics[m].name, mn_name)) {
829+ return (Mnemonic)m;
830+ }
831+ }
832+ return Mnemonic_Null;
833+}
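
A small usage sketch (assuming Mnemonic_ADD is among the mnemonics defined in enc_defs_ext.h); the lookup is case-insensitive:

    assert(EncoderBase::str2mnemonic("add") == Mnemonic_ADD);
    assert(EncoderBase::str2mnemonic("AdD") == Mnemonic_ADD);
    assert(EncoderBase::str2mnemonic("not-a-mnemonic") == Mnemonic_Null);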
834+
835+static const char * conditionStrings[ConditionMnemonic_Count] = {
836+ "O",
837+ "NO",
838+ "B",
839+ "AE",
840+ "Z",
841+ "NZ",
842+ "BE",
843+ "A",
844+
845+ "S",
846+ "NS",
847+ "P",
848+ "NP",
849+ "L",
850+ "GE",
851+ "LE",
852+ "G",
853+};
854+
855+const char * getConditionString(ConditionMnemonic cm) {
856+ return conditionStrings[cm];
857+}
858+
859+static const struct {
860+ char sizeString[12];
861+ OpndSize size;
862+}
863+sizes[] = {
864+ { "Sz8", OpndSize_8 },
865+ { "Sz16", OpndSize_16 },
866+ { "Sz32", OpndSize_32 },
867+ { "Sz64", OpndSize_64 },
868+#if !defined(TESTING_ENCODER)
869+ { "Sz80", OpndSize_80 },
870+ { "Sz128", OpndSize_128 },
871+#endif
872+ { "SzAny", OpndSize_Any },
873+};
874+
875+
876+OpndSize getOpndSize(const char * sizeString)
877+{
878+ assert(sizeString);
879+ for (unsigned i = 0; i<COUNTOF(sizes); i++) {
880+ if (!strcmpi(sizeString, sizes[i].sizeString)) {
881+ return sizes[i].size;
882+ }
883+ }
884+ return OpndSize_Null;
885+}
886+
887+const char * getOpndSizeString(OpndSize size) {
888+ for( unsigned i = 0; i<COUNTOF(sizes); i++ ) {
889+ if( sizes[i].size==size ) {
890+ return sizes[i].sizeString;
891+ }
892+ }
893+ return NULL;
894+}
895+
896+static const struct {
897+ char kindString[16];
898+ OpndKind kind;
899+}
900+kinds[] = {
901+ { "Null", OpndKind_Null },
902+ { "GPReg", OpndKind_GPReg },
903+ { "SReg", OpndKind_SReg },
904+ { "FPReg", OpndKind_FPReg },
905+ { "XMMReg", OpndKind_XMMReg },
906+#ifdef _HAVE_MMX_
907+ { "MMXReg", OpndKind_MMXReg },
908+#endif
909+ { "StatusReg", OpndKind_StatusReg },
910+ { "Reg", OpndKind_Reg },
911+ { "Imm", OpndKind_Imm },
912+ { "Mem", OpndKind_Mem },
913+ { "Any", OpndKind_Any },
914+};
915+
916+const char * getOpndKindString(OpndKind kind)
917+{
918+ for (unsigned i = 0; i<COUNTOF(kinds); i++) {
919+ if (kinds[i].kind==kind) {
920+ return kinds[i].kindString;
921+ }
922+ }
923+ return NULL;
924+}
925+
926+OpndKind getOpndKind(const char * kindString)
927+{
928+ assert(kindString);
929+ for (unsigned i = 0; i<COUNTOF(kinds); i++) {
930+ if (!strcmpi(kindString, kinds[i].kindString)) {
931+ return kinds[i].kind;
932+ }
933+ }
934+ return OpndKind_Null;
935+}
936+
937+/**
938+ * A mapping between register string representation and its RegName constant.
939+ */
940+static const struct {
941+ char regstring[7];
942+ RegName regname;
943+}
944+
945+registers[] = {
946+#ifdef _EM64T_
947+ {"RAX", RegName_RAX},
948+ {"RBX", RegName_RBX},
949+ {"RCX", RegName_RCX},
950+ {"RDX", RegName_RDX},
951+ {"RBP", RegName_RBP},
952+ {"RSI", RegName_RSI},
953+ {"RDI", RegName_RDI},
954+ {"RSP", RegName_RSP},
955+ {"R8", RegName_R8},
956+ {"R9", RegName_R9},
957+ {"R10", RegName_R10},
958+ {"R11", RegName_R11},
959+ {"R12", RegName_R12},
960+ {"R13", RegName_R13},
961+ {"R14", RegName_R14},
962+ {"R15", RegName_R15},
963+#endif
964+
965+ {"EAX", RegName_EAX},
966+ {"ECX", RegName_ECX},
967+ {"EDX", RegName_EDX},
968+ {"EBX", RegName_EBX},
969+ {"ESP", RegName_ESP},
970+ {"EBP", RegName_EBP},
971+ {"ESI", RegName_ESI},
972+ {"EDI", RegName_EDI},
973+#ifdef _EM64T_
974+ {"R8D", RegName_R8D},
975+ {"R9D", RegName_R9D},
976+ {"R10D", RegName_R10D},
977+ {"R11D", RegName_R11D},
978+ {"R12D", RegName_R12D},
979+ {"R13D", RegName_R13D},
980+ {"R14D", RegName_R14D},
981+ {"R15D", RegName_R15D},
982+#endif
983+
984+ {"AX", RegName_AX},
985+ {"CX", RegName_CX},
986+ {"DX", RegName_DX},
987+ {"BX", RegName_BX},
988+ {"SP", RegName_SP},
989+ {"BP", RegName_BP},
990+ {"SI", RegName_SI},
991+ {"DI", RegName_DI},
992+
993+ {"AL", RegName_AL},
994+ {"CL", RegName_CL},
995+ {"DL", RegName_DL},
996+ {"BL", RegName_BL},
997+#if !defined(_EM64T_)
998+ {"AH", RegName_AH},
999+ {"CH", RegName_CH},
1000+ {"DH", RegName_DH},
1001+ {"BH", RegName_BH},
1002+#else
1003+ {"SPL", RegName_SPL},
1004+ {"BPL", RegName_BPL},
1005+ {"SIL", RegName_SIL},
1006+ {"DIL", RegName_DIL},
1007+ {"R8L", RegName_R8L},
1008+ {"R9L", RegName_R9L},
1009+ {"R10L", RegName_R10L},
1010+ {"R11L", RegName_R11L},
1011+ {"R12L", RegName_R12L},
1012+ {"R13L", RegName_R13L},
1013+ {"R14L", RegName_R14L},
1014+ {"R15L", RegName_R15L},
1015+#endif
1016+ {"ES", RegName_ES},
1017+ {"CS", RegName_CS},
1018+ {"SS", RegName_SS},
1019+ {"DS", RegName_DS},
1020+ {"FS", RegName_FS},
1021+ {"GS", RegName_GS},
1022+
1023+ {"FP0", RegName_FP0},
1024+/*
1025+ {"FP1", RegName_FP1},
1026+ {"FP2", RegName_FP2},
1027+ {"FP3", RegName_FP3},
1028+ {"FP4", RegName_FP4},
1029+ {"FP5", RegName_FP5},
1030+ {"FP6", RegName_FP6},
1031+ {"FP7", RegName_FP7},
1032+*/
1033+ {"FP0S", RegName_FP0S},
1034+ {"FP1S", RegName_FP1S},
1035+ {"FP2S", RegName_FP2S},
1036+ {"FP3S", RegName_FP3S},
1037+ {"FP4S", RegName_FP4S},
1038+ {"FP5S", RegName_FP5S},
1039+ {"FP6S", RegName_FP6S},
1040+ {"FP7S", RegName_FP7S},
1041+
1042+ {"FP0D", RegName_FP0D},
1043+ {"FP1D", RegName_FP1D},
1044+ {"FP2D", RegName_FP2D},
1045+ {"FP3D", RegName_FP3D},
1046+ {"FP4D", RegName_FP4D},
1047+ {"FP5D", RegName_FP5D},
1048+ {"FP6D", RegName_FP6D},
1049+ {"FP7D", RegName_FP7D},
1050+
1051+ {"XMM0", RegName_XMM0},
1052+ {"XMM1", RegName_XMM1},
1053+ {"XMM2", RegName_XMM2},
1054+ {"XMM3", RegName_XMM3},
1055+ {"XMM4", RegName_XMM4},
1056+ {"XMM5", RegName_XMM5},
1057+ {"XMM6", RegName_XMM6},
1058+ {"XMM7", RegName_XMM7},
1059+#ifdef _EM64T_
1060+ {"XMM8", RegName_XMM8},
1061+ {"XMM9", RegName_XMM9},
1062+ {"XMM10", RegName_XMM10},
1063+ {"XMM11", RegName_XMM11},
1064+ {"XMM12", RegName_XMM12},
1065+ {"XMM13", RegName_XMM13},
1066+ {"XMM14", RegName_XMM14},
1067+ {"XMM15", RegName_XMM15},
1068+#endif
1069+
1070+
1071+ {"XMM0S", RegName_XMM0S},
1072+ {"XMM1S", RegName_XMM1S},
1073+ {"XMM2S", RegName_XMM2S},
1074+ {"XMM3S", RegName_XMM3S},
1075+ {"XMM4S", RegName_XMM4S},
1076+ {"XMM5S", RegName_XMM5S},
1077+ {"XMM6S", RegName_XMM6S},
1078+ {"XMM7S", RegName_XMM7S},
1079+#ifdef _EM64T_
1080+ {"XMM8S", RegName_XMM8S},
1081+ {"XMM9S", RegName_XMM9S},
1082+ {"XMM10S", RegName_XMM10S},
1083+ {"XMM11S", RegName_XMM11S},
1084+ {"XMM12S", RegName_XMM12S},
1085+ {"XMM13S", RegName_XMM13S},
1086+ {"XMM14S", RegName_XMM14S},
1087+ {"XMM15S", RegName_XMM15S},
1088+#endif
1089+
1090+ {"XMM0D", RegName_XMM0D},
1091+ {"XMM1D", RegName_XMM1D},
1092+ {"XMM2D", RegName_XMM2D},
1093+ {"XMM3D", RegName_XMM3D},
1094+ {"XMM4D", RegName_XMM4D},
1095+ {"XMM5D", RegName_XMM5D},
1096+ {"XMM6D", RegName_XMM6D},
1097+ {"XMM7D", RegName_XMM7D},
1098+#ifdef _EM64T_
1099+ {"XMM8D", RegName_XMM8D},
1100+ {"XMM9D", RegName_XMM9D},
1101+ {"XMM10D", RegName_XMM10D},
1102+ {"XMM11D", RegName_XMM11D},
1103+ {"XMM12D", RegName_XMM12D},
1104+ {"XMM13D", RegName_XMM13D},
1105+ {"XMM14D", RegName_XMM14D},
1106+ {"XMM15D", RegName_XMM15D},
1107+#endif
1108+
1109+ {"EFLGS", RegName_EFLAGS},
1110+};
1111+
1112+
1113+const char * getRegNameString(RegName reg)
1114+{
1115+ for (unsigned i = 0; i<COUNTOF(registers); i++) {
1116+ if (registers[i].regname == reg) {
1117+ return registers[i].regstring;
1118+ }
1119+ }
1120+ return "(null)";
1121+}
1122+
1123+RegName getRegName(const char * regname)
1124+{
1125+ if (NULL == regname) {
1126+ return RegName_Null;
1127+ }
1128+
1129+ for (unsigned i = 0; i<COUNTOF(registers); i++) {
1130+ if (!strcmpi(regname,registers[i].regstring)) {
1131+ return registers[i].regname;
1132+ }
1133+ }
1134+ return RegName_Null;
1135+}
1136+
1137+ENCODER_NAMESPACE_END
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_base.h
@@ -0,0 +1,748 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+
21+/**
22+ * @file
23+ * @brief Main encoding routines and structures.
24+ */
25+
26+#ifndef __ENC_BASE_H_INCLUDED__
27+#define __ENC_BASE_H_INCLUDED__
28+
29+#include "enc_defs.h"
30+
31+
32+#include <stdlib.h>
33+#include <assert.h>
34+#include <memory.h>
35+
36+ENCODER_NAMESPACE_START
37+struct MnemonicInfo;
38+struct OpcodeInfo;
39+struct Rex;
40+
41+/**
42+ * @brief Basic facilities for generation of processor's instructions.
43+ *
44+ * The class EncoderBase represents the basic facilities for the encoding of
45+ * processor's instructions on IA32 and EM64T platforms.
46+ *
47+ * The class provides general interface to generate the instructions as well
48+ * as to retrieve some static data about instructions (number of arguments,
49+ * their roles, etc).
50+ *
51+ * Currently, the EncoderBase class is used for both LIL and Jitrino code
52+ * generators. Each of these code generators has its own wrapper to adapt
53+ * this general interface for specific needs - see encoder.h for LIL wrappers
54+ * and Ia32Encoder.h for Jitrino's adapter.
55+ *
56+ * The interface is provided through static methods; no instances of
57+ * EncoderBase are to be created.
58+ *
59+ * @todo RIP-based addressing on EM64T is not supported yet.
60+ */
61+class EncoderBase {
62+public:
63+ class Operands;
64+ struct MnemonicDesc;
65+ /**
66+ * @brief Generates processor's instruction.
67+ *
68+ * @param stream - a buffer to generate into
69+ * @param mn - \link Mnemonic mnemonic \endlink of the instruction
70+ * @param opnds - operands for the instruction
71+ * @returns (stream + length of the just generated instruction)
72+ */
73+ static char * encode(char * stream, Mnemonic mn, const Operands& opnds);
74+ static char * getOpndLocation(int index);
75+
76+ /**
77+ * @brief Generates the smallest possible number of NOP-s.
78+ *
79+ * Effectively generates the smallest possible number of instructions
80+ * that act as NOP-s for the CPU. Normally used for code alignment.
81+ *
82+ * The method inserts exactly the number of bytes specified. It is the
83+ * caller's responsibility to make sure the buffer is big enough.
84+ *
85+ * @param stream - buffer where to generate code into, can not be NULL
86+ * @param howMany - how many bytes to fill with NOP-s
87+ * @return \c (stream+howMany)
88+ */
89+ static char * nops(char * stream, unsigned howMany);
90+
91+ /**
92+ * @brief Inserts a prefix into the code buffer.
93+ *
94+ * The method writes no more than one byte into the buffer. It is the
95+ * caller's responsibility to make sure the buffer is big enough.
96+ *
97+ * @param stream - buffer where to insert the prefix
98+ * @param pref - prefix to be inserted. If it's InstPrefix_Null, then
99+ * no action performed and return value is \c stream.
100+ * @return \c (stream+1) if pref is not InstPrefix_Null, or \c stream
101+ * otherwise
102+ */
103+ static char * prefix(char* stream, InstPrefix pref);
104+
105+ /**
106+ * @brief Determines if an operand with opndExt suits a position with instExt.
107+ */
108+ static bool extAllowed(OpndExt opndExt, OpndExt instExt);
109+
110+ /**
111+ * @brief Returns MnemonicDesc by the given Mnemonic.
112+ */
113+ static const MnemonicDesc * getMnemonicDesc(Mnemonic mn)
114+ {
115+ assert(mn < Mnemonic_Count);
116+ return mnemonics + mn;
117+ }
118+
119+ /**
120+ * @brief Returns a Mnemonic for the given name.
121+ *
122+ * The lookup is case-insensitive; if no mnemonic is found for the given
123+ * string, Mnemonic_Null is returned.
124+ */
125+ static Mnemonic str2mnemonic(const char * mn_name);
126+
127+ /**
128+ * @brief Returns a string representation of the given Mnemonic.
129+ *
130+ * If an invalid mnemonic is passed, the behavior is undefined.
131+ */
132+ static const char * getMnemonicString(Mnemonic mn)
133+ {
134+ return getMnemonicDesc(mn)->name;
135+ }
136+
137+ static const char * toStr(Mnemonic mn)
138+ {
139+ return getMnemonicDesc(mn)->name;
140+ }
141+
142+
143+ /**
144+ * @brief Description of operand.
145+ *
146+ * Description of an operand in opcode - its kind, size or RegName if
147+ * operand must be a particular register.
148+ */
149+ struct OpndDesc {
150+ /**
151+ * @brief Location of the operand.
152+ *
153+ * May be a mask, i.e. OpndKind_Imm|OpndKind_Mem.
154+ */
155+ OpndKind kind;
156+ /**
157+ * @brief Size of the operand.
158+ */
159+ OpndSize size;
160+ /**
161+ * @brief Extension of the operand.
162+ */
163+ OpndExt ext;
164+ /**
165+ * @brief Appropriate RegName if operand must reside on a particular
166+ * register (i.e. CWD/CDQ instructions), RegName_Null
167+ * otherwise.
168+ */
169+ RegName reg;
170+ };
171+
172+ /**
173+ * @brief Description of operands' roles in instruction.
174+ */
175+ struct OpndRolesDesc {
176+ /**
177+ * @brief Total number of operands in the operation.
178+ */
179+ unsigned count;
180+ /**
181+ * @brief Number of defs in the operation.
182+ */
183+ unsigned defCount;
184+ /**
185+ * @brief Number of uses in the operation.
186+ */
187+ unsigned useCount;
188+ /**
189+ * @brief Operand roles, bit-packed.
190+ *
191+ * A bit-packed info about operands' roles. Each operand's role is
192+ * described by two bits, packed left-to-right: operand#0 occupies
193+ * the most significant pair in use (see getOpndRoles() below).
194+ *
195+ * The mask is built by ORing #OpndRole_Def and #OpndRole_Use
196+ * appropriately and shifting left, i.e. for three operands
197+ * - the last operand's role would be '(OpndRole_Def|OpndRole_Use)'
198+ * - operand#1's role would be 'OpndRole_Use<<2'
199+ * - and operand#0's role would be, say, 'OpndRole_Def<<4'.
200+ */
201+ unsigned roles;
202+ };
203+
204+ /**
205+ * @brief Extracts appropriate OpndRole for a given operand.
206+ *
207+ * The order of operands is left-to-right, i.e. for MOV, it
208+ * would be 'MOV op0, op1'
209+ */
210+ static OpndRole getOpndRoles(OpndRolesDesc ord, unsigned idx)
211+ {
212+ assert(idx < ord.count);
213+ return (OpndRole)(ord.roles>>((ord.count-1-idx)*2) & 0x3);
214+ }
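
A worked example of the packing (a sketch; real descriptors are generated from the opcode tables). For a two-operand 'MOV op0, op1', op0 is a def and op1 a use:

    EncoderBase::OpndRolesDesc ord;
    ord.count = 2; ord.defCount = 1; ord.useCount = 1;
    ord.roles = (OpndRole_Def << 2) | OpndRole_Use;            // op0 in the upper pair
    assert(EncoderBase::getOpndRoles(ord, 0) == OpndRole_Def); // op0
    assert(EncoderBase::getOpndRoles(ord, 1) == OpndRole_Use); // op1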
215+
216+ /**
217+ * @brief Defines the maximum number of operands for an opcode.
218+ *
219+ * The 3 mainly comes from IDIV/IMUL, which may have up to
220+ * 3 operands.
221+ */
222+ static const unsigned int MAX_NUM_OPCODE_OPERANDS = 3;
223+
224+ /**
225+ * @brief Info about single opcode - its opcode bytes, operands,
226+ * operands' roles.
227+ */
228+ union OpcodeDesc {
229+ char dummy[128]; // To make total size a power of 2
230+
231+ struct {
232+ /**
233+ * @brief Raw opcode bytes.
234+ *
235+ * 'Raw' opcode bytes which do not require any analysis and are
236+ * independent of arguments/sizes/etc (may include opcode size
237+ * prefix).
238+ */
239+ char opcode[5];
240+ unsigned opcode_len;
241+ unsigned aux0;
242+ unsigned aux1;
243+ /**
244+ * @brief Info about opcode's operands.
245+ */
246+ OpndDesc opnds[MAX_NUM_OPCODE_OPERANDS];
247+ unsigned first_opnd;
248+ /**
249+ * @brief Info about operands - total number, number of uses/defs,
250+ * operands' roles.
251+ */
252+ OpndRolesDesc roles;
253+ /**
254+ * @brief If not zero, then this is the final OpcodeDesc structure in
255+ * the list of opcodes for a given mnemonic.
256+ */
257+ char last;
258+ char platf;
259+ };
260+ };
261+public:
262+ /**
263+ * @brief General info about mnemonic.
264+ */
265+ struct MnemonicDesc {
266+ /**
267+ * @brief The mnemonic itself.
268+ */
269+ Mnemonic mn;
270+ /**
271+ * Various characteristics of mnemonic.
272+ * @see MF_
273+ */
274+ unsigned flags;
275+ /**
276+ * @brief Operation's operand's count and roles.
277+ *
278+ * For operations whose opcodes may take a different number of
279+ * operands (i.e. IMUL/SHL), either the most common value is used,
280+ * or the field is left empty.
281+ */
282+ OpndRolesDesc roles;
283+ /**
284+ * @brief Print name of the mnemonic.
285+ */
286+ const char * name;
287+ };
288+
289+
290+ /**
291+ * @brief Magic number, shows a maximum value a hash code can take.
292+ *
294+ * For its meaning and arithmetic, see enc_tabl.cpp.
294+ *
295+ * The value was increased from '5155' to '8192' to make it aligned
296+ * for faster access in EncoderBase::lookup().
297+ *
298+ * It was further increased to 16384 when support for 3-operand opcodes
299+ * with XMM registers was added.
300+ */
301+ static const unsigned int HASH_MAX = 16384; //5155;
302+ /**
303+ * @brief Empty value, used in hash-to-opcode map to show an empty slot.
304+ */
305+ static const unsigned char NOHASH = 0xFF;
306+ /**
307+ * @brief Number of hash bits contributed by each operand.
308+ */
309+ static const unsigned char HASH_BITS_PER_OPERAND = 5;
310+
311+ /**
312+ * @brief Contains info about a single instruction's operand - its
313+ * location, size and a value for immediate or RegName for
314+ * register operands.
315+ */
316+ class Operand {
317+ public:
318+ /**
319+ * @brief Initializes the instance with empty size and kind.
320+ */
321+ Operand() : m_kind(OpndKind_Null), m_size(OpndSize_Null), m_ext(OpndExt_None), m_need_rex(false) {}
322+ /**
323+ * @brief Creates register operand from given RegName.
324+ */
325+ Operand(RegName reg, OpndExt ext = OpndExt_None) : m_kind(getRegKind(reg)),
326+ m_size(getRegSize(reg)),
327+ m_ext(ext), m_reg(reg)
328+ {
329+ hash_it();
330+ }
331+ /**
332+ * @brief Creates register operand from given RegName and with the
333+ * specified size and kind.
334+ *
335+ * Used to speed up Operand creation, as there is no need to extract
336+ * the size and kind from the RegName.
337+ * The provided size and kind must match those of the RegName, though.
338+ */
339+ Operand(OpndSize sz, OpndKind kind, RegName reg, OpndExt ext = OpndExt_None) :
340+ m_kind(kind), m_size(sz), m_ext(ext), m_reg(reg)
341+ {
342+ assert(m_size == getRegSize(reg));
343+ assert(m_kind == getRegKind(reg));
344+ hash_it();
345+ }
346+ /**
347+ * @brief Creates immediate operand with the given size and value.
348+ */
349+ Operand(OpndSize size, long long ival, OpndExt ext = OpndExt_None) :
350+ m_kind(OpndKind_Imm), m_size(size), m_ext(ext), m_imm64(ival)
351+ {
352+ hash_it();
353+ }
354+ /**
355+ * @brief Creates immediate operand of OpndSize_32.
356+ */
357+ Operand(int ival, OpndExt ext = OpndExt_None) :
358+ m_kind(OpndKind_Imm), m_size(OpndSize_32), m_ext(ext), m_imm64(ival)
359+ {
360+ hash_it();
361+ }
362+ /**
363+ * @brief Creates immediate operand of OpndSize_16.
364+ */
365+ Operand(short ival, OpndExt ext = OpndExt_None) :
366+ m_kind(OpndKind_Imm), m_size(OpndSize_16), m_ext(ext), m_imm64(ival)
367+ {
368+ hash_it();
369+ }
370+
371+ /**
372+ * @brief Creates immediate operand of OpndSize_8.
373+ */
374+ Operand(char ival, OpndExt ext = OpndExt_None) :
375+ m_kind(OpndKind_Imm), m_size(OpndSize_8), m_ext(ext), m_imm64(ival)
376+ {
377+ hash_it();
378+ }
379+
380+ /**
381+ * @brief Creates memory operand.
382+ */
383+ Operand(OpndSize size, RegName base, RegName index, unsigned scale,
384+ int disp, OpndExt ext = OpndExt_None) : m_kind(OpndKind_Mem), m_size(size), m_ext(ext)
385+ {
386+ m_base = base;
387+ m_index = index;
388+ m_scale = scale;
389+ m_disp = disp;
390+ hash_it();
391+ }
392+
393+ /**
394+ * @brief Creates memory operand with only base and displacement.
395+ */
396+ Operand(OpndSize size, RegName base, int disp, OpndExt ext = OpndExt_None) :
397+ m_kind(OpndKind_Mem), m_size(size), m_ext(ext)
398+ {
399+ m_base = base;
400+ m_index = RegName_Null;
401+ m_scale = 0;
402+ m_disp = disp;
403+ hash_it();
404+ }
405+ //
406+ // general info
407+ //
408+ /**
409+ * @brief Returns kind of the operand.
410+ */
411+ OpndKind kind(void) const { return m_kind; }
412+ /**
413+ * @brief Returns size of the operand.
414+ */
415+ OpndSize size(void) const { return m_size; }
416+ /**
417+ * @brief Returns extension of the operand.
418+ */
419+ OpndExt ext(void) const { return m_ext; }
420+ /**
421+ * @brief Returns hash of the operand.
422+ */
423+ unsigned hash(void) const { return m_hash; }
424+ //
425+#ifdef _EM64T_
426+ bool need_rex(void) const { return m_need_rex; }
427+#else
428+ bool need_rex(void) const { return false; }
429+#endif
430+ /**
431+ * @brief Tests whether operand is memory operand.
432+ */
433+ bool is_mem(void) const { return is_placed_in(OpndKind_Mem); }
434+ /**
435+ * @brief Tests whether operand is immediate operand.
436+ */
437+ bool is_imm(void) const { return is_placed_in(OpndKind_Imm); }
438+ /**
439+ * @brief Tests whether operand is register operand.
440+ */
441+ bool is_reg(void) const { return is_placed_in(OpndKind_Reg); }
442+ /**
443+ * @brief Tests whether operand is general-purpose register operand.
444+ */
445+ bool is_gpreg(void) const { return is_placed_in(OpndKind_GPReg); }
446+ /**
447+ * @brief Tests whether operand is a floating-point pseudo-register operand.
448+ */
449+ bool is_fpreg(void) const { return is_placed_in(OpndKind_FPReg); }
450+ /**
451+ * @brief Tests whether operand is XMM register operand.
452+ */
453+ bool is_xmmreg(void) const { return is_placed_in(OpndKind_XMMReg); }
454+#ifdef _HAVE_MMX_
455+ /**
456+ * @brief Tests whether operand is MMX register operand.
457+ */
458+ bool is_mmxreg(void) const { return is_placed_in(OpndKind_MMXReg); }
459+#endif
460+ /**
461+ * @brief Tests whether operand is signed immediate operand.
462+ */
463+ //bool is_signed(void) const { assert(is_imm()); return m_is_signed; }
464+
465+ /**
466+ * @brief Returns base of memory operand (RegName_Null if not memory).
467+ */
468+ RegName base(void) const { return is_mem() ? m_base : RegName_Null; }
469+ /**
470+ * @brief Returns index of memory operand (RegName_Null if not memory).
471+ */
472+ RegName index(void) const { return is_mem() ? m_index : RegName_Null; }
473+ /**
474+ * @brief Returns scale of memory operand (0 if not memory).
475+ */
476+ unsigned scale(void) const { return is_mem() ? m_scale : 0; }
477+ /**
478+ * @brief Returns displacement of memory operand (0 if not memory).
479+ */
480+ int disp(void) const { return is_mem() ? m_disp : 0; }
481+ /**
482+ * @brief Returns RegName of register operand (RegName_Null if not
483+ * register).
484+ */
485+ RegName reg(void) const { return is_reg() ? m_reg : RegName_Null; }
486+ /**
487+ * @brief Returns value of immediate operand (0 if not immediate).
488+ */
489+ long long imm(void) const { return is_imm() ? m_imm64 : 0; }
490+ private:
491+ bool is_placed_in(OpndKind kd) const
492+ {
493+ return kd == OpndKind_Reg ?
494+ m_kind == OpndKind_GPReg ||
495+#ifdef _HAVE_MMX_
496+ m_kind == OpndKind_MMXReg ||
497+#endif
498+ m_kind == OpndKind_FPReg ||
499+ m_kind == OpndKind_XMMReg
500+ : kd == m_kind;
501+ }
502+ void hash_it(void)
503+ {
504+ m_hash = get_size_hash(m_size) | get_kind_hash(m_kind);
505+#ifdef _EM64T_
506+ m_need_rex = false;
507+ if (is_reg() && is_em64t_extra_reg(m_reg)) {
508+ m_need_rex = true;
509+ }
510+ else if (is_mem() && (is_em64t_extra_reg(m_base) ||
511+ is_em64t_extra_reg(m_index))) {
512+ m_need_rex = true;
513+ }
514+#endif
515+ }
516+ // general info
517+ OpndKind m_kind;
518+ OpndSize m_size;
519+ OpndExt m_ext;
520+ // complex address form support
521+ RegName m_base;
522+ RegName m_index;
523+ unsigned m_scale;
524+ union {
525+ int m_disp;
526+ RegName m_reg;
527+ long long m_imm64;
528+ };
529+ unsigned m_hash;
530+ bool m_need_rex;
531+ friend class EncoderBase::Operands;
532+ };
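
For illustration, the constructors above cover the register, immediate and memory flavors; e.g. the operands of 'mov eax, [esp+8]' plus an imm32 could be built as follows (a sketch):

    EncoderBase::Operand reg(RegName_EAX);                  // register
    EncoderBase::Operand mem(OpndSize_32, RegName_ESP, 8);  // [esp+8], base+disp form
    EncoderBase::Operand imm(42);                           // imm32
    assert(reg.is_reg() && mem.is_mem() && imm.is_imm());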
533+ /**
534+ * @brief Simple container for up to 3 Operand-s.
535+ */
536+ class Operands {
537+ public:
538+ Operands(void)
539+ {
540+ clear();
541+ }
542+ Operands(const Operand& op0)
543+ {
544+ clear();
545+ add(op0);
546+ }
547+
548+ Operands(const Operand& op0, const Operand& op1)
549+ {
550+ clear();
551+ add(op0); add(op1);
552+ }
553+
554+ Operands(const Operand& op0, const Operand& op1, const Operand& op2)
555+ {
556+ clear();
557+ add(op0); add(op1); add(op2);
558+ }
559+
560+ unsigned count(void) const { return m_count; }
561+ unsigned hash(void) const { return m_hash; }
562+ const Operand& operator[](unsigned idx) const
563+ {
564+ assert(idx<m_count);
565+ return m_operands[idx];
566+ }
567+
568+ void add(const Operand& op)
569+ {
570+ assert(m_count < COUNTOF(m_operands));
571+ m_hash = (m_hash<<HASH_BITS_PER_OPERAND) | op.hash();
572+ m_operands[m_count++] = op;
573+ m_need_rex = m_need_rex || op.m_need_rex;
574+ }
575+#ifdef _EM64T_
576+ bool need_rex(void) const { return m_need_rex; }
577+#else
578+ bool need_rex(void) const { return false; }
579+#endif
580+ void clear(void)
581+ {
582+ m_count = 0; m_hash = 0; m_need_rex = false;
583+ }
584+ private:
585+ unsigned m_count;
586+ Operand m_operands[COUNTOF( ((OpcodeDesc*)NULL)->opnds )];
587+ unsigned m_hash;
588+ bool m_need_rex;
589+ };
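
Putting the pieces together (a sketch, assuming Mnemonic_MOV from enc_defs_ext.h and a large-enough buffer): Operands accumulates the per-operand hashes, 5 bits each (HASH_BITS_PER_OPERAND), and encode() uses that hash for the opcode lookup:

    EncoderBase::Operand dst(RegName_EAX);
    EncoderBase::Operand src(OpndSize_32, RegName_ESP, 8);    // [esp+8]
    EncoderBase::Operands args(dst, src);
    assert(args.hash() == ((dst.hash() << 5) | src.hash()));

    char buf[MAX_NATIVE_INST_SIZE];
    char* end = EncoderBase::encode(buf, Mnemonic_MOV, args); // mov eax, [esp+8]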
590+public:
591+#ifdef _DEBUG
592+ /**
593+ * Verifies some presumptions about the encoding data table.
594+ * Called automatically during static initialization.
595+ */
596+ static int verify(void);
597+#endif
598+
599+private:
600+ /**
601+ * @brief Finds and returns the OpcodeDesc for the given Mnemonic and operands.
602+ */
603+ static const OpcodeDesc * lookup(Mnemonic mn, const Operands& opnds);
604+ /**
605+ * @brief Encodes mod/rm byte.
606+ */
607+ static char* encodeModRM(char* stream, const Operands& opnds,
608+ unsigned idx, const OpcodeDesc * odesc, Rex * prex);
609+ /**
610+ * @brief Encodes the auxiliary parts of an opcode description - '/r', 'ib', etc.
611+ */
612+ static char* encode_aux(char* stream, unsigned aux,
613+ const Operands& opnds, const OpcodeDesc * odesc,
614+ unsigned * pargsCount, Rex* prex);
615+#ifdef _EM64T_
616+ /**
617+ * @brief Returns true if the 'reg' argument represents one of the new
618+ * EM64T registers - R8(D)-R15(D).
619+ *
620+ * The 64-bit versions of the 'old-fashioned' registers, i.e. RAX, are
621+ * not considered 'extra'.
622+ */
623+ static bool is_em64t_extra_reg(const RegName reg)
624+ {
625+ if (needs_rex_r(reg)) {
626+ return true;
627+ }
628+ if (RegName_SPL <= reg && reg <= RegName_R15L) {
629+ return true;
630+ }
631+ return false;
632+ }
633+ static bool needs_rex_r(const RegName reg)
634+ {
635+ if (RegName_R8 <= reg && reg <= RegName_R15) {
636+ return true;
637+ }
638+ if (RegName_R8D <= reg && reg <= RegName_R15D) {
639+ return true;
640+ }
641+ if (RegName_R8S <= reg && reg <= RegName_R15S) {
642+ return true;
643+ }
644+ if (RegName_R8L <= reg && reg <= RegName_R15L) {
645+ return true;
646+ }
647+ if (RegName_XMM8 <= reg && reg <= RegName_XMM15) {
648+ return true;
649+ }
650+ if (RegName_XMM8D <= reg && reg <= RegName_XMM15D) {
651+ return true;
652+ }
653+ if (RegName_XMM8S <= reg && reg <= RegName_XMM15S) {
654+ return true;
655+ }
656+ return false;
657+ }
658+ /**
659+ * @brief Returns the 'processor's index' of the register - the index
660+ * used to encode the register in ModRM/SIB bytes.
661+ *
662+ * For the new EM64T registers the 'HW index' differs from the index in
663+ * RegName: e.g. R9D (index 9) encodes as HW index 1 plus a REX bit. For
664+ * old-fashioned registers it is effectively the same as ::getRegIndex(RegName).
665+ */
666+ static unsigned char getHWRegIndex(const RegName reg)
667+ {
668+ if (getRegKind(reg) != OpndKind_GPReg) {
669+ return getRegIndex(reg);
670+ }
671+ if (RegName_SPL <= reg && reg<=RegName_DIL) {
672+ return getRegIndex(reg);
673+ }
674+ if (RegName_R8L<= reg && reg<=RegName_R15L) {
675+ return getRegIndex(reg) - getRegIndex(RegName_R8L);
676+ }
677+ return is_em64t_extra_reg(reg) ?
678+ getRegIndex(reg)-getRegIndex(RegName_R8D) : getRegIndex(reg);
679+ }
680+#else
681+ static unsigned char getHWRegIndex(const RegName reg)
682+ {
683+ return getRegIndex(reg);
684+ }
685+ static bool is_em64t_extra_reg(const RegName reg)
686+ {
687+ return false;
688+ }
689+#endif
690+public:
691+ static unsigned char get_size_hash(OpndSize size) {
692+ return (size <= OpndSize_64) ? size_hash[size] : 0xFF;
693+ }
694+ static unsigned char get_kind_hash(OpndKind kind) {
695+ return (kind <= OpndKind_Mem) ? kind_hash[kind] : 0xFF;
696+ }
697+
698+ /**
699+ * @brief A table used for the fast computation of hash value.
700+ *
701+ * A change must be strictly balanced with hash-related functions and data
702+ * in enc_base.h/.cpp.
703+ */
704+ static const unsigned char size_hash[OpndSize_64+1];
705+ /**
706+ * @brief A table used for the fast computation of hash value.
707+ *
708+ * A change must be strictly balanced with hash-related functions and data
709+ * in enc_base.h/.cpp.
710+ */
711+ static const unsigned char kind_hash[OpndKind_Mem+1];
712+ /**
713+ * @brief Maximum number of opcodes used for a single mnemonic.
714+ *
715+ * No arithmetic behind the number; it is simply an estimate.
716+ */
717+ static const unsigned int MAX_OPCODES = 32; //20;
718+ /**
719+ * @brief Mapping between an operands' hash code and the opcode index.
720+ */
721+ static unsigned char opcodesHashMap[Mnemonic_Count][HASH_MAX];
722+ /**
723+ * @brief Array of mnemonics.
724+ */
725+ static MnemonicDesc mnemonics[Mnemonic_Count];
726+ /**
727+ * @brief Array of available opcodes.
728+ */
729+ static OpcodeDesc opcodes[Mnemonic_Count][MAX_OPCODES];
730+
731+ static int buildTable(void);
732+ static void buildMnemonicDesc(const MnemonicInfo * minfo);
733+ /**
734+ * @brief Computes hash value for the given operands.
735+ */
736+ static unsigned short getHash(const OpcodeInfo* odesc);
737+ /**
738+ * @brief Dummy variable, for automatic invocation of buildTable() at
739+ * startup.
740+ */
741+ static int dummy;
742+
743+ static char * curRelOpnd[3];
744+};
745+
746+ENCODER_NAMESPACE_END
747+
748+#endif // ifndef __ENC_BASE_H_INCLUDED__
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_defs.h
@@ -0,0 +1,786 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+#ifndef _ENCODER_DEFS_H_
21+#define _ENCODER_DEFS_H_
22+
23+
24+// Used to isolate an experimental or still-being-tuned encoder into a separate
25+// namespace so it can coexist with a stable one in the same bundle.
26+#ifdef ENCODER_ISOLATE
27+ #define ENCODER_NAMESPACE_START namespace enc_ia32 {
28+ #define ENCODER_NAMESPACE_END };
29+#else
30+ #define ENCODER_NAMESPACE_START
31+ #define ENCODER_NAMESPACE_END
32+#endif
33+
34+#include <assert.h>
35+#include "enc_defs_ext.h"
36+
37+#ifndef COUNTOF
38+ /**
39+ * Number of items in an array.
40+ */
41+ #define COUNTOF(a) (sizeof(a)/sizeof(a[0]))
42+#endif
43+
44+#ifdef _EM64T_
45+ /**
46+ * A stack pointer of default platform's size.
47+ */
48+ #define REG_STACK RegName_RSP
49+ /**
50+ * A max GP register (with a highest index number)
51+ */
52+ #define REG_MAX RegName_R15
53+ /**
54+ * Total number of GP registers including stack pointer.
55+ */
56+ #define MAX_REGS 15
57+#else
58+ #define REG_STACK RegName_ESP
59+ #define REG_MAX RegName_EDI
60+ #define MAX_REGS 8
61+#endif
62+
63+ENCODER_NAMESPACE_START
64+
65+/**
66+ * A number of bytes 'eaten' by an ordinary PUSH/POP.
67+ */
68+#define STACK_SLOT_SIZE (sizeof(void*))
69+
70+
71+/**
72+ * A recommended by Intel Arch Manual aligment for instructions that
73+ * are targets for jmps.
74+ */
75+#define JMP_TARGET_ALIGMENT (16)
76+/**
77+ * A maximum possible size of native instruction.
78+ */
79+#define MAX_NATIVE_INST_SIZE (15)
80+/**
81+ * The enum OpndKind describes an operand's location - memory, immediate or a register.
82+ * It can be used as a bit mask.
83+ */
84+typedef enum OpndKind {
85+ /**
86+ * A change must be balanced with at least the following places:
87+ * Ia32::Constraint-s use the OpndKind as a mask
88+ * encoder.cpp & encoder_master_info.cpp use OpndKind as an index for hashing
89+ * - perhaps there are many more places
90+ *
91+ * NOTE: an MMXReg kind is incompatible with the current constraints framework,
92+ * as it's not encoded as a mask.
93+ */
94+ OpndKind_Null=0,
95+ OpndKind_GPReg = 0x01, OpndKind_MinRegKind = OpndKind_GPReg,
96+ OpndKind_SReg = 0x02,
97+#ifdef _HAVE_MMX_
98+ OpndKind_MMXReg = 0x03,
99+#endif
100+ OpndKind_FPReg = 0x04,
101+ OpndKind_XMMReg = 0x08,
102+ OpndKind_OtherReg = 0x10,
103+ OpndKind_StatusReg = OpndKind_OtherReg,
104+ OpndKind_MaxRegKind = OpndKind_StatusReg, // a max existing kind of register
105+ OpndKind_MaxReg, // the above + 1, to be used in array definitions
106+ //
107+ OpndKind_Immediate = 0x20, OpndKind_Imm=OpndKind_Immediate,
108+ OpndKind_Memory = 0x40, OpndKind_Mem=OpndKind_Memory,
109+ //
110+ OpndKind_Reg = 0x1F,
111+ OpndKind_Any = 0x7F,
112+ // synthetic constants. Normally not used anywhere, but are used for
113+ // human-readable display under the debugger
114+ OpndKind_GPReg_Mem = OpndKind_GPReg|OpndKind_Mem,
115+#ifdef _HAVE_MMX_
116+ OpndKind_MMXReg_Mem = OpndKind_MMXReg|OpndKind_Mem,
117+#endif
118+ OpndKind_XMMReg_Mem = OpndKind_XMMReg|OpndKind_Mem,
119+} OpndKind;
120+
121+/**
122+ * Defines the type of extension allowed for a particular operand.
123+ * For example, imul r32,r_m32,imm8 sign-extends imm8 before performing the multiplication.
124+ * To satisfy the instruction's constraints, the immediate operand should be either
125+ * OpndExt_Signed or OpndExt_Any.
126+ */
127+typedef enum OpndExt {
128+ OpndExt_None = 0x0,
129+ OpndExt_Signed = 0x1,
130+ OpndExt_Zero = 0x2,
131+ OpndExt_Any = 0x3,
132+}OpndExt;
133+
134+/**
135+ * enum OpndRole defines the role of an operand in an instruction.
136+ * Can be used as a mask to combine def and use. The complete def+use
137+ * info fits in 2 bits, which is used, say, in Encoder::OpndRole.
138+ */
139+//TODO: this duplicates an Role used in the Ia32::Inst. That duplicate enum should be removed.
140+typedef enum OpndRole {
141+ OpndRole_Null=0,
142+ OpndRole_Use=0x1,
143+ OpndRole_Def=0x2,
144+ OpndRole_UseDef=OpndRole_Use|OpndRole_Def,
145+ OpndRole_All=0xffff,
146+} OpndRole;
147+
148+
149+#define REGNAME(k,s,i) ( ((k & OpndKind_Any)<<24) | ((s & OpndSize_Any)<<16) | (i&0xFF) )
150+
151+// Gregory -
152+// It is critical that all register indexes (3rd number) inside the
153+// following table go in ascending order, that is, R8 goes after
154+// RDI. This is necessary for the decoder when extending registers from
155+// RAX-RDI to R8-R15 by simply adding 8 to the index on the EM64T architecture.
156+typedef enum RegName {
157+
158+ RegName_Null = 0,
159+
160+#ifdef _EM64T_
161+ /*
162+ The index part of the RegName-s for RAX-RDI, EAX-ESI, AX-SI and AL-BH is
163+ the same as the index used during instruction encoding. The same rule
164+ applies to the XMM registers on IA32.
165+ For the new EM64T registers (both GP and XMM) the index needs to be corrected
166+ to obtain the index used in the processor's instructions.
167+ */
168+ RegName_RAX = REGNAME(OpndKind_GPReg,OpndSize_64,0),
169+ RegName_RCX = REGNAME(OpndKind_GPReg,OpndSize_64,1),
170+ RegName_RDX = REGNAME(OpndKind_GPReg,OpndSize_64,2),
171+ RegName_RBX = REGNAME(OpndKind_GPReg,OpndSize_64,3),
172+ RegName_RSP = REGNAME(OpndKind_GPReg,OpndSize_64,4),
173+ RegName_RBP = REGNAME(OpndKind_GPReg,OpndSize_64,5),
174+ RegName_RSI = REGNAME(OpndKind_GPReg,OpndSize_64,6),
175+ RegName_RDI = REGNAME(OpndKind_GPReg,OpndSize_64,7),
176+
177+ RegName_R8 = REGNAME(OpndKind_GPReg,OpndSize_64,8),
178+ RegName_R9 = REGNAME(OpndKind_GPReg,OpndSize_64,9),
179+ RegName_R10 = REGNAME(OpndKind_GPReg,OpndSize_64,10),
180+ RegName_R11 = REGNAME(OpndKind_GPReg,OpndSize_64,11),
181+ RegName_R12 = REGNAME(OpndKind_GPReg,OpndSize_64,12),
182+ RegName_R13 = REGNAME(OpndKind_GPReg,OpndSize_64,13),
183+ RegName_R14 = REGNAME(OpndKind_GPReg,OpndSize_64,14),
184+ RegName_R15 = REGNAME(OpndKind_GPReg,OpndSize_64,15),
185+#endif //~_EM64T_
186+
187+ RegName_EAX=REGNAME(OpndKind_GPReg,OpndSize_32,0),
188+ RegName_ECX=REGNAME(OpndKind_GPReg,OpndSize_32,1),
189+ RegName_EDX=REGNAME(OpndKind_GPReg,OpndSize_32,2),
190+ RegName_EBX=REGNAME(OpndKind_GPReg,OpndSize_32,3),
191+ RegName_ESP=REGNAME(OpndKind_GPReg,OpndSize_32,4),
192+ RegName_EBP=REGNAME(OpndKind_GPReg,OpndSize_32,5),
193+ RegName_ESI=REGNAME(OpndKind_GPReg,OpndSize_32,6),
194+ RegName_EDI=REGNAME(OpndKind_GPReg,OpndSize_32,7),
195+
196+#ifdef _EM64T_
197+ RegName_R8D = REGNAME(OpndKind_GPReg,OpndSize_32,8),
198+ RegName_R9D = REGNAME(OpndKind_GPReg,OpndSize_32,9),
199+ RegName_R10D = REGNAME(OpndKind_GPReg,OpndSize_32,10),
200+ RegName_R11D = REGNAME(OpndKind_GPReg,OpndSize_32,11),
201+ RegName_R12D = REGNAME(OpndKind_GPReg,OpndSize_32,12),
202+ RegName_R13D = REGNAME(OpndKind_GPReg,OpndSize_32,13),
203+ RegName_R14D = REGNAME(OpndKind_GPReg,OpndSize_32,14),
204+ RegName_R15D = REGNAME(OpndKind_GPReg,OpndSize_32,15),
205+#endif //~_EM64T_
206+
207+ RegName_AX=REGNAME(OpndKind_GPReg,OpndSize_16,0),
208+ RegName_CX=REGNAME(OpndKind_GPReg,OpndSize_16,1),
209+ RegName_DX=REGNAME(OpndKind_GPReg,OpndSize_16,2),
210+ RegName_BX=REGNAME(OpndKind_GPReg,OpndSize_16,3),
211+ RegName_SP=REGNAME(OpndKind_GPReg,OpndSize_16,4),
212+ RegName_BP=REGNAME(OpndKind_GPReg,OpndSize_16,5),
213+ RegName_SI=REGNAME(OpndKind_GPReg,OpndSize_16,6),
214+ RegName_DI=REGNAME(OpndKind_GPReg,OpndSize_16,7),
215+
216+#ifdef _EM64T_
217+ RegName_R8S = REGNAME(OpndKind_GPReg,OpndSize_16,8),
218+ RegName_R9S = REGNAME(OpndKind_GPReg,OpndSize_16,9),
219+ RegName_R10S = REGNAME(OpndKind_GPReg,OpndSize_16,10),
220+ RegName_R11S = REGNAME(OpndKind_GPReg,OpndSize_16,11),
221+ RegName_R12S = REGNAME(OpndKind_GPReg,OpndSize_16,12),
222+ RegName_R13S = REGNAME(OpndKind_GPReg,OpndSize_16,13),
223+ RegName_R14S = REGNAME(OpndKind_GPReg,OpndSize_16,14),
224+ RegName_R15S = REGNAME(OpndKind_GPReg,OpndSize_16,15),
225+#endif //~_EM64T_
226+
227+ RegName_AL=REGNAME(OpndKind_GPReg,OpndSize_8,0),
228+ RegName_CL=REGNAME(OpndKind_GPReg,OpndSize_8,1),
229+ RegName_DL=REGNAME(OpndKind_GPReg,OpndSize_8,2),
230+ RegName_BL=REGNAME(OpndKind_GPReg,OpndSize_8,3),
231+ // FIXME: Used in enc_tabl.cpp
232+ // AH is not accessible on EM64T; the encoded register is SPL instead, so
233+ // decoding will return the incorrect enum
234+ RegName_AH=REGNAME(OpndKind_GPReg,OpndSize_8,4),
235+#if !defined(_EM64T_)
236+ RegName_CH=REGNAME(OpndKind_GPReg,OpndSize_8,5),
237+ RegName_DH=REGNAME(OpndKind_GPReg,OpndSize_8,6),
238+ RegName_BH=REGNAME(OpndKind_GPReg,OpndSize_8,7),
239+#else
240+ RegName_SPL=REGNAME(OpndKind_GPReg,OpndSize_8,4),
241+ RegName_BPL=REGNAME(OpndKind_GPReg,OpndSize_8,5),
242+ RegName_SIL=REGNAME(OpndKind_GPReg,OpndSize_8,6),
243+ RegName_DIL=REGNAME(OpndKind_GPReg,OpndSize_8,7),
244+ RegName_R8L=REGNAME(OpndKind_GPReg,OpndSize_8,8),
245+ RegName_R9L=REGNAME(OpndKind_GPReg,OpndSize_8,9),
246+ RegName_R10L=REGNAME(OpndKind_GPReg,OpndSize_8,10),
247+ RegName_R11L=REGNAME(OpndKind_GPReg,OpndSize_8,11),
248+ RegName_R12L=REGNAME(OpndKind_GPReg,OpndSize_8,12),
249+ RegName_R13L=REGNAME(OpndKind_GPReg,OpndSize_8,13),
250+ RegName_R14L=REGNAME(OpndKind_GPReg,OpndSize_8,14),
251+ RegName_R15L=REGNAME(OpndKind_GPReg,OpndSize_8,15),
252+#endif
253+
254+ RegName_ES=REGNAME(OpndKind_SReg,OpndSize_16,0),
255+ RegName_CS=REGNAME(OpndKind_SReg,OpndSize_16,1),
256+ RegName_SS=REGNAME(OpndKind_SReg,OpndSize_16,2),
257+ RegName_DS=REGNAME(OpndKind_SReg,OpndSize_16,3),
258+ RegName_FS=REGNAME(OpndKind_SReg,OpndSize_16,4),
259+ RegName_GS=REGNAME(OpndKind_SReg,OpndSize_16,5),
260+
261+ RegName_EFLAGS=REGNAME(OpndKind_StatusReg,OpndSize_32,0),
262+
263+#if !defined(TESTING_ENCODER)
264+ RegName_FP0=REGNAME(OpndKind_FPReg,OpndSize_80,0),
265+ RegName_FP1=REGNAME(OpndKind_FPReg,OpndSize_80,1),
266+ RegName_FP2=REGNAME(OpndKind_FPReg,OpndSize_80,2),
267+ RegName_FP3=REGNAME(OpndKind_FPReg,OpndSize_80,3),
268+ RegName_FP4=REGNAME(OpndKind_FPReg,OpndSize_80,4),
269+ RegName_FP5=REGNAME(OpndKind_FPReg,OpndSize_80,5),
270+ RegName_FP6=REGNAME(OpndKind_FPReg,OpndSize_80,6),
271+ RegName_FP7=REGNAME(OpndKind_FPReg,OpndSize_80,7),
272+#endif
273+ RegName_FP0S=REGNAME(OpndKind_FPReg,OpndSize_32,0),
274+ RegName_FP1S=REGNAME(OpndKind_FPReg,OpndSize_32,1),
275+ RegName_FP2S=REGNAME(OpndKind_FPReg,OpndSize_32,2),
276+ RegName_FP3S=REGNAME(OpndKind_FPReg,OpndSize_32,3),
277+ RegName_FP4S=REGNAME(OpndKind_FPReg,OpndSize_32,4),
278+ RegName_FP5S=REGNAME(OpndKind_FPReg,OpndSize_32,5),
279+ RegName_FP6S=REGNAME(OpndKind_FPReg,OpndSize_32,6),
280+ RegName_FP7S=REGNAME(OpndKind_FPReg,OpndSize_32,7),
281+
282+ RegName_FP0D=REGNAME(OpndKind_FPReg,OpndSize_64,0),
283+ RegName_FP1D=REGNAME(OpndKind_FPReg,OpndSize_64,1),
284+ RegName_FP2D=REGNAME(OpndKind_FPReg,OpndSize_64,2),
285+ RegName_FP3D=REGNAME(OpndKind_FPReg,OpndSize_64,3),
286+ RegName_FP4D=REGNAME(OpndKind_FPReg,OpndSize_64,4),
287+ RegName_FP5D=REGNAME(OpndKind_FPReg,OpndSize_64,5),
288+ RegName_FP6D=REGNAME(OpndKind_FPReg,OpndSize_64,6),
289+ RegName_FP7D=REGNAME(OpndKind_FPReg,OpndSize_64,7),
290+
291+#if !defined(TESTING_ENCODER)
292+ RegName_XMM0=REGNAME(OpndKind_XMMReg,OpndSize_128,0),
293+ RegName_XMM1=REGNAME(OpndKind_XMMReg,OpndSize_128,1),
294+ RegName_XMM2=REGNAME(OpndKind_XMMReg,OpndSize_128,2),
295+ RegName_XMM3=REGNAME(OpndKind_XMMReg,OpndSize_128,3),
296+ RegName_XMM4=REGNAME(OpndKind_XMMReg,OpndSize_128,4),
297+ RegName_XMM5=REGNAME(OpndKind_XMMReg,OpndSize_128,5),
298+ RegName_XMM6=REGNAME(OpndKind_XMMReg,OpndSize_128,6),
299+ RegName_XMM7=REGNAME(OpndKind_XMMReg,OpndSize_128,7),
300+
301+#ifdef _EM64T_
302+ RegName_XMM8 = REGNAME(OpndKind_XMMReg,OpndSize_128,8),
303+ RegName_XMM9 = REGNAME(OpndKind_XMMReg,OpndSize_128,9),
304+ RegName_XMM10 = REGNAME(OpndKind_XMMReg,OpndSize_128,10),
305+ RegName_XMM11 = REGNAME(OpndKind_XMMReg,OpndSize_128,11),
306+ RegName_XMM12 = REGNAME(OpndKind_XMMReg,OpndSize_128,12),
307+ RegName_XMM13 = REGNAME(OpndKind_XMMReg,OpndSize_128,13),
308+ RegName_XMM14 = REGNAME(OpndKind_XMMReg,OpndSize_128,14),
309+ RegName_XMM15 = REGNAME(OpndKind_XMMReg,OpndSize_128,15),
310+#endif //~_EM64T_
311+
312+#endif // ~TESTING_ENCODER
313+
314+ RegName_XMM0S=REGNAME(OpndKind_XMMReg,OpndSize_32,0),
315+ RegName_XMM1S=REGNAME(OpndKind_XMMReg,OpndSize_32,1),
316+ RegName_XMM2S=REGNAME(OpndKind_XMMReg,OpndSize_32,2),
317+ RegName_XMM3S=REGNAME(OpndKind_XMMReg,OpndSize_32,3),
318+ RegName_XMM4S=REGNAME(OpndKind_XMMReg,OpndSize_32,4),
319+ RegName_XMM5S=REGNAME(OpndKind_XMMReg,OpndSize_32,5),
320+ RegName_XMM6S=REGNAME(OpndKind_XMMReg,OpndSize_32,6),
321+ RegName_XMM7S=REGNAME(OpndKind_XMMReg,OpndSize_32,7),
322+#ifdef _EM64T_
323+ RegName_XMM8S=REGNAME(OpndKind_XMMReg,OpndSize_32,8),
324+ RegName_XMM9S=REGNAME(OpndKind_XMMReg,OpndSize_32,9),
325+ RegName_XMM10S=REGNAME(OpndKind_XMMReg,OpndSize_32,10),
326+ RegName_XMM11S=REGNAME(OpndKind_XMMReg,OpndSize_32,11),
327+ RegName_XMM12S=REGNAME(OpndKind_XMMReg,OpndSize_32,12),
328+ RegName_XMM13S=REGNAME(OpndKind_XMMReg,OpndSize_32,13),
329+ RegName_XMM14S=REGNAME(OpndKind_XMMReg,OpndSize_32,14),
330+ RegName_XMM15S=REGNAME(OpndKind_XMMReg,OpndSize_32,15),
331+#endif // ifdef _EM64T_
332+ RegName_XMM0D=REGNAME(OpndKind_XMMReg,OpndSize_64,0),
333+ RegName_XMM1D=REGNAME(OpndKind_XMMReg,OpndSize_64,1),
334+ RegName_XMM2D=REGNAME(OpndKind_XMMReg,OpndSize_64,2),
335+ RegName_XMM3D=REGNAME(OpndKind_XMMReg,OpndSize_64,3),
336+ RegName_XMM4D=REGNAME(OpndKind_XMMReg,OpndSize_64,4),
337+ RegName_XMM5D=REGNAME(OpndKind_XMMReg,OpndSize_64,5),
338+ RegName_XMM6D=REGNAME(OpndKind_XMMReg,OpndSize_64,6),
339+ RegName_XMM7D=REGNAME(OpndKind_XMMReg,OpndSize_64,7),
340+#ifdef _EM64T_
341+ RegName_XMM8D=REGNAME(OpndKind_XMMReg,OpndSize_64,8),
342+ RegName_XMM9D=REGNAME(OpndKind_XMMReg,OpndSize_64,9),
343+ RegName_XMM10D=REGNAME(OpndKind_XMMReg,OpndSize_64,10),
344+ RegName_XMM11D=REGNAME(OpndKind_XMMReg,OpndSize_64,11),
345+ RegName_XMM12D=REGNAME(OpndKind_XMMReg,OpndSize_64,12),
346+ RegName_XMM13D=REGNAME(OpndKind_XMMReg,OpndSize_64,13),
347+ RegName_XMM14D=REGNAME(OpndKind_XMMReg,OpndSize_64,14),
348+ RegName_XMM15D=REGNAME(OpndKind_XMMReg,OpndSize_64,15),
349+#endif // ifdef _EM64T_
350+#ifdef _HAVE_MMX_
351+ RegName_MMX0=REGNAME(OpndKind_MMXReg,OpndSize_64,0),
352+ RegName_MMX1=REGNAME(OpndKind_MMXReg,OpndSize_64,1),
353+ RegName_MMX2=REGNAME(OpndKind_MMXReg,OpndSize_64,2),
354+ RegName_MMX3=REGNAME(OpndKind_MMXReg,OpndSize_64,3),
355+ RegName_MMX4=REGNAME(OpndKind_MMXReg,OpndSize_64,4),
356+ RegName_MMX5=REGNAME(OpndKind_MMXReg,OpndSize_64,5),
357+ RegName_MMX6=REGNAME(OpndKind_MMXReg,OpndSize_64,6),
358+ RegName_MMX7=REGNAME(OpndKind_MMXReg,OpndSize_64,7),
359+#endif // _HAVE_MMX_
360+} RegName;
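
For illustration (a sketch; getRegName()/getRegIndex() are defined further below in this header): the decoder's 'add 8 to the index' trick from the comment above maps RAX to R8 on EM64T:

    #ifdef _EM64T_
    assert(getRegName(OpndKind_GPReg, OpndSize_64,
                      getRegIndex(RegName_RAX) + 8) == RegName_R8);
    #endif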
361+
362+#if 0 // Android x86: use mnemonics defined in enc_defs_ext.h
363+/**
364+ * Conditional mnemonics.
365+ * The values match the 'real' (==processor's) values of the appropriate
366+ * condition values used in the opcodes.
367+ */
368+enum ConditionMnemonic {
369+
370+ ConditionMnemonic_O=0,
371+ ConditionMnemonic_NO=1,
372+ ConditionMnemonic_B=2, ConditionMnemonic_NAE=ConditionMnemonic_B, ConditionMnemonic_C=ConditionMnemonic_B,
373+ ConditionMnemonic_NB=3, ConditionMnemonic_AE=ConditionMnemonic_NB, ConditionMnemonic_NC=ConditionMnemonic_NB,
374+ ConditionMnemonic_Z=4, ConditionMnemonic_E=ConditionMnemonic_Z,
375+ ConditionMnemonic_NZ=5, ConditionMnemonic_NE=ConditionMnemonic_NZ,
376+ ConditionMnemonic_BE=6, ConditionMnemonic_NA=ConditionMnemonic_BE,
377+ ConditionMnemonic_NBE=7, ConditionMnemonic_A=ConditionMnemonic_NBE,
378+
379+ ConditionMnemonic_S=8,
380+ ConditionMnemonic_NS=9,
381+ ConditionMnemonic_P=10, ConditionMnemonic_PE=ConditionMnemonic_P,
382+ ConditionMnemonic_NP=11, ConditionMnemonic_PO=ConditionMnemonic_NP,
383+ ConditionMnemonic_L=12, ConditionMnemonic_NGE=ConditionMnemonic_L,
384+ ConditionMnemonic_NL=13, ConditionMnemonic_GE=ConditionMnemonic_NL,
385+ ConditionMnemonic_LE=14, ConditionMnemonic_NG=ConditionMnemonic_LE,
386+ ConditionMnemonic_NLE=15, ConditionMnemonic_G=ConditionMnemonic_NLE,
387+ ConditionMnemonic_Count=16
388+};
389+
390+
391+#define CCM(prefix,cond) Mnemonic_##prefix##cond=Mnemonic_##prefix##cc+ConditionMnemonic_##cond
392+
393+//=========================================================================================================
394+enum Mnemonic {
395+
396+Mnemonic_NULL=0, Mnemonic_Null=Mnemonic_NULL,
397+Mnemonic_ADC, // Add with Carry
398+Mnemonic_ADD, // Add
399+Mnemonic_ADDSD, // Add Scalar Double-Precision Floating-Point Values
400+Mnemonic_ADDSS, // Add Scalar Single-Precision Floating-Point Values
401+Mnemonic_AND, // Logical AND
402+
403+Mnemonic_BSF, // Bit scan forward
404+Mnemonic_BSR, // Bit scan reverse
405+
406+Mnemonic_CALL, // Call Procedure
407+Mnemonic_CMC, // Complement Carry Flag
408+Mnemonic_CWD, Mnemonic_CDQ=Mnemonic_CWD,// Convert Word to Doubleword/Convert Doubleword to Quadword
409+Mnemonic_CMOVcc, // Conditional Move
410+ CCM(CMOV,O),
411+ CCM(CMOV,NO),
412+ CCM(CMOV,B), CCM(CMOV,NAE), CCM(CMOV,C),
413+ CCM(CMOV,NB), CCM(CMOV,AE), CCM(CMOV,NC),
414+ CCM(CMOV,Z), CCM(CMOV,E),
415+ CCM(CMOV,NZ), CCM(CMOV,NE),
416+ CCM(CMOV,BE), CCM(CMOV,NA),
417+ CCM(CMOV,NBE), CCM(CMOV,A),
418+
419+ CCM(CMOV,S),
420+ CCM(CMOV,NS),
421+ CCM(CMOV,P), CCM(CMOV,PE),
422+ CCM(CMOV,NP), CCM(CMOV,PO),
423+ CCM(CMOV,L), CCM(CMOV,NGE),
424+ CCM(CMOV,NL), CCM(CMOV,GE),
425+ CCM(CMOV,LE), CCM(CMOV,NG),
426+ CCM(CMOV,NLE), CCM(CMOV,G),
427+
428+Mnemonic_CMP, // Compare Two Operands
429+Mnemonic_CMPXCHG, // Compare and exchange
430+Mnemonic_CMPXCHG8B, // Compare and Exchange 8 Bytes
431+Mnemonic_CMPSB, // Compare Two Bytes at DS:ESI and ES:EDI
432+Mnemonic_CMPSW, // Compare Two Words at DS:ESI and ES:EDI
433+Mnemonic_CMPSD, // Compare Two Doublewords at DS:ESI and ES:EDI
434+//
435+// double -> float
436+Mnemonic_CVTSD2SS, // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
437+// double -> I_32
438+Mnemonic_CVTSD2SI, // Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer
439+// double [truncated] -> I_32
440+Mnemonic_CVTTSD2SI, // Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Doubleword Integer
441+//
442+// float -> double
443+Mnemonic_CVTSS2SD, // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
444+// float -> I_32
445+Mnemonic_CVTSS2SI, // Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer
446+// float [truncated] -> I_32
447+Mnemonic_CVTTSS2SI, // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
448+//
449+// I_32 -> double
450+Mnemonic_CVTSI2SD, // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
451+// I_32 -> float
452+Mnemonic_CVTSI2SS, // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
453+
454+Mnemonic_COMISD, // Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS
455+Mnemonic_COMISS, // Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS
456+Mnemonic_DEC, // Decrement by 1
457+//Mnemonic_DIV, // Unsigned Divide
458+Mnemonic_DIVSD, // Divide Scalar Double-Precision Floating-Point Values
459+Mnemonic_DIVSS, // Divide Scalar Single-Precision Floating-Point Values
460+
461+#ifdef _HAVE_MMX_
462+Mnemonic_EMMS, // Empty MMX Technology State
463+#endif
464+
465+Mnemonic_ENTER, // ENTER-Make Stack Frame for Procedure Parameters
466+Mnemonic_FLDCW, // Load FPU control word
467+Mnemonic_FADDP,
468+Mnemonic_FLDZ,
469+Mnemonic_FADD,
470+Mnemonic_FSUBP,
471+Mnemonic_FSUB,
472+Mnemonic_FISUB,
473+Mnemonic_FMUL,
474+Mnemonic_FMULP,
475+Mnemonic_FDIVP,
476+Mnemonic_FDIV,
477+Mnemonic_FUCOMPP,
478+Mnemonic_FRNDINT,
479+Mnemonic_FNSTCW, // Store FPU control word
480+Mnemonic_FSTSW, // Store FPU status word
481+Mnemonic_FNSTSW, // Store FPU status word
482+//Mnemonic_FDECSTP, // Decrement Stack-Top Pointer
483+Mnemonic_FILD, // Load Integer
484+Mnemonic_FLD, // Load Floating Point Value
485+Mnemonic_FLDLG2,
486+Mnemonic_FLDLN2,
487+Mnemonic_FLD1,
488+
489+Mnemonic_FCLEX, // Clear Exceptions
490+Mnemonic_FCHS, // Change sign of ST0
491+Mnemonic_FNCLEX, // Clear Exceptions
492+
493+//Mnemonic_FINCSTP, // Increment Stack-Top Pointer
494+Mnemonic_FIST, // Store Integer
495+Mnemonic_FISTP, // Store Integer, pop FPU stack
496+Mnemonic_FISTTP, // Store Integer with Truncation
497+Mnemonic_FPREM, // Partial Remainder
498+Mnemonic_FPREM1, // Partial Remainder
499+Mnemonic_FST, // Store Floating Point Value
500+Mnemonic_FSTP, // Store Floating Point Value and pop the FP stack
501+Mnemonic_FSQRT, //Computes the square root of the source value in the stack and pop the FP stack
502+Mnemonic_FABS, //Computes the absolute value of the source value in the stack and pop the FP stack
503+Mnemonic_FSIN, //Computes the sine of the source value in the stack and pop the FP stack
504+Mnemonic_FCOS, //Computes the cosine of the source value in the stack and pop the FP stack
505+Mnemonic_FPTAN, //Computes the tangent of the source value in the stack and pop the FP stack
506+Mnemonic_FYL2X,
507+Mnemonic_FYL2XP1,
508+Mnemonic_F2XM1,
509+Mnemonic_FPATAN,
510+Mnemonic_FXCH,
511+Mnemonic_FSCALE,
512+
513+Mnemonic_XCHG,
514+Mnemonic_DIV, // Unsigned Divide
515+Mnemonic_IDIV, // Signed Divide
516+Mnemonic_MUL, // Unsigned Multiply
517+Mnemonic_IMUL, // Signed Multiply
518+Mnemonic_INC, // Increment by 1
519+Mnemonic_INT3, // Call break point
520+Mnemonic_Jcc, // Jump if Condition Is Met
521+ CCM(J,O),
522+ CCM(J,NO),
523+ CCM(J,B), CCM(J,NAE), CCM(J,C),
524+ CCM(J,NB), CCM(J,AE), CCM(J,NC),
525+ CCM(J,Z), CCM(J,E),
526+ CCM(J,NZ), CCM(J,NE),
527+ CCM(J,BE), CCM(J,NA),
528+ CCM(J,NBE), CCM(J,A),
529+ CCM(J,S),
530+ CCM(J,NS),
531+ CCM(J,P), CCM(J,PE),
532+ CCM(J,NP), CCM(J,PO),
533+ CCM(J,L), CCM(J,NGE),
534+ CCM(J,NL), CCM(J,GE),
535+ CCM(J,LE), CCM(J,NG),
536+ CCM(J,NLE), CCM(J,G),
537+Mnemonic_JMP, // Jump
538+Mnemonic_LEA, // Load Effective Address
539+Mnemonic_LEAVE, // High Level Procedure Exit
540+Mnemonic_LOOP, // Loop according to ECX counter
541+Mnemonic_LOOPE, // Loop according to ECX counter
542+Mnemonic_LOOPNE, Mnemonic_LOOPNZ = Mnemonic_LOOPNE, // Loop according to ECX
543+Mnemonic_LAHF, // Load Flags into AH
544+Mnemonic_MOV, // Move
545+Mnemonic_MOVD, // Move Double word
546+Mnemonic_MOVQ, // Move Quadword
547+/*Mnemonic_MOVS, // Move Data from String to String*/
548+// MOVS is a special case: see encoding table for more details,
549+Mnemonic_MOVS8, Mnemonic_MOVS16, Mnemonic_MOVS32, Mnemonic_MOVS64,
550+//
551+Mnemonic_MOVAPD, // Move Scalar Double-Precision Floating-Point Value
552+Mnemonic_MOVSD, // Move Scalar Double-Precision Floating-Point Value
553+Mnemonic_MOVSS, // Move Scalar Single-Precision Floating-Point Values
554+Mnemonic_MOVSX, // Move with Sign-Extension
555+Mnemonic_MOVZX, // Move with Zero-Extend
556+//Mnemonic_MUL, // Unsigned Multiply
557+Mnemonic_MULSD, // Multiply Scalar Double-Precision Floating-Point Values
558+Mnemonic_MULSS, // Multiply Scalar Single-Precision Floating-Point Values
559+Mnemonic_NEG, // Two's Complement Negation
560+Mnemonic_NOP, // No Operation
561+Mnemonic_NOT, // One's Complement Negation
562+Mnemonic_OR, // Logical Inclusive OR
563+Mnemonic_PREFETCH, // prefetch
564+
565+#ifdef _HAVE_MMX_
566+ Mnemonic_PADDQ, // Add Packed Quadword Integers
567+ Mnemonic_PAND, // Logical AND
568+ Mnemonic_POR, // Bitwise Logical OR
569+ Mnemonic_PSUBQ, // Subtract Packed Quadword Integers
570+#endif
571+
572+Mnemonic_PXOR, // Logical Exclusive OR
573+Mnemonic_POP, // Pop a Value from the Stack
574+Mnemonic_POPFD, // Pop a Value of EFLAGS register from the Stack
575+Mnemonic_PUSH, // Push Word or Doubleword Onto the Stack
576+Mnemonic_PUSHFD, // Push EFLAGS Doubleword Onto the Stack
577+Mnemonic_RET, // Return from Procedure
578+
579+Mnemonic_SETcc, // Set Byte on Condition
580+ CCM(SET,O),
581+ CCM(SET,NO),
582+ CCM(SET,B), CCM(SET,NAE), CCM(SET,C),
583+ CCM(SET,NB), CCM(SET,AE), CCM(SET,NC),
584+ CCM(SET,Z), CCM(SET,E),
585+ CCM(SET,NZ), CCM(SET,NE),
586+ CCM(SET,BE), CCM(SET,NA),
587+ CCM(SET,NBE), CCM(SET,A),
588+ CCM(SET,S),
589+ CCM(SET,NS),
590+ CCM(SET,P), CCM(SET,PE),
591+ CCM(SET,NP), CCM(SET,PO),
592+ CCM(SET,L), CCM(SET,NGE),
593+ CCM(SET,NL), CCM(SET,GE),
594+ CCM(SET,LE), CCM(SET,NG),
595+ CCM(SET,NLE), CCM(SET,G),
596+
597+Mnemonic_SAL, Mnemonic_SHL=Mnemonic_SAL,// Shift left
598+Mnemonic_SAR, // Shift right
599+Mnemonic_ROR, // Rotate right
600+Mnemonic_RCR, // Rotate right through CARRY flag
601+Mnemonic_ROL, // Rotate left
602+Mnemonic_RCL, // Rotate left through CARRY flag
603+Mnemonic_SHR, // Unsigned shift right
604+Mnemonic_SHRD, // Double Precision Shift Right
605+Mnemonic_SHLD, // Double Precision Shift Left
606+
607+Mnemonic_SBB, // Integer Subtraction with Borrow
608+Mnemonic_SUB, // Subtract
609+Mnemonic_SUBSD, // Subtract Scalar Double-Precision Floating-Point Values
610+Mnemonic_SUBSS, // Subtract Scalar Single-Precision Floating-Point Values
611+
612+Mnemonic_TEST, // Logical Compare
613+
614+Mnemonic_UCOMISD, // Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS
615+Mnemonic_UCOMISS, // Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS
616+
617+Mnemonic_XOR, // Logical Exclusive OR
618+//
619+// packed things,
620+//
621+Mnemonic_XORPD, // Bitwise Logical XOR for Double-Precision Floating-Point Values
622+Mnemonic_XORPS, // Bitwise Logical XOR for Single-Precision Floating-Point Values
623+
624+Mnemonic_CVTDQ2PD, // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values
625+Mnemonic_CVTTPD2DQ, // Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers
626+
627+Mnemonic_CVTDQ2PS, // Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values
628+Mnemonic_CVTTPS2DQ, // Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Doubleword Integers
629+//
630+// String operations
631+//
632+Mnemonic_STD, // Set direction flag
633+Mnemonic_CLD, // Clear direction flag
634+Mnemonic_SCAS, // Scan string
635+Mnemonic_STOS, // Store string
636+
637+//
638+Mnemonic_WAIT, // Check for pending unmasked floating-point exceptions
639+//
640+Mnemonic_Count
641+};
642+
643+#undef CCM
644+#endif
645+
646+/**
647+ * @brief Instruction prefixes, according to arch manual.
648+ */
649+typedef enum InstPrefix {
650+ InstPrefix_Null = 0,
651+ // Group 1
652+ InstPrefix_LOCK = 0xF0,
653+ InstPrefix_REPNE = 0xF2,
654+ InstPrefix_REPNZ = InstPrefix_REPNE,
655+ InstPrefix_REP = 0xF3, InstPrefix_REPZ = InstPrefix_REP,
656+ // Group 2
657+ InstPrefix_CS = 0x2E,
658+ InstPrefix_SS = 0x36,
659+ InstPrefix_DS = 0x3E,
660+ InstPrefix_ES = 0x26,
661+ InstPrefix_FS = 0x64,
662+ InstPrefix_GS = 0x65,
663+ //
664+ InstPrefix_HintTaken = 0x3E,
665+ InstPrefix_HintNotTaken = 0x2E,
666+ // Group 3
667+ InstPrefix_OpndSize = 0x66,
668+ // Group 4
669+ InstPrefix_AddrSize = 0x67
670+} InstPrefix;
671+
672+inline unsigned getSizeBytes(OpndSize sz)
673+{
674+ if (sz==OpndSize_64) { return 8; }
675+ if (sz==OpndSize_32) { return 4; }
676+ if (sz==OpndSize_16) { return 2; }
677+ if (sz==OpndSize_8) { return 1; }
678+ assert(false);
679+ return 0;
680+}
681+
682+inline bool isRegKind(OpndKind kind)
683+{
684+ return OpndKind_GPReg<= kind && kind<=OpndKind_MaxRegKind;
685+}
686+
687+/**
688+ * @brief Returns RegName for a given name.
689+ *
690+ * Name is case-insensitive.
691+ * @param regname - string name of a register
692+ * @return RegName for the given name, or RegName_Null if name is invalid
693+ */
694+RegName getRegName(const char * regname);
695+/**
696+ * Constructs RegName from the given OpndKind, size and index.
697+ */
698+inline RegName getRegName(OpndKind k, OpndSize s, int idx)
699+{
700+ return (RegName)REGNAME(k,s,idx);
701+}
702+/**
703+ * Extracts a bit mask with a bit set at the position of the register's index.
704+ */
705+inline unsigned getRegMask(RegName reg)
706+{
707+ return 1<<(reg&0xff);
708+}
709+/**
710+ * @brief Extracts OpndKind from the RegName.
711+ */
712+inline OpndKind getRegKind(RegName reg)
713+{
714+ return (OpndKind)(reg>>24);
715+}
716+/**
717+ * @brief Extracts OpndSize from RegName.
718+ */
719+inline OpndSize getRegSize(RegName reg)
720+{
721+ return (OpndSize)((reg>>16)&0xFF);
722+}
723+/**
724+ * Extracts an index from the given RegName.
725+ */
726+inline unsigned char getRegIndex(RegName reg)
727+{
728+ return (unsigned char)(reg&0xFF);
729+}
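+
+// A sketch of the RegName bit layout implied by the accessors above
+// (illustrative only, derived from the shifts and masks they use):
+//
+// bits 24..31 : OpndKind ( reg >> 24 )
+// bits 16..23 : OpndSize ( (reg >> 16) & 0xFF )
+// bits 0..7 : index ( reg & 0xFF )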
730+/**
731+ * Returns a string name of the given RegName. The name returned is in upper-case.
732+ * Returns NULL if invalid RegName specified.
733+ */
734+const char * getRegNameString(RegName reg);
735+/**
736+ * Returns string name of a given OpndSize.
737+ * Returns NULL if invalid OpndSize passed.
738+ */
739+const char * getOpndSizeString(OpndSize size);
740+/**
741+ * Returns the OpndSize matching the given string representation (case-insensitive).
742+ * Returns OpndSize_Null if an invalid string is specified.
743+ * The 'sizeString' must not be NULL.
744+ */
745+OpndSize getOpndSize(const char * sizeString);
746+/**
747+ * Returns string name of a given OpndKind.
748+ * Returns NULL if the passed kind is invalid.
749+ */
750+const char * getOpndKindString(OpndKind kind);
751+/**
752+ * Returns the OpndKind matching the given string representation (case-insensitive).
753+ * Returns OpndKind_Null if the name is invalid.
754+ * The 'kindString' must not be NULL.
755+ */
756+OpndKind getOpndKind(const char * kindString);
757+/**
758+ * Returns the string name of the given ConditionMnemonic.
759+ */
760+const char * getConditionString(ConditionMnemonic cm);
761+
762+/**
763+ * Constructs a RegName with the same index and kind, but with a different size from
764+ * the given RegName (i.e. getAliasReg(EAX, OpndSize_16) => AX; getAliasReg(BL, OpndSize_32) => EBX).
765+ * The constructed RegName is not checked in any way and thus may be invalid.
766+ * Note that the aliasing does not work for at least AH, BH, CH, DH, ESI, EDI, ESP and EBP regs.
767+ */
768+inline RegName getAliasReg(RegName reg, OpndSize sz)
769+{
770+ return (RegName)REGNAME(getRegKind(reg), sz, getRegIndex(reg));
771+}
772+
773+/**
774+ * @brief Tests two RegName-s of the same kind for equality.
775+ *
776+ * @note Works correctly for 8-bit general purpose registers (AH, AL, BH, BL, etc.).
777+ */
778+inline bool equals(RegName r0, RegName r1)
779+{
780+ return getRegKind(r0) == getRegKind(r1) &&
781+ getRegIndex(r0) == getRegIndex(r1);
782+}
783+
784+ENCODER_NAMESPACE_END
785+
786+#endif // ifndef _ENCODER_DEFS_H_
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_defs_ext.h
@@ -0,0 +1,365 @@
1+/*
2+ * Copyright (C) 2012 The Android Open Source Project
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ * See the License for the specific language governing permissions and
14+ * limitations under the License.
15+ */
16+
17+#ifndef _ENCODER_DEFS_EXT_H_
18+#define _ENCODER_DEFS_EXT_H_
19+
20+
21+// Used to isolate an experimental or still-being-tuned encoder in a separate
22+// namespace so it can coexist with a stable one in the same bundle.
23+#ifdef ENCODER_ISOLATE
24+ #define ENCODER_NAMESPACE_START namespace enc_ia32 {
25+ #define ENCODER_NAMESPACE_END };
26+#else
27+ #define ENCODER_NAMESPACE_START
28+ #define ENCODER_NAMESPACE_END
29+#endif
30+
31+ENCODER_NAMESPACE_START
32+typedef enum OpndSize {
33+ /**
34+ * A change must be balanced with at least the following places:
35+ * Ia32IRConstants.h :: getByteSize() uses some presumptions about OpndSize_ values
36+ * Ia32::Constraint-s use the OpndSize as a mask
37+ * encoder.cpp & encoder_master_info.cpp uses OpndSize as an index for hashing
38+ * - perhaps there are many more places
39+ */
40+ OpndSize_Null = 0,
41+ OpndSize_8 = 0x01,
42+ OpndSize_16 = 0x02,
43+ OpndSize_32 = 0x04,
44+ OpndSize_64 = 0x08,
45+#if !defined(TESTING_ENCODER)
46+ OpndSize_80 = 0x10,
47+ OpndSize_128 = 0x20,
48+#endif
49+ OpndSize_Max,
50+ OpndSize_Any = 0x3F,
51+ OpndSize_Default = OpndSize_Any
52+} OpndSize;
53+
54+/**
55+ * Conditional mnemonics.
56+ * The values match the 'real' (== processor's) encodings of the
57+ * corresponding condition codes used in the opcodes.
58+ */
59+typedef enum ConditionMnemonic {
60+
61+ ConditionMnemonic_O=0,
62+ ConditionMnemonic_NO=1,
63+ ConditionMnemonic_B=2, ConditionMnemonic_NAE=ConditionMnemonic_B, ConditionMnemonic_C=ConditionMnemonic_B,
64+ ConditionMnemonic_NB=3, ConditionMnemonic_AE=ConditionMnemonic_NB, ConditionMnemonic_NC=ConditionMnemonic_NB,
65+ ConditionMnemonic_Z=4, ConditionMnemonic_E=ConditionMnemonic_Z,
66+ ConditionMnemonic_NZ=5, ConditionMnemonic_NE=ConditionMnemonic_NZ,
67+ ConditionMnemonic_BE=6, ConditionMnemonic_NA=ConditionMnemonic_BE,
68+ ConditionMnemonic_NBE=7, ConditionMnemonic_A=ConditionMnemonic_NBE,
69+
70+ ConditionMnemonic_S=8,
71+ ConditionMnemonic_NS=9,
72+ ConditionMnemonic_P=10, ConditionMnemonic_PE=ConditionMnemonic_P,
73+ ConditionMnemonic_NP=11, ConditionMnemonic_PO=ConditionMnemonic_NP,
74+ ConditionMnemonic_L=12, ConditionMnemonic_NGE=ConditionMnemonic_L,
75+ ConditionMnemonic_NL=13, ConditionMnemonic_GE=ConditionMnemonic_NL,
76+ ConditionMnemonic_LE=14, ConditionMnemonic_NG=ConditionMnemonic_LE,
77+ ConditionMnemonic_NLE=15, ConditionMnemonic_G=ConditionMnemonic_NLE,
78+ ConditionMnemonic_Count=16
79+} ConditionMnemonic;
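+
+// Because these values match the processor's condition-code encodings, the
+// encoding table can form conditional opcodes arithmetically - e.g. it
+// builds CMOVcc as {0x0F, 0x40 + ConditionMnemonic_cc}.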
80+
81+
82+#define CCM(prefix,cond) Mnemonic_##prefix##cond=Mnemonic_##prefix##cc+ConditionMnemonic_##cond
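+// For instance, CCM(J,Z) expands to
+// Mnemonic_JZ = Mnemonic_Jcc + ConditionMnemonic_Z,
+// placing each conditional variant at a fixed offset from its base mnemonic.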
83+
84+//=========================================================================================================
85+typedef enum Mnemonic {
86+
87+Mnemonic_NULL=0, Mnemonic_Null=Mnemonic_NULL,
88+Mnemonic_JMP, // Jump
89+Mnemonic_MOV, // Move
90+Mnemonic_Jcc, // Jump if Condition Is Met
91+ CCM(J,O),
92+ CCM(J,NO),
93+ CCM(J,B), CCM(J,NAE), CCM(J,C),
94+ CCM(J,NB), CCM(J,AE), CCM(J,NC),
95+ CCM(J,Z), CCM(J,E),
96+ CCM(J,NZ), CCM(J,NE),
97+ CCM(J,BE), CCM(J,NA),
98+ CCM(J,NBE), CCM(J,A),
99+ CCM(J,S),
100+ CCM(J,NS),
101+ CCM(J,P), CCM(J,PE),
102+ CCM(J,NP), CCM(J,PO),
103+ CCM(J,L), CCM(J,NGE),
104+ CCM(J,NL), CCM(J,GE),
105+ CCM(J,LE), CCM(J,NG),
106+ CCM(J,NLE), CCM(J,G),
107+Mnemonic_CALL, // Call Procedure
108+
109+Mnemonic_ADC, // Add with Carry
110+Mnemonic_ADD, // Add
111+Mnemonic_ADDSD, // Add Scalar Double-Precision Floating-Point Values
112+Mnemonic_ADDSS, // Add Scalar Single-Precision Floating-Point Values
113+Mnemonic_AND, // Logical AND
114+
115+Mnemonic_BSF, // Bit scan forward
116+Mnemonic_BSR, // Bit scan reverse
117+
118+Mnemonic_CMC, // Complement Carry Flag
119+Mnemonic_CWD, Mnemonic_CDQ=Mnemonic_CWD,// Convert Word to Doubleword/Convert Doubleword to Quadword
120+Mnemonic_CMOVcc, // Conditional Move
121+ CCM(CMOV,O),
122+ CCM(CMOV,NO),
123+ CCM(CMOV,B), CCM(CMOV,NAE), CCM(CMOV,C),
124+ CCM(CMOV,NB), CCM(CMOV,AE), CCM(CMOV,NC),
125+ CCM(CMOV,Z), CCM(CMOV,E),
126+ CCM(CMOV,NZ), CCM(CMOV,NE),
127+ CCM(CMOV,BE), CCM(CMOV,NA),
128+ CCM(CMOV,NBE), CCM(CMOV,A),
129+
130+ CCM(CMOV,S),
131+ CCM(CMOV,NS),
132+ CCM(CMOV,P), CCM(CMOV,PE),
133+ CCM(CMOV,NP), CCM(CMOV,PO),
134+ CCM(CMOV,L), CCM(CMOV,NGE),
135+ CCM(CMOV,NL), CCM(CMOV,GE),
136+ CCM(CMOV,LE), CCM(CMOV,NG),
137+ CCM(CMOV,NLE), CCM(CMOV,G),
138+
139+Mnemonic_CMP, // Compare Two Operands
140+Mnemonic_CMPXCHG, // Compare and exchange
141+Mnemonic_CMPXCHG8B, // Compare and Exchange 8 Bytes
142+Mnemonic_CMPSB, // Compare Two Bytes at DS:ESI and ES:EDI
143+Mnemonic_CMPSW, // Compare Two Words at DS:ESI and ES:EDI
144+Mnemonic_CMPSD, // Compare Two Doublewords at DS:ESI and ES:EDI
145+//
146+// double -> float
147+Mnemonic_CVTSD2SS, // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
148+// double -> I_32
149+Mnemonic_CVTSD2SI, // Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer
150+// double [truncated] -> I_32
151+Mnemonic_CVTTSD2SI, // Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Doubleword Integer
152+//
153+// float -> double
154+Mnemonic_CVTSS2SD, // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
155+// float -> I_32
156+Mnemonic_CVTSS2SI, // Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer
157+// float [truncated] -> I_32
158+Mnemonic_CVTTSS2SI, // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
159+//
160+// I_32 -> double
161+Mnemonic_CVTSI2SD, // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
162+// I_32 -> float
163+Mnemonic_CVTSI2SS, // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
164+
165+Mnemonic_COMISD, // Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS
166+Mnemonic_COMISS, // Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS
167+Mnemonic_DEC, // Decrement by 1
168+Mnemonic_DIVSD, // Divide Scalar Double-Precision Floating-Point Values
169+Mnemonic_DIVSS, // Divide Scalar Single-Precision Floating-Point Values
170+Mnemonic_ENTER, // ENTER-Make Stack Frame for Procedure Parameters
171+Mnemonic_FLDCW, // Load FPU control word
172+Mnemonic_FADDP,
173+Mnemonic_FLDZ,
174+Mnemonic_FADD,
175+Mnemonic_FSUBP,
176+Mnemonic_FSUB,
177+Mnemonic_FISUB,
178+Mnemonic_FMUL,
179+Mnemonic_FMULP,
180+Mnemonic_FDIVP,
181+Mnemonic_FDIV,
182+Mnemonic_FUCOM,
183+Mnemonic_FUCOMI,
184+Mnemonic_FUCOMP,
185+Mnemonic_FUCOMIP,
186+Mnemonic_FUCOMPP,
187+Mnemonic_FRNDINT,
188+Mnemonic_FNSTCW, // Store FPU control word
189+Mnemonic_FSTSW, // Store FPU status word
190+Mnemonic_FNSTSW, // Store FPU status word
191+Mnemonic_FILD, // Load Integer
192+Mnemonic_FLD, // Load Floating Point Value
193+Mnemonic_FLDLG2,
194+Mnemonic_FLDLN2,
195+Mnemonic_FLD1,
196+
197+Mnemonic_FCLEX, // Clear Exceptions
198+Mnemonic_FCHS, // Change sign of ST0
199+Mnemonic_FNCLEX, // Clear Exceptions
200+Mnemonic_FIST, // Store Integer
201+Mnemonic_FISTP, // Store Integer, pop FPU stack
202+Mnemonic_FISTTP, // Store Integer with Truncation
203+Mnemonic_FPREM, // Partial Remainder
204+Mnemonic_FPREM1, // Partial Remainder
205+Mnemonic_FST, // Store Floating Point Value
206+Mnemonic_FSTP, // Store Floating Point Value and pop the FP stack
207+Mnemonic_FSQRT, //Computes the square root of ST(0) and stores the result in ST(0)
208+Mnemonic_FABS, //Computes the absolute value of ST(0) and stores the result in ST(0)
209+Mnemonic_FSIN, //Computes the sine of ST(0) and stores the result in ST(0)
210+Mnemonic_FCOS, //Computes the cosine of ST(0) and stores the result in ST(0)
211+Mnemonic_FPTAN, //Computes the partial tangent of ST(0), stores it in ST(0) and pushes 1.0
212+Mnemonic_FYL2X,
213+Mnemonic_FYL2XP1,
214+Mnemonic_F2XM1,
215+Mnemonic_FPATAN,
216+Mnemonic_FXCH,
217+Mnemonic_FSCALE,
218+
219+Mnemonic_XCHG,
220+Mnemonic_DIV, // Unsigned Divide
221+Mnemonic_IDIV, // Signed Divide
222+Mnemonic_MUL, // Unsigned Multiply
223+Mnemonic_IMUL, // Signed Multiply
224+Mnemonic_INC, // Increment by 1
225+Mnemonic_INT3, // Call break point
226+
227+Mnemonic_LEA, // Load Effective Address
228+Mnemonic_LEAVE, // High Level Procedure Exit
229+Mnemonic_LOOP, // Loop according to ECX counter
230+Mnemonic_LOOPE, // Loop according to ECX counter
231+Mnemonic_LOOPNE, Mnemonic_LOOPNZ = Mnemonic_LOOPNE, // Loop according to ECX counter
232+Mnemonic_LAHF, // Load Flags into AH
233+Mnemonic_MOVD, // Move Double word
234+Mnemonic_MOVQ, // Move Quadword
235+Mnemonic_MOVS8,
236+Mnemonic_MOVS16,
237+Mnemonic_MOVS32,
238+Mnemonic_MOVS64,
239+Mnemonic_MOVAPD, // Move Aligned Packed Double-Precision Floating-Point Values
240+Mnemonic_MOVSD, // Move Scalar Double-Precision Floating-Point Value
241+Mnemonic_MOVSS, // Move Scalar Single-Precision Floating-Point Values
242+Mnemonic_MOVSX, // Move with Sign-Extension
243+Mnemonic_MOVZX, // Move with Zero-Extend
244+Mnemonic_MULSD, // Multiply Scalar Double-Precision Floating-Point Values
245+Mnemonic_MULSS, // Multiply Scalar Single-Precision Floating-Point Values
246+Mnemonic_NEG, // Two's Complement Negation
247+Mnemonic_NOP, // No Operation
248+Mnemonic_NOT, // One's Complement Negation
249+Mnemonic_OR, // Logical Inclusive OR
250+Mnemonic_PREFETCH, // prefetch
251+Mnemonic_PADDQ, // Add Packed Quadword Integers
252+Mnemonic_PAND, // Logical AND
253+Mnemonic_POR, // Bitwise Logical OR
254+Mnemonic_PSUBQ, // Subtract Packed Quadword Integers
255+Mnemonic_PANDN,
256+Mnemonic_PSLLQ,
257+Mnemonic_PSRLQ,
258+Mnemonic_PXOR, // Logical Exclusive OR
259+Mnemonic_POP, // Pop a Value from the Stack
260+Mnemonic_POPFD, // Pop a Value of EFLAGS register from the Stack
261+Mnemonic_PUSH, // Push Word or Doubleword Onto the Stack
262+Mnemonic_PUSHFD, // Push EFLAGS Doubleword Onto the Stack
263+Mnemonic_RET, // Return from Procedure
264+
265+Mnemonic_SETcc, // Set Byte on Condition
266+ CCM(SET,O),
267+ CCM(SET,NO),
268+ CCM(SET,B), CCM(SET,NAE), CCM(SET,C),
269+ CCM(SET,NB), CCM(SET,AE), CCM(SET,NC),
270+ CCM(SET,Z), CCM(SET,E),
271+ CCM(SET,NZ), CCM(SET,NE),
272+ CCM(SET,BE), CCM(SET,NA),
273+ CCM(SET,NBE), CCM(SET,A),
274+ CCM(SET,S),
275+ CCM(SET,NS),
276+ CCM(SET,P), CCM(SET,PE),
277+ CCM(SET,NP), CCM(SET,PO),
278+ CCM(SET,L), CCM(SET,NGE),
279+ CCM(SET,NL), CCM(SET,GE),
280+ CCM(SET,LE), CCM(SET,NG),
281+ CCM(SET,NLE), CCM(SET,G),
282+
283+Mnemonic_SAL, Mnemonic_SHL=Mnemonic_SAL,// Shift left
284+Mnemonic_SAR, // Signed (arithmetic) shift right
285+Mnemonic_ROR, // Rotate right
286+Mnemonic_RCR, // Rotate right through CARRY flag
287+Mnemonic_ROL, // Rotate left
288+Mnemonic_RCL, // Rotate left through CARRY flag
289+Mnemonic_SHR, // Unsigned (logical) shift right
290+Mnemonic_SHRD, // Double Precision Shift Right
291+Mnemonic_SHLD, // Double Precision Shift Left
292+
293+Mnemonic_SBB, // Integer Subtraction with Borrow
294+Mnemonic_SUB, // Subtract
295+Mnemonic_SUBSD, // Subtract Scalar Double-Precision Floating-Point Values
296+Mnemonic_SUBSS, // Subtract Scalar Single-Precision Floating-Point Values
297+
298+Mnemonic_TEST, // Logical Compare
299+
300+Mnemonic_UCOMISD, // Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS
301+Mnemonic_UCOMISS, // Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS
302+
303+Mnemonic_XOR, // Logical Exclusive OR
304+//
305+// packed things,
306+//
307+Mnemonic_XORPD, // Bitwise Logical XOR for Double-Precision Floating-Point Values
308+Mnemonic_XORPS, // Bitwise Logical XOR for Single-Precision Floating-Point Values
309+
310+Mnemonic_CVTDQ2PD, // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values
311+Mnemonic_CVTTPD2DQ, // Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers
312+
313+Mnemonic_CVTDQ2PS, // Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values
314+Mnemonic_CVTTPS2DQ, // Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Doubleword Integers
315+//
316+// String operations
317+//
318+Mnemonic_STD, // Set direction flag
319+Mnemonic_CLD, // Clear direction flag
320+Mnemonic_SCAS, // Scan string
321+Mnemonic_STOS, // Store string
322+
323+//
324+Mnemonic_WAIT, // Check for pending unmasked floating-point exceptions
325+Mnemonic_PADDB, //!< Add packed byte integers
326+Mnemonic_PADDW, //!< Add packed word integers
327+Mnemonic_PADDD, //!< Add packed doubleword integers
328+Mnemonic_PSUBB, //!< Subtract packed byte integers
329+Mnemonic_PSUBW, //!< Subtract packed word integers
330+Mnemonic_PSUBD, //!< Subtract packed doubleword integers
331+Mnemonic_PMULLW, //!< Multiply packed word integers
332+Mnemonic_PMULLD, //!< Multiply packed doubleword integers
333+Mnemonic_PSLLW, //!< Shift words left and shift in 0s
334+Mnemonic_PSLLD, //!< Shift doublewords left and shift in 0s
335+Mnemonic_PSRAW, //!< Shift words right and shift in sign bits
336+Mnemonic_PSRAD, //!< Shift doublewords right and shift in sign bits
337+Mnemonic_PSRLW, //!< Shift words right and shift in 0s
338+Mnemonic_PSRLD, //!< Shift doublewords right and shift in 0s
339+Mnemonic_PMOVSXBW, //!< Sign extend 8 packed signed 8-bit integers in the low 8 bytes to 8 packed signed 16-bit integers
340+Mnemonic_PSHUFB, //!< Shuffle bytes
341+Mnemonic_PSHUFD, //!< Shuffle doublewords
342+Mnemonic_PSHUFLW, //!< Shuffle packed low words
343+Mnemonic_PSHUFHW, //!< Shuffle packed high words
344+Mnemonic_PHADDSW, //!< Add 16-bit signed integers horizontally, then pack saturated integers
345+Mnemonic_PHADDW, //!< Add 16-bit signed integers horizontally, then pack
346+Mnemonic_PHADDD, //!< Add 32-bit signed integers horizontally, then pack
347+Mnemonic_PHSUBSW, //!< Subtract 16-bit signed integers horizontally, then pack saturated integers
348+Mnemonic_PHSUBW, //!< Subtract 16-bit signed integers horizontally, then pack
349+Mnemonic_PHSUBD, //!< Subtract 32-bit signed integers horizontally, then pack
350+Mnemonic_PEXTRB, //!< Extract a byte integer value from xmm
351+Mnemonic_PEXTRW, //!< Extract a word integer value from xmm
352+Mnemonic_PEXTRD, //!< Extract a doubleword integer value from xmm
353+Mnemonic_MOVDQA, //!< Move aligned double quadword
354+Mnemonic_SHUFPS, //!< Shuffle packed single-precision values
355+Mnemonic_MOVAPS, //!< Move aligned packed single-precision values
356+
357+//
358+Mnemonic_Count
359+} Mnemonic;
360+
361+#undef CCM
362+
363+ENCODER_NAMESPACE_END
364+
365+#endif // ifndef _ENCODER_DEFS_EXT_H_
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_prvt.h
@@ -0,0 +1,382 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+#ifndef __ENC_PRVT_H_INCLUDED__
21+#define __ENC_PRVT_H_INCLUDED__
22+
23+#include "enc_base.h"
24+
25+ENCODER_NAMESPACE_START
26+/*
27+ * @file
28+ * @brief Contains some definitions/constants and other stuff used by the
29+ * Encoder internally.
30+ */
31+
32+enum OpcodeByteKind {
33+ //OpcodeByteKind_Opcode = 0x0000,
34+ OpcodeByteKind_ZeroOpcodeByte = 0x0100,
35+ //
36+    // The names _SlashR, _SlashNum, _ib, _iw, etc
37+    // represent the appropriate abbreviations used
38+    // in the mnemonic descriptions in Intel's arch manual.
39+ //
40+ OpcodeByteKind_SlashR = 0x0200,
41+ OpcodeByteKind_SlashNum = 0x0300,
42+ OpcodeByteKind_ib = 0x0400,
43+ OpcodeByteKind_iw = 0x0500,
44+ OpcodeByteKind_id = 0x0600,
45+#ifdef _EM64T_
46+ OpcodeByteKind_io = 0x0700,
47+#endif
48+ OpcodeByteKind_cb = 0x0800,
49+ OpcodeByteKind_cw = 0x0900,
50+ OpcodeByteKind_cd = 0x0A00,
51+ //OpcodeByteKind_cp = 0x0B00,
52+ //OpcodeByteKind_co = 0x0C00,
53+ //OpcodeByteKind_ct = 0x0D00,
54+
55+ OpcodeByteKind_rb = 0x0E00,
56+ OpcodeByteKind_rw = 0x0F00,
57+ OpcodeByteKind_rd = 0x1000,
58+#ifdef _EM64T_
59+ OpcodeByteKind_ro = 0x1100,
60+ //OpcodeByteKind_REX = 0x1200,
61+ OpcodeByteKind_REX_W = 0x1300,
62+#endif
63+ OpcodeByteKind_plus_i = 0x1400,
64+ /**
65+     * a special marker meaning 'no opcode at the given position';
66+     * used in the opcodes array to mark an empty slot - say, the slot
67+     * of an em64t-specific opcode on ia32.
68+     * The last 'e' is lowercase to avoid confusion with the 'F' in
69+     * OpcodeByteKind_LAST.
70+ */
71+ OpcodeByteKind_EMPTY = 0xFFFE,
72+ /**
73+     * a special marker meaning 'no more opcodes in the array';
74+     * used in the opcodes array to show that there are no more
75+     * opcodes for a given mnemonic.
76+ */
77+ OpcodeByteKind_LAST = 0xFFFF,
78+ /**
79+ * a mask to extract the OpcodeByteKind
80+ */
81+ OpcodeByteKind_KindMask = 0xFF00,
82+ /**
83+ * a mask to extract the opcode byte when presented
84+     * a mask to extract the opcode byte when present
85+ OpcodeByteKind_OpcodeMask = 0x00FF
86+};
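+
+// For example, an encoding-table entry such as DEC's {0xFE, _1} pairs a
+// literal opcode byte (0xFE) with OpcodeByteKind_SlashNum|1 - the manual's
+// '/1' digit that goes into the ModRM reg field.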
87+
88+#ifdef USE_ENCODER_DEFINES
89+
90+#define N {0, 0, 0, 0 }
91+#define U {1, 0, 1, OpndRole_Use }
92+#define D {1, 1, 0, OpndRole_Def }
93+#define DU {1, 1, 1, OpndRole_Def|OpndRole_Use }
94+
95+#define U_U {2, 0, 2, OpndRole_Use<<2 | OpndRole_Use }
96+#define D_U {2, 1, 1, OpndRole_Def<<2 | OpndRole_Use }
97+#define D_DU {2, 2, 1, OpndRole_Def<<2 | (OpndRole_Def|OpndRole_Use) }
98+#define DU_U {2, 1, 2, ((OpndRole_Def|OpndRole_Use)<<2 | OpndRole_Use) }
99+#define DU_DU {2, 2, 2, ((OpndRole_Def|OpndRole_Use)<<2 | (OpndRole_Def|OpndRole_Use)) }
100+
101+#define DU_DU_DU {3, 3, 3, ((OpndRole_Def|OpndRole_Use)<<4) | ((OpndRole_Def|OpndRole_Use)<<2) | (OpndRole_Def|OpndRole_Use) }
102+#define DU_DU_U {3, 2, 3, (((OpndRole_Def|OpndRole_Use)<<4) | ((OpndRole_Def|OpndRole_Use)<<2) | OpndRole_Use) }
103+#define D_DU_U {3, 2, 2, (((OpndRole_Def)<<4) | ((OpndRole_Def|OpndRole_Use)<<2) | OpndRole_Use) }
104+#define D_U_U {3, 1, 2, (((OpndRole_Def)<<4) | ((OpndRole_Use)<<2) | OpndRole_Use) }
105+
106+// Special encoding of 0x00 opcode byte. Note: it's all O-s, not zeros.
107+#define OxOO OpcodeByteKind_ZeroOpcodeByte
108+
109+#define Size16 InstPrefix_OpndSize
110+
111+#define _r OpcodeByteKind_SlashR
112+
113+#define _0 OpcodeByteKind_SlashNum|0
114+#define _1 OpcodeByteKind_SlashNum|1
115+#define _2 OpcodeByteKind_SlashNum|2
116+#define _3 OpcodeByteKind_SlashNum|3
117+#define _4 OpcodeByteKind_SlashNum|4
118+#define _5 OpcodeByteKind_SlashNum|5
119+#define _6 OpcodeByteKind_SlashNum|6
120+#define _7 OpcodeByteKind_SlashNum|7
121+
122+// '+i' for floating-point instructions
123+#define _i OpcodeByteKind_plus_i
124+
125+
126+#define ib OpcodeByteKind_ib
127+#define iw OpcodeByteKind_iw
128+#define id OpcodeByteKind_id
129+
130+#define cb OpcodeByteKind_cb
131+#define cw OpcodeByteKind_cw
132+#define cd OpcodeByteKind_cd
133+
134+#define rb OpcodeByteKind_rb
135+#define rw OpcodeByteKind_rw
136+#define rd OpcodeByteKind_rd
137+
138+#define AL {OpndKind_GPReg, OpndSize_8, OpndExt_Any, RegName_AL}
139+#define AH {OpndKind_GPReg, OpndSize_8, OpndExt_Any, RegName_AH}
140+#define AX {OpndKind_GPReg, OpndSize_16, OpndExt_Any, RegName_AX}
141+#define EAX {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_EAX}
142+#ifdef _EM64T_
143+ #define RAX {OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RAX }
144+#endif
145+
146+#define CL {OpndKind_GPReg, OpndSize_8, OpndExt_Any, RegName_CL}
147+#define ECX {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_ECX}
148+#ifdef _EM64T_
149+ #define RCX {OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RCX}
150+#endif
151+
152+#define DX {OpndKind_GPReg, OpndSize_16, OpndExt_Any, RegName_DX}
153+#define EDX {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_EDX}
154+#ifdef _EM64T_
155+ #define RDX { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RDX }
156+#endif
157+
158+#define ESI {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_ESI}
159+#ifdef _EM64T_
160+ #define RSI { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RSI }
161+#endif
162+
163+#define EDI {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_EDI}
164+#ifdef _EM64T_
165+ #define RDI { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RDI }
166+#endif
167+
168+#define r8 {OpndKind_GPReg, OpndSize_8, OpndExt_Any, RegName_Null}
169+#define r16 {OpndKind_GPReg, OpndSize_16, OpndExt_Any, RegName_Null}
170+#define r32 {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_Null}
171+#ifdef _EM64T_
172+ #define r64 { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_Null }
173+#endif
174+
175+#define r_m8 {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_8, OpndExt_Any, RegName_Null}
176+#define r_m16 {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_16, OpndExt_Any, RegName_Null}
177+#define r_m32 {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_32, OpndExt_Any, RegName_Null}
178+
179+#define r_m8s {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_8, OpndExt_Signed, RegName_Null}
180+#define r_m16s {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_16, OpndExt_Signed, RegName_Null}
181+#define r_m32s {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_32, OpndExt_Signed, RegName_Null}
182+
183+#define r_m8u {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_8, OpndExt_Zero, RegName_Null}
184+#define r_m16u {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_16, OpndExt_Zero, RegName_Null}
185+#define r_m32u {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_32, OpndExt_Zero, RegName_Null}
186+
187+//'m' was only used in the LEA mnemonic, but is replaced with a
188+// set of exact sizes. See more comments for the LEA instruction in TheTable.
189+//#define m {OpndKind_Mem, OpndSize_Null, RegName_Null}
190+#define m8 {OpndKind_Mem, OpndSize_8, OpndExt_Any, RegName_Null}
191+#define m16 {OpndKind_Mem, OpndSize_16, OpndExt_Any, RegName_Null}
192+#define m32 {OpndKind_Mem, OpndSize_32, OpndExt_Any, RegName_Null}
193+#define m64 {OpndKind_Mem, OpndSize_64, OpndExt_Any, RegName_Null}
194+#ifdef _EM64T_
195+ #define r_m64 { (OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_64, OpndExt_Any, RegName_Null }
196+#endif
197+
198+#define imm8 {OpndKind_Imm, OpndSize_8, OpndExt_Any, RegName_Null}
199+#define imm16 {OpndKind_Imm, OpndSize_16, OpndExt_Any, RegName_Null}
200+#define imm32 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null}
201+
202+#define imm8s {OpndKind_Imm, OpndSize_8, OpndExt_Signed, RegName_Null}
203+#define imm16s {OpndKind_Imm, OpndSize_16, OpndExt_Signed, RegName_Null}
204+#define imm32s {OpndKind_Imm, OpndSize_32, OpndExt_Signed, RegName_Null}
205+
206+#define imm8u {OpndKind_Imm, OpndSize_8, OpndExt_Zero, RegName_Null}
207+#define imm16u {OpndKind_Imm, OpndSize_16, OpndExt_Zero, RegName_Null}
208+#define imm32u {OpndKind_Imm, OpndSize_32, OpndExt_Zero, RegName_Null}
209+
210+#ifdef _EM64T_
211+ #define imm64 {OpndKind_Imm, OpndSize_64, OpndExt_Any, RegName_Null }
212+#endif
213+
214+//FIXME: moff-s are in fact memory refs, but presented as immediate.
215+// Need to specify this in OpndDesc.
216+#define moff8 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null}
217+#define moff16 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null}
218+#define moff32 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null}
219+#ifdef _EM64T_
220+ #define moff64 {OpndKind_Imm, OpndSize_64, OpndExt_Any, RegName_Null}
221+#endif
222+
223+
224+#define rel8 {OpndKind_Imm, OpndSize_8, OpndExt_Any, RegName_Null}
225+#define rel16 {OpndKind_Imm, OpndSize_16, OpndExt_Any, RegName_Null}
226+#define rel32 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null}
227+
228+#define mm64 {OpndKind_MMXReg, OpndSize_64, OpndExt_Any, RegName_Null}
229+#define mm_m64 {(OpndKind)(OpndKind_MMXReg|OpndKind_Mem), OpndSize_64, OpndExt_Any, RegName_Null}
230+
231+#define xmm64 {OpndKind_XMMReg, OpndSize_64, OpndExt_Any, RegName_Null}
232+#define xmm_m64 {(OpndKind)(OpndKind_XMMReg|OpndKind_Mem), OpndSize_64, OpndExt_Any, RegName_Null}
233+
234+#define xmm32 {OpndKind_XMMReg, OpndSize_32, OpndExt_Any, RegName_Null}
235+#define xmm_m32 {(OpndKind)(OpndKind_XMMReg|OpndKind_Mem), OpndSize_32, OpndExt_Any, RegName_Null}
236+
237+#define FP0S {OpndKind_FPReg, OpndSize_32, OpndExt_Any, RegName_FP0S}
238+#define FP0D {OpndKind_FPReg, OpndSize_64, OpndExt_Any, RegName_FP0D}
239+#define FP1S {OpndKind_FPReg, OpndSize_32, OpndExt_Any, RegName_FP1S}
240+#define FP1D {OpndKind_FPReg, OpndSize_64, OpndExt_Any, RegName_FP1D}
241+#define fp32 {OpndKind_FPReg, OpndSize_32, OpndExt_Any, RegName_Null}
242+#define fp64 {OpndKind_FPReg, OpndSize_64, OpndExt_Any, RegName_Null}
243+
244+#ifdef _EM64T_
245+ #define io OpcodeByteKind_io
246+ #define REX_W OpcodeByteKind_REX_W
247+
248+#endif
249+
250+#endif // USE_ENCODER_DEFINES
251+
252+/**
253+ * @brief Represents the REX part of instruction.
254+ */
255+struct Rex {
256+ unsigned char b : 1;
257+ unsigned char x : 1;
258+ unsigned char r : 1;
259+ unsigned char w : 1;
260+ unsigned char dummy : 4; // must be '0100'b
261+ unsigned int :24;
262+};
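+
+// A worked example, assuming the usual little-endian (LSB-first) bitfield
+// layout: with w=1 and r=x=b=0 the byte is 0100 1000b = 0x48, i.e. the
+// REX.W prefix that the encoding table refers to as REX_W.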
263+
264+/**
265+ * @brief Describes SIB (scale,index,base) byte.
266+ */
267+struct SIB {
268+ unsigned char base:3;
269+ unsigned char index:3;
270+ unsigned char scale:2;
271+ unsigned int padding:24;
272+};
273+/**
274+ * @brief Describes ModRM byte.
275+ */
276+struct ModRM
277+{
278+ unsigned char rm:3;
279+ unsigned char reg:3;
280+ unsigned char mod:2;
281+ unsigned int padding:24;
282+};
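+
+// For instance, the ModRM byte 0xC1 decodes (same LSB-first layout) as
+// mod=11b, reg=000b, rm=001b - register-direct mode with ECX as the r/m
+// operand.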
283+
284+
285+
286+/**
287+* exactly the same as EncoderBase::OpcodeDesc, but also holds info about
288+* platform on which the opcode is applicable.
289+*/
290+struct OpcodeInfo {
291+ enum platform {
292+ /// an opcode is valid on all platforms
293+ all,
294+        // opcode is valid on Intel64 (EM64T) only
295+        em64t,
296+        // opcode is valid on IA-32 only
297+        ia32,
298+ // opcode is added for the sake of disassembling, should not be used in encoding
299+ decoder,
300+ // only appears in master table, replaced with 'decoder' in hashed version
301+ decoder32,
302+ // only appears in master table, replaced with 'decoder' in hashed version
303+ decoder64,
304+ };
305+ platform platf;
306+ unsigned opcode[4+1+1];
307+ EncoderBase::OpndDesc opnds[EncoderBase::MAX_NUM_OPCODE_OPERANDS];
308+ EncoderBase::OpndRolesDesc roles;
309+};
310+
311+/**
312+ * @defgroup MF_ Mnemonic flags
313+*/
314+
315+ /**
316+ * Operation has no special properties.
317+ */
318+#define MF_NONE (0x00000000)
319+ /**
320+ * Operation affects flags
321+ */
322+#define MF_AFFECTS_FLAGS (0x00000001)
323+ /**
324+ * Operation uses flags - conditional operations, ADC/SBB/ETC
325+ */
326+#define MF_USES_FLAGS (0x00000002)
327+ /**
328+ * Operation is conditional - MOVcc/SETcc/Jcc/ETC
329+ */
330+#define MF_CONDITIONAL (0x00000004)
331+/**
332+ * Operation is symmetric - its args can be swapped (ADD/MUL/etc).
333+ */
334+#define MF_SYMMETRIC (0x00000008)
335+/**
336+ * Operation is XOR-like - XOR, SUB - 'op arg,arg' is a pure def,
337+ * without a use.
338+ */
339+#define MF_SAME_ARG_NO_USE (0x00000010)
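+
+// For example, the encoding table declares XOR with
+// MF_AFFECTS_FLAGS|MF_SYMMETRIC|MF_SAME_ARG_NO_USE: 'XOR arg,arg' is a pure
+// def of arg without a use, which the data flow analysis can exploit.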
340+
341+///@} // ~MF_
342+
343+/**
344+ * The same structure as EncoderBase::MnemonicDesc, but carries an array of
345+ * OpcodeInfo instead of OpcodeDesc[].
346+ * Only used while prebuilding the encoding tables, thus it's hidden under
347+ * the appropriate define.
348+ */
349+struct MnemonicInfo {
350+ /**
351+ * The mnemonic itself
352+ */
353+ Mnemonic mn;
354+ /**
355+ * Various characteristics of mnemonic.
356+ * @see MF_
357+ */
358+ unsigned flags;
359+ /**
360+ * Number of args/defs/uses/roles for the operation. For operations
361+ * which may take a different number of operands (i.e. IMUL/SHL), use the
362+ * most common value, or leave '0' if you are sure this info is not
363+ * required.
364+ */
365+ EncoderBase::OpndRolesDesc roles;
366+ /**
367+ * Print name of the mnemonic
368+ */
369+ const char * name;
370+ /**
371+ * Array of opcodes.
372+ * The terminating opcode description always has OpcodeByteKind_LAST
373+ * at opcodes[i].opcode[0].
374+ * The size of '25' has nothing special behind it - it is just the max
375+ * number of opcodes currently used (for the MOV instruction).
376+ */
377+ OpcodeInfo opcodes[25];
378+};
379+
380+ENCODER_NAMESPACE_END
381+
382+#endif // ~__ENC_PRVT_H_INCLUDED__
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_tabl.cpp
@@ -0,0 +1,2164 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+
21+
22+#include <assert.h>
23+#include <stdio.h>
24+#include <stdlib.h> //qsort
25+#include <string.h>
26+#include <memory.h>
27+#include <errno.h>
28+#include <stdlib.h>
29+
30+
31+// need to use EM64T-specifics - new registers, defines from enc_prvt, etc...
32+#if !defined(_EM64T_)
33+ #define UNDEF_EM64T
34+ #define _EM64T_
35+#endif
36+
37+#define USE_ENCODER_DEFINES
38+#include "enc_prvt.h"
39+#include "enc_defs.h"
40+
41+#ifdef UNDEF_EM64T
42+ #undef _EM64T_
43+#endif
44+
45+//Android x86
46+#if 0 //!defined(_HAVE_MMX_)
47+ #define Mnemonic_PADDQ Mnemonic_Null
48+ #define Mnemonic_PAND Mnemonic_Null
49+ #define Mnemonic_POR Mnemonic_Null
50+ #define Mnemonic_PSUBQ Mnemonic_Null
51+#endif
52+
53+ENCODER_NAMESPACE_START
54+
55+
56+EncoderBase::MnemonicDesc EncoderBase::mnemonics[Mnemonic_Count];
57+EncoderBase::OpcodeDesc EncoderBase::opcodes[Mnemonic_Count][MAX_OPCODES];
58+unsigned char EncoderBase::opcodesHashMap[Mnemonic_Count][HASH_MAX];
59+
60+
61+/**
62+ * @file
63+ * @brief 'Master' copy of encoding data.
64+ */
65+
66+/*
67+This file contains the 'master copy' of the encoding table - this is the info
68+used both by the generator of native instructions (EncoderBase class) and by
69+the disassembling routines. The first one uses the info on how to encode an
70+instruction, and the second does the opposite - several separate tables are
71+built at runtime from this main table.
72+
73+=============================================================================
74+
75+The table was designed for easy support and maintenance. Thus, it was made as
76+close as possible to the descriptions in Intel's IA32 Architecture Manual.
77+The info is based on the latest (at the moment of writing) revision, which is
78+June 2005, order number 253666-016.
79+
80+Normally, almost all opcodes in the 'master' table are represented exactly as
81+they are shown in Intel's Architecture Manual (well, with slashes
82+replaced with underscores). There are several exceptions, specially marked.
83+
84+Normally, to add an opcode/instruction, one only needs to copy the whole
85+string from the manual, and simply replace '/' with '_'.
86+
87+I.e., TheManual reads for DEC:
88+ (1) FE /1 DEC r/m8 Valid Valid Decrement r/m8 by 1.
89+ (2) REX + FE /1 DEC r/m8* Valid N.E. Decrement r/m8 by 1.
90+ (3) REX.W + FF /1 DEC r/m64 Valid N.E. Decrement r/m64 by 1.
91+
92+1. Note that there is no need to explicitly specify REX-based opcodes for
93+   an instruction to handle additional registers on EM64T:
94+
95+ (1) FE /1 DEC r/m8 Valid Valid Decrement r/m8 by 1.
96+ (3) REX.W + FF /1 DEC r/m64 Valid N.E. Decrement r/m64 by 1.
97+
98+2. Copy the string, strip off the text comments, replace '/'=>'_'. Note that
99+   the second line is for EM64T only
100+
101+ (1) FE /1 DEC r/m8
102+ (3) REX.W + FF /1 DEC r/m64
103+
104+3. Fill out the mnemonic, opcode parameters parts
105+
106+ BEGIN_MNEMONIC(DEC, MF_AFFECTS_FLAGS, DU)
107+ BEGIN_OPCODES()
108+ {OpcodeInfo::all, {0xFE, _1}, {r_m8}, DU },
109+ {OpcodeInfo::em64t, {REX_W, 0xFF, _1}, {r_m64}, DU },
110+
111+ DU here - one argument, it's used and defined
112+
113+4. That's it - that simple!
114+
115+The operand roles (DU here) are used by Jitrino's optimizing engine to
116+perform data flow analysis. They are also used to store/obtain the number of operands.
117+
118+Special cases are (see the table for details):
119+LEA
120+Some FPU operations (i.e. FSTP)
121+packed things (XORPD, XORPS, CVTDQ2PD, CVTTPD2DQ)
122+
123+Also, Jitrino's needs require specifying all operands - including
124+implicit ones (see IMUL).
125+
126+The master table itself does not need to be ordered - it gets sorted before
127+processing. It's recommended (though it's not a law) to group similar
128+instructions together - i.e. FPU instructions, MMX, etc.
129+
130+=============================================================================
131+
132+The encoding engine builds several tables based on the 'master' one (here
133+'mnemonic' is roughly a synonym for 'instruction'):
134+
135+- list of mnemonics which holds general info about instructions
136+ (EncoderBase::mnemonics)
137+- an array of opcode descriptions (EncoderBase::opcodes)
138+- a mapping between a hash value and an opcode description record for a given
139+ mnemonic (EncoderBase::opcodesHashMap)
140+
141+The EncoderBase::mnemonics holds general info about instructions.
142+The EncoderBase::opcodesHashMap is used for fast opcode selection based on
143+a hash value.
144+The EncoderBase::opcodes is used for the encoding itself.
145+
146+=============================================================================
147+The hash value is calculated and used as follows:
148+
149+JIT-ted code uses the following operand sizes: 8, 16, 32 and 64 bits, so the
150+size of an operand can be encoded in just 2 bits.
151+
152+The following operand locations are available: one of the registers - GP, FP,
153+MMX, XMM (not counting segment registers) - memory, and an immediate, which
154+gives us 6 variants that can be enumerated in 3 bits.
155+
156+As a grand total, the whole operand info needed for opcode selection
157+can be packed in 5 bits. Taking into account the IMUL mnemonic with its 3
158+operands (including implicit ones), we get 15 bits per instruction, and a
159+complete table would be about 32768 items per single instruction.
160+
161+Seems too many, but luckily, the 15 bit limit will never be reached: the
162+worst case is IMUL with its 3 operands:
163+(IMUL r64, r/m64, imm32)/(IMUL r32, r/m32, imm32).
164+So, by assigning the lowest value to GP registers, the max value of the hash
165+can be reduced.
166+
167+The hash values to use are:
168+sizes:
169+ 8 -> 11
170+ 16 -> 10
171+ 32 -> 01
172+ 64 -> 00
173+locations:
174+ gp reg -> 000
175+ memory -> 001
176+ fp reg -> 010
177+ mmx reg -> 011
178+ xmm reg -> 100
179+ immediate -> 101
180+and the grand total for the worst case would be
181+[ GP 32] [GP 32] [Imm 32]
182+[000-01] [000-01] [101 01] = 1077
183+
184+However, the implicit operands add additional value, and the worst case
185+is 'SHLD r_m32, r32, CL=r8'. This gives us the maximum number of:
186+
187+[mem 32] [GP 32] [GP 8b]
188+[001-01] [000-01] [000-11] = 5155.
189+
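+A simpler two-operand illustration of the same arithmetic: for
+'ADD r32, imm32' the operand hashes are [GP 32] = [000-01] = 1 and
+[Imm 32] = [101-01] = 21, so the instruction hash is (1<<5) | 21 = 53.
+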
190+The max number is pretty big and the hash values are quite sparse, thus it
191+is not reasonable to use direct addressing, i.e.
192+OpcodeDesc[mnemonic][hash_code] - there would be a huge waste of space.
193+
194+Instead, we use a kind of mapping: the opcode info is stored in a packed
195+(here: non-sparse) array. The max number of opcodes will not exceed 255 for
196+each instruction. And we have an index array in which we store a mapping
197+between a hash code value and the opcode position for each given instruction.
198+
199+Sounds a bit sophisticated, but in reality it is simple: the opcode gets
200+selected in 2 simple steps:
201+
202+1. Select [hash,mnemonic] => 'n'.
203+
204+The array is pretty sparse - many cells contain 0xFF, which
205+means 'invalid hash - no opcode with the given characteristics'
206+
207+unsigned char EncoderBase::opcodesHashMap[Mnemonic_Count][HASH_MAX] =
208+
209++----+----+----+----+----+----+
210+| 00 | 05 | FF | FF | 03 | 12 | ...
211+|---------+-------------------+
212+| 12 | FF | FF | n | 04 | 25 | ... <- Mnemonic
213+|-----------------------------+
214+| FF | 11 | FF | 10 | 13 | .. | ...
215++-----------------------------+
216+ ... ^
217+ |
218+ hash
219+
220+2. Select [n,mnemonic] => 'opcode_desc11'
221+
222+OpcodeDesc EncoderBase::opcodes[Mnemonic_Count][MAX_OPCODES] =
223+
224++---------------+---------------+---------------+---------------+
225+| opcode_desc00 | opcode_desc01 | opcode_desc02 | last_opcode | ...
226++---------------+---------------+---------------+---------------+
227+| opcode_desc10 | opcode_desc11 | last_opcode | xxx | <- Mnemonic
228++---------------+---------------+---------------+---------------+
229+| opcode_desc20 | opcode_desc21 | opcode_desc22 | opcode_desc23 | ...
230++---------------+---------------+---------------+---------------+
231+ ...
232+ ^
233+ |
234+ n
235+
236+Now, use 'opcode_desc11'.
237+
238+=============================================================================
239+The array of opcode descriptions (EncoderBase::opcodes) is specially prepared
240+to maximize performance - EncoderBase::encode() is quite hot in client
241+applications for Jitrino/Jitrino.JET.
242+The preparation is that opcode descriptions from the 'master' encoding table
243+are preprocessed and a special set of OpcodeDesc records is prepared:
244+First, the 'raw' opcode bytes are extracted. Here, 'raw' means the bytes that
245+do not depend on any operand values, do not require any analysis and can be
246+simply copied into the output buffer during encoding. Also, the number of these
247+'raw' bytes is counted. The fields are OpcodeDesc::opcode and
248+OpcodeDesc::opcode_len.
249+
250+Then the first non-implicit operand is found and its index is stored in
251+OpcodeDesc::first_opnd.
252+
253+The bytes that require processing and analysis ('/r', '+i', etc) are
254+extracted and stored in OpcodeDesc::aux0 and OpcodeDesc::aux1 fields.
255+
256+Here, a special trick is performed:
257+    Some opcodes have a register/memory operand, but this is not reflected in
258+    the opcode column - for example, (MOVQ xmm64, xmm_m64). In this case, a fake
259+    '_r' is added to the OpcodeDesc::aux field.
260+    Some other opcodes have immediate operands, but this is again not
261+    reflected in the opcode column - for example, CALL cd or PUSH imm32.
262+    In this case, a fake '/cd' or a fake '/id' is added to the appropriate
263+    OpcodeDesc::aux field.
264+
265+The OpcodeDesc::last is non-zero for the final OpcodeDesc record (which does
266+not have valid data itself).
267+*/
268+
269+// TODO: To extend flexibility, replace bool fields in MnemonicDesc &
270+// MnemonicInfo with a set of flags packed into an integer field.
271+
272+unsigned short EncoderBase::getHash(const OpcodeInfo* odesc)
273+{
274+ /*
275+    NOTE: any changes in the hash computation must be strictly balanced with
276+ EncoderBase::Operand::hash_it and EncoderBase::Operands()
277+ */
278+ unsigned short hash = 0;
279+    // The hash computation uses a fast way - table selection instead of if-s.
280+ if (odesc->roles.count > 0) {
281+ OpndKind kind = odesc->opnds[0].kind;
282+ OpndSize size = odesc->opnds[0].size;
283+ assert(kind<COUNTOF(kind_hash));
284+ assert(size<COUNTOF(size_hash));
285+ hash = get_kind_hash(kind) | get_size_hash(size);
286+ }
287+
288+ if (odesc->roles.count > 1) {
289+ OpndKind kind = odesc->opnds[1].kind;
290+ OpndSize size = odesc->opnds[1].size;
291+ assert(kind<COUNTOF(kind_hash));
292+ assert(size<COUNTOF(size_hash));
293+ hash = (hash<<HASH_BITS_PER_OPERAND) |
294+ (get_kind_hash(kind) | get_size_hash(size));
295+ }
296+
297+ if (odesc->roles.count > 2) {
298+ OpndKind kind = odesc->opnds[2].kind;
299+ OpndSize size = odesc->opnds[2].size;
300+ assert(kind<COUNTOF(kind_hash));
301+ assert(size<COUNTOF(size_hash));
302+ hash = (hash<<HASH_BITS_PER_OPERAND) |
303+ (get_kind_hash(kind) | get_size_hash(size));
304+ }
305+ assert(hash <= HASH_MAX);
306+ return hash;
307+}
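+
+// A minimal sketch of the 2-step selection described in the comment above
+// (illustrative only - the actual lookup lives elsewhere in EncoderBase and
+// may differ in detail):
+//
+// unsigned char n = opcodesHashMap[mn][hash]; // 0xFF = no matching opcode
+// const OpcodeDesc& od = opcodes[mn][n]; // descriptor used for encoding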
308+
309+
310+#define BEGIN_MNEMONIC(mn, flags, roles) \
311+ { Mnemonic_##mn, flags, roles, #mn,
312+#define END_MNEMONIC() },
313+#define BEGIN_OPCODES() {
314+#define END_OPCODES() { OpcodeInfo::all, {OpcodeByteKind_LAST}, {}, {0, 0, 0, 0}}}
315+
316+
317+static MnemonicInfo masterEncodingTable[] = {
318+//
319+// Null
320+//
321+BEGIN_MNEMONIC(Null, MF_NONE, N)
322+BEGIN_OPCODES()
323+END_OPCODES()
324+END_MNEMONIC()
325+
326+BEGIN_MNEMONIC(LAHF, MF_USES_FLAGS, D)
327+BEGIN_OPCODES()
328+// TheManual says it's not always supported in em64t mode, thus excluding it
329+ {OpcodeInfo::ia32, {0x9F}, {EAX}, D },
330+END_OPCODES()
331+END_MNEMONIC()
332+//
333+// ALU mnemonics - add, adc, or, xor, and, cmp, sub, sbb
334+// as they differ only in the opcode extention (/digit) number and
335+// in which number the opcode start from, the opcode definitions
336+// for those instructions are packed together
337+//
338+// The 'opcode_starts_from' and 'opcode_ext' in DEFINE_ALU_OPCODES()
339+// are enough to define OpcodeInfo::all opcodes and the 'first_opcode'
340+// parameter is only due to ADD instruction, which requires an zero opcode
341+// byte which, in turn, is coded especially in the current coding scheme.
342+//
343+
344+#define DEFINE_ALU_OPCODES( opc_ext, opcode_starts_from, first_opcode, def_use ) \
345+\
346+ {OpcodeInfo::decoder, {opcode_starts_from + 4, ib}, {AL, imm8}, DU_U },\
347+ {OpcodeInfo::decoder, {Size16, opcode_starts_from + 5, iw}, {AX, imm16}, DU_U },\
348+ {OpcodeInfo::decoder, {opcode_starts_from + 5, id}, {EAX, imm32}, DU_U },\
349+ {OpcodeInfo::decoder64, {REX_W, opcode_starts_from+5, id}, {RAX, imm32s},DU_U },\
350+\
351+ {OpcodeInfo::all, {0x80, opc_ext, ib}, {r_m8, imm8}, def_use },\
352+ {OpcodeInfo::all, {Size16, 0x81, opc_ext, iw}, {r_m16, imm16}, def_use },\
353+ {OpcodeInfo::all, {0x81, opc_ext, id}, {r_m32, imm32}, def_use },\
354+ {OpcodeInfo::em64t, {REX_W, 0x81, opc_ext, id}, {r_m64, imm32s}, def_use },\
355+\
356+ {OpcodeInfo::all, {Size16, 0x83, opc_ext, ib}, {r_m16, imm8s}, def_use },\
357+ {OpcodeInfo::all, {0x83, opc_ext, ib}, {r_m32, imm8s}, def_use },\
358+ {OpcodeInfo::em64t, {REX_W, 0x83, opc_ext, ib}, {r_m64, imm8s}, def_use },\
359+\
360+ {OpcodeInfo::all, {first_opcode, _r}, {r_m8, r8}, def_use },\
361+\
362+ {OpcodeInfo::all, {Size16, opcode_starts_from+1, _r}, {r_m16, r16}, def_use },\
363+ {OpcodeInfo::all, {opcode_starts_from+1, _r}, {r_m32, r32}, def_use },\
364+ {OpcodeInfo::em64t, {REX_W, opcode_starts_from+1, _r}, {r_m64, r64}, def_use },\
365+\
366+ {OpcodeInfo::all, {opcode_starts_from+2, _r}, {r8, r_m8}, def_use },\
367+\
368+ {OpcodeInfo::all, {Size16, opcode_starts_from+3, _r}, {r16, r_m16}, def_use },\
369+ {OpcodeInfo::all, {opcode_starts_from+3, _r}, {r32, r_m32}, def_use },\
370+ {OpcodeInfo::em64t, {REX_W, opcode_starts_from+3, _r}, {r64, r_m64}, def_use },
371+
372+BEGIN_MNEMONIC(ADD, MF_AFFECTS_FLAGS|MF_SYMMETRIC, DU_U)
373+BEGIN_OPCODES()
374+ DEFINE_ALU_OPCODES(_0, 0x00, OxOO, DU_U )
375+END_OPCODES()
376+END_MNEMONIC()
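+
+// As a worked example, with ADD's arguments (opc_ext=_0, def_use=DU_U) the
+// first OpcodeInfo::all row of DEFINE_ALU_OPCODES above expands to:
+// {OpcodeInfo::all, {0x80, _0, ib}, {r_m8, imm8}, DU_U },
+// i.e. the manual's '80 /0 ib ADD r/m8, imm8' form.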
377+
378+BEGIN_MNEMONIC(OR, MF_AFFECTS_FLAGS|MF_SYMMETRIC, DU_U)
379+BEGIN_OPCODES()
380+ DEFINE_ALU_OPCODES(_1, 0x08, 0x08, DU_U )
381+END_OPCODES()
382+END_MNEMONIC()
383+
384+BEGIN_MNEMONIC(ADC, MF_AFFECTS_FLAGS|MF_USES_FLAGS|MF_SYMMETRIC, DU_U)
385+BEGIN_OPCODES()
386+ DEFINE_ALU_OPCODES(_2, 0x10, 0x10, DU_U )
387+END_OPCODES()
388+END_MNEMONIC()
389+
390+BEGIN_MNEMONIC(SBB, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U)
391+BEGIN_OPCODES()
392+ DEFINE_ALU_OPCODES(_3, 0x18, 0x18, DU_U )
393+END_OPCODES()
394+END_MNEMONIC()
395+
396+BEGIN_MNEMONIC(AND, MF_AFFECTS_FLAGS|MF_SYMMETRIC, DU_U)
397+BEGIN_OPCODES()
398+ DEFINE_ALU_OPCODES(_4, 0x20, 0x20, DU_U )
399+END_OPCODES()
400+END_MNEMONIC()
401+
402+
403+BEGIN_MNEMONIC(SUB, MF_AFFECTS_FLAGS|MF_SAME_ARG_NO_USE, DU_U)
404+BEGIN_OPCODES()
405+ DEFINE_ALU_OPCODES(_5, 0x28, 0x28, DU_U )
406+END_OPCODES()
407+END_MNEMONIC()
408+
409+
410+BEGIN_MNEMONIC(XOR, MF_AFFECTS_FLAGS|MF_SYMMETRIC|MF_SAME_ARG_NO_USE, DU_U)
411+BEGIN_OPCODES()
412+ DEFINE_ALU_OPCODES( _6, 0x30, 0x30, DU_U )
413+END_OPCODES()
414+END_MNEMONIC()
415+
416+BEGIN_MNEMONIC(CMP, MF_AFFECTS_FLAGS, U_U)
417+BEGIN_OPCODES()
418+ DEFINE_ALU_OPCODES( _7, 0x38, 0x38, U_U )
419+END_OPCODES()
420+END_MNEMONIC()
421+
422+BEGIN_MNEMONIC(CMPXCHG, MF_AFFECTS_FLAGS, N)
423+BEGIN_OPCODES()
424+ {OpcodeInfo::all, {0x0F, 0xB0, _r}, {r_m8, r8, AL}, DU_DU_DU },
425+ {OpcodeInfo::all, {Size16, 0x0F, 0xB1, _r}, {r_m16, r16, AX}, DU_DU_DU },
426+ {OpcodeInfo::all, {0x0F, 0xB1, _r}, {r_m32, r32, EAX}, DU_DU_DU},
427+ {OpcodeInfo::em64t, {REX_W, 0x0F, 0xB1, _r}, {r_m64, r64, RAX}, DU_DU_DU },
428+END_OPCODES()
429+END_MNEMONIC()
430+
431+BEGIN_MNEMONIC(CMPXCHG8B, MF_AFFECTS_FLAGS, D)
432+BEGIN_OPCODES()
433+ {OpcodeInfo::all, {0x0F, 0xC7, _1}, {m64}, DU },
434+END_OPCODES()
435+END_MNEMONIC()
436+
437+#undef DEFINE_ALU_OPCODES
438+//
439+//
440+//
441+BEGIN_MNEMONIC(ADDSD, MF_NONE, DU_U)
442+BEGIN_OPCODES()
443+ {OpcodeInfo::all, {0xF2, 0x0F, 0x58, _r}, {xmm64, xmm_m64}, DU_U},
444+END_OPCODES()
445+END_MNEMONIC()
446+
447+BEGIN_MNEMONIC(ADDSS, MF_NONE, DU_U)
448+BEGIN_OPCODES()
449+ {OpcodeInfo::all, {0xF3, 0x0F, 0x58, _r}, {xmm32, xmm_m32}, DU_U},
450+END_OPCODES()
451+END_MNEMONIC()
452+
453+
454+BEGIN_MNEMONIC(BSF, MF_AFFECTS_FLAGS, N)
455+BEGIN_OPCODES()
456+ {OpcodeInfo::all, {0x0F, 0xBC}, {r32, r_m32}, D_U},
457+END_OPCODES()
458+END_MNEMONIC()
459+
460+BEGIN_MNEMONIC(BSR, MF_AFFECTS_FLAGS, N)
461+BEGIN_OPCODES()
462+ {OpcodeInfo::all, {0x0F, 0xBD}, {r32, r_m32}, D_U},
463+END_OPCODES()
464+END_MNEMONIC()
465+
466+
467+BEGIN_MNEMONIC(CALL, MF_NONE, U )
468+BEGIN_OPCODES()
469+ {OpcodeInfo::all, {0xE8, cd}, {rel32}, U },
470+ {OpcodeInfo::ia32, {Size16, 0xE8, cw}, {rel16}, U },
471+ {OpcodeInfo::ia32, {0xFF, _2}, {r_m32}, U },
472+ {OpcodeInfo::em64t, {0xFF, _2}, {r_m64}, U },
473+END_OPCODES()
474+END_MNEMONIC()
475+
476+BEGIN_MNEMONIC(CMC, MF_USES_FLAGS|MF_AFFECTS_FLAGS, N)
477+BEGIN_OPCODES()
478+ {OpcodeInfo::decoder, {0xF5}, {}, N },
479+END_OPCODES()
480+END_MNEMONIC()
481+
482+//TODO: Workaround. Actually, it's D_DU, but Jitrino's CG thinks it's D_U
483+BEGIN_MNEMONIC(CDQ, MF_NONE, D_U )
484+BEGIN_OPCODES()
485+ {OpcodeInfo::all, {0x99}, {DX, AX}, D_U },
486+ {OpcodeInfo::all, {0x99}, {EDX, EAX}, D_U },
487+ {OpcodeInfo::em64t, {REX_W, 0x99}, {RDX, RAX}, D_U },
488+END_OPCODES()
489+END_MNEMONIC()
490+
491+#define DEFINE_CMOVcc_MNEMONIC( cc ) \
492+ BEGIN_MNEMONIC(CMOV##cc, MF_USES_FLAGS|MF_CONDITIONAL, DU_U ) \
493+BEGIN_OPCODES() \
494+ {OpcodeInfo::all, {Size16, 0x0F, 0x40 + ConditionMnemonic_##cc, _r}, {r16, r_m16}, DU_U }, \
495+ {OpcodeInfo::all, {0x0F, 0x40 + ConditionMnemonic_##cc, _r}, {r32, r_m32}, DU_U }, \
496+ {OpcodeInfo::em64t, {REX_W, 0x0F, 0x40 + ConditionMnemonic_##cc, _r}, {r64, r_m64}, DU_U }, \
497+END_OPCODES() \
498+END_MNEMONIC()
499+
500+DEFINE_CMOVcc_MNEMONIC(O)
501+DEFINE_CMOVcc_MNEMONIC(NO)
502+DEFINE_CMOVcc_MNEMONIC(B)
503+DEFINE_CMOVcc_MNEMONIC(NB)
504+DEFINE_CMOVcc_MNEMONIC(Z)
505+DEFINE_CMOVcc_MNEMONIC(NZ)
506+DEFINE_CMOVcc_MNEMONIC(BE)
507+DEFINE_CMOVcc_MNEMONIC(NBE)
508+DEFINE_CMOVcc_MNEMONIC(S)
509+DEFINE_CMOVcc_MNEMONIC(NS)
510+DEFINE_CMOVcc_MNEMONIC(P)
511+DEFINE_CMOVcc_MNEMONIC(NP)
512+DEFINE_CMOVcc_MNEMONIC(L)
513+DEFINE_CMOVcc_MNEMONIC(NL)
514+DEFINE_CMOVcc_MNEMONIC(LE)
515+DEFINE_CMOVcc_MNEMONIC(NLE)
516+
517+#undef DEFINE_CMOVcc_MNEMONIC
518+
519+/*****************************************************************************
520+ ***** SSE conversion routines *****
521+*****************************************************************************/
522+//
523+// double -> float
524+BEGIN_MNEMONIC(CVTSD2SS, MF_NONE, D_U )
525+BEGIN_OPCODES()
526+ {OpcodeInfo::all, {0xF2, 0x0F, 0x5A, _r}, {xmm32, xmm_m64}, D_U },
527+END_OPCODES()
528+END_MNEMONIC()
529+
530+// double -> I_32
531+BEGIN_MNEMONIC(CVTSD2SI, MF_NONE, D_U )
532+BEGIN_OPCODES()
533+ {OpcodeInfo::all, {0xF2, 0x0F, 0x2D, _r}, {r32, xmm_m64}, D_U },
534+ {OpcodeInfo::em64t, {REX_W, 0xF2, 0x0F, 0x2D, _r}, {r64, xmm_m64}, D_U },
535+END_OPCODES()
536+END_MNEMONIC()
537+
538+// double [truncated] -> I_32
539+BEGIN_MNEMONIC(CVTTSD2SI, MF_NONE, D_U )
540+BEGIN_OPCODES()
541+ {OpcodeInfo::all, {0xF2, 0x0F, 0x2C, _r}, {r32, xmm_m64}, D_U },
542+ {OpcodeInfo::em64t, {REX_W, 0xF2, 0x0F, 0x2C, _r}, {r64, xmm_m64}, D_U },
543+END_OPCODES()
544+END_MNEMONIC()
545+
546+// float -> double
547+BEGIN_MNEMONIC(CVTSS2SD, MF_NONE, D_U )
548+BEGIN_OPCODES()
549+ {OpcodeInfo::all, {0xF3, 0x0F, 0x5A, _r}, {xmm64, xmm_m32}, D_U },
550+END_OPCODES()
551+END_MNEMONIC()
552+
553+// float -> I_32
554+BEGIN_MNEMONIC(CVTSS2SI, MF_NONE, D_U )
555+BEGIN_OPCODES()
556+ {OpcodeInfo::all, {0xF3, 0x0F, 0x2D, _r}, {r32, xmm_m32}, D_U},
557+ {OpcodeInfo::em64t, {REX_W, 0xF3, 0x0F, 0x2D, _r}, {r64, xmm_m32}, D_U},
558+END_OPCODES()
559+END_MNEMONIC()
560+
561+// float [truncated] -> I_32
562+BEGIN_MNEMONIC(CVTTSS2SI, MF_NONE, D_U )
563+BEGIN_OPCODES()
564+ {OpcodeInfo::all, {0xF3, 0x0F, 0x2C, _r}, {r32, xmm_m32}, D_U},
565+ {OpcodeInfo::em64t, {REX_W, 0xF3, 0x0F, 0x2C, _r}, {r64, xmm_m32}, D_U},
566+END_OPCODES()
567+END_MNEMONIC()
568+
569+// I_32 -> double
570+BEGIN_MNEMONIC(CVTSI2SD, MF_NONE, D_U )
571+BEGIN_OPCODES()
572+ {OpcodeInfo::all, {0xF2, 0x0F, 0x2A, _r}, {xmm64, r_m32}, D_U},
573+ {OpcodeInfo::em64t, {REX_W, 0xF2, 0x0F, 0x2A, _r}, {xmm64, r_m64}, D_U},
574+END_OPCODES()
575+END_MNEMONIC()
576+
577+// I_32 -> float
578+BEGIN_MNEMONIC(CVTSI2SS, MF_NONE, D_U )
579+BEGIN_OPCODES()
580+ {OpcodeInfo::all, {0xF3, 0x0F, 0x2A, _r}, {xmm32, r_m32}, D_U},
581+ {OpcodeInfo::em64t, {REX_W, 0xF3, 0x0F, 0x2A, _r}, {xmm32, r_m64}, D_U},
582+END_OPCODES()
583+END_MNEMONIC()
584+
585+//
586+// ~ SSE conversions
587+//
588+
589+BEGIN_MNEMONIC(DEC, MF_AFFECTS_FLAGS, DU )
590+BEGIN_OPCODES()
591+ {OpcodeInfo::all, {0xFE, _1}, {r_m8}, DU },
592+
593+ {OpcodeInfo::all, {Size16, 0xFF, _1}, {r_m16}, DU },
594+ {OpcodeInfo::all, {0xFF, _1}, {r_m32}, DU },
595+ {OpcodeInfo::em64t, {REX_W, 0xFF, _1}, {r_m64}, DU },
596+
597+ {OpcodeInfo::ia32, {Size16, 0x48|rw}, {r16}, DU },
598+ {OpcodeInfo::ia32, {0x48|rd}, {r32}, DU },
599+END_OPCODES()
600+END_MNEMONIC()
601+
602+
603+BEGIN_MNEMONIC(DIVSD, MF_NONE, DU_U)
604+BEGIN_OPCODES()
605+ {OpcodeInfo::all, {0xF2, 0x0F, 0x5E, _r}, {xmm64, xmm_m64}, DU_U },
606+END_OPCODES()
607+END_MNEMONIC()
608+
609+
610+BEGIN_MNEMONIC(DIVSS, MF_NONE, DU_U)
611+BEGIN_OPCODES()
612+ {OpcodeInfo::all, {0xF3, 0x0F, 0x5E, _r}, {xmm32, xmm_m32}, DU_U },
613+END_OPCODES()
614+END_MNEMONIC()
615+
616+/****************************************************************************
617+ ***** FPU operations *****
618+****************************************************************************/
619+
620+BEGIN_MNEMONIC(FADDP, MF_NONE, DU )
621+BEGIN_OPCODES()
622+ {OpcodeInfo::all, {0xDE, 0xC1}, {FP0D}, DU },
623+ {OpcodeInfo::all, {0xDE, 0xC1}, {FP0S}, DU },
624+END_OPCODES()
625+END_MNEMONIC()
626+
627+BEGIN_MNEMONIC(FLDZ, MF_NONE, U )
628+BEGIN_OPCODES()
629+ {OpcodeInfo::all, {0xD9, 0xEE}, {FP0D}, D },
630+ {OpcodeInfo::all, {0xD9, 0xEE}, {FP0S}, D },
631+END_OPCODES()
632+END_MNEMONIC()
633+
634+BEGIN_MNEMONIC(FADD, MF_NONE, U )
635+BEGIN_OPCODES()
636+ {OpcodeInfo::all, {0xDC, _0}, {FP0D, m64}, DU_U },
637+ {OpcodeInfo::all, {0xD8, _0}, {FP0S, m32}, DU_U },
638+END_OPCODES()
639+END_MNEMONIC()
640+
641+BEGIN_MNEMONIC(FSUBP, MF_NONE, DU )
642+BEGIN_OPCODES()
643+ {OpcodeInfo::all, {0xDE, 0xE9}, {FP0D}, DU },
644+ {OpcodeInfo::all, {0xDE, 0xE9}, {FP0S}, DU },
645+END_OPCODES()
646+END_MNEMONIC()
647+
648+BEGIN_MNEMONIC(FSUB, MF_NONE, U )
649+BEGIN_OPCODES()
650+ {OpcodeInfo::all, {0xDC, _4}, {FP0D, m64}, DU_U },
651+ {OpcodeInfo::all, {0xD8, _4}, {FP0S, m32}, DU_U },
652+END_OPCODES()
653+END_MNEMONIC()
654+
655+BEGIN_MNEMONIC(FISUB, MF_NONE, U )
656+BEGIN_OPCODES()
657+ {OpcodeInfo::all, {0xDA, _4}, {FP0S, m32}, DU_U },
658+// {OpcodeInfo::all, {0xDE, _4}, {FP0S, m16}, DU_U },
659+END_OPCODES()
660+END_MNEMONIC()
661+
662+
663+
664+BEGIN_MNEMONIC(FMUL, MF_NONE, DU_U )
665+BEGIN_OPCODES()
666+ {OpcodeInfo::all, {0xD8, _1}, {FP0S, m32}, DU_U },
667+ {OpcodeInfo::all, {0xDC, _1}, {FP0D, m64}, DU_U },
668+END_OPCODES()
669+END_MNEMONIC()
670+
671+BEGIN_MNEMONIC(FMULP, MF_NONE, DU )
672+BEGIN_OPCODES()
673+ {OpcodeInfo::all, {0xDE, 0xC9}, {FP0D}, DU },
674+ {OpcodeInfo::all, {0xDE, 0xC9}, {FP0S}, DU },
675+END_OPCODES()
676+END_MNEMONIC()
677+
678+BEGIN_MNEMONIC(FDIVP, MF_NONE, DU )
679+BEGIN_OPCODES()
680+ {OpcodeInfo::all, {0xDE, 0xF9}, {FP0D}, DU },
681+ {OpcodeInfo::all, {0xDE, 0xF9}, {FP0S}, DU },
682+END_OPCODES()
683+END_MNEMONIC()
684+
685+BEGIN_MNEMONIC(FDIV, MF_NONE, U )
686+BEGIN_OPCODES()
687+ {OpcodeInfo::all, {0xDC, _6}, {FP0D, m64}, DU_U },
688+ {OpcodeInfo::all, {0xD8, _6}, {FP0S, m32}, DU_U },
689+END_OPCODES()
690+END_MNEMONIC()
691+
692+
693+BEGIN_MNEMONIC(FUCOM, MF_NONE, D_U )
694+BEGIN_OPCODES()
695+ {OpcodeInfo::all, {0xDD, 0xE1}, {FP0D, FP1D}, DU_U },
696+ {OpcodeInfo::all, {0xDD, 0xE1}, {FP0S, FP1S}, DU_U },
697+ // A little trick: these two opcodes actually take only the index of the
698+ // needed register. To keep things uniform with the other instructions,
699+ // we encode them here as if they took an FPREG.
700+ {OpcodeInfo::all, {0xDD, 0xE0|_i}, {fp32}, DU },
701+ {OpcodeInfo::all, {0xDD, 0xE0|_i}, {fp64}, DU },
702+END_OPCODES()
703+END_MNEMONIC()
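+// For illustration: with the 0xE0|_i trick above, FUCOM ST(2) comes out as
+// 0xDD 0xE2 - the base byte plus the stack-register index, matching the
+// "DD E0+i" form in the Intel manual.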
704+
705+BEGIN_MNEMONIC(FUCOMI, MF_NONE, D_U )
706+BEGIN_OPCODES()
707+ // Same register-index trick as in FUCOM above.
710+ {OpcodeInfo::all, {0xDB, 0xE8|_i}, {fp32}, DU },
711+ {OpcodeInfo::all, {0xDB, 0xE8|_i}, {fp64}, DU },
712+END_OPCODES()
713+END_MNEMONIC()
714+
715+BEGIN_MNEMONIC(FUCOMP, MF_NONE, D_U )
716+BEGIN_OPCODES()
717+ {OpcodeInfo::all, {0xDD, 0xE9}, {FP0D, FP1D}, DU_U },
718+ {OpcodeInfo::all, {0xDD, 0xE9}, {FP0S, FP1S}, DU_U },
719+ // Same register-index trick as in FUCOM above.
722+ {OpcodeInfo::all, {0xDD, 0xE8|_i}, {fp32}, DU },
723+ {OpcodeInfo::all, {0xDD, 0xE8|_i}, {fp64}, DU },
724+END_OPCODES()
725+END_MNEMONIC()
726+
727+BEGIN_MNEMONIC(FUCOMIP, MF_NONE, D_U )
728+BEGIN_OPCODES()
729+ // Same register-index trick as in FUCOM above.
732+ {OpcodeInfo::all, {0xDF, 0xE8|_i}, {fp32}, DU },
733+ {OpcodeInfo::all, {0xDF, 0xE8|_i}, {fp64}, DU },
734+END_OPCODES()
735+END_MNEMONIC()
736+
737+BEGIN_MNEMONIC(FUCOMPP, MF_NONE, U )
738+BEGIN_OPCODES()
739+ {OpcodeInfo::all, {0xDA, 0xE9}, {FP0D, FP1D}, DU_U },
740+ {OpcodeInfo::all, {0xDA, 0xE9}, {FP0S, FP1S}, DU_U },
741+END_OPCODES()
742+END_MNEMONIC()
743+
744+BEGIN_MNEMONIC(FLDCW, MF_NONE, U )
745+BEGIN_OPCODES()
746+ {OpcodeInfo::all, {0xD9, _5}, {m16}, U },
747+END_OPCODES()
748+END_MNEMONIC()
749+
750+BEGIN_MNEMONIC(FNSTCW, MF_NONE, D)
751+BEGIN_OPCODES()
752+ {OpcodeInfo::all, {0xD9, _7}, {m16}, D },
753+END_OPCODES()
754+END_MNEMONIC()
755+
756+BEGIN_MNEMONIC(FSTSW, MF_NONE, D)
757+BEGIN_OPCODES()
758+ {OpcodeInfo::all, {0x9B, 0xDF, 0xE0}, {EAX}, D },
759+END_OPCODES()
760+END_MNEMONIC()
761+
762+BEGIN_MNEMONIC(FNSTSW, MF_NONE, D)
763+BEGIN_OPCODES()
764+ {OpcodeInfo::all, {0xDF, 0xE0}, {EAX}, D },
765+END_OPCODES()
766+END_MNEMONIC()
767+
768+BEGIN_MNEMONIC(FCHS, MF_NONE, DU )
769+BEGIN_OPCODES()
770+ {OpcodeInfo::all, {0xD9, 0xE0}, {FP0D}, DU },
771+ {OpcodeInfo::all, {0xD9, 0xE0}, {FP0S}, DU },
772+END_OPCODES()
773+END_MNEMONIC()
774+
775+BEGIN_MNEMONIC(FCLEX, MF_NONE, N)
776+BEGIN_OPCODES()
777+ {OpcodeInfo::all, {0x9B, 0xDB, 0xE2}, {}, N },
778+END_OPCODES()
779+END_MNEMONIC()
780+
781+BEGIN_MNEMONIC(FNCLEX, MF_NONE, N)
782+BEGIN_OPCODES()
783+ {OpcodeInfo::all, {0xDB, 0xE2}, {}, N },
784+END_OPCODES()
785+END_MNEMONIC()
786+
787+//BEGIN_MNEMONIC(FDECSTP, MF_NONE, N)
788+// BEGIN_OPCODES()
789+// {OpcodeInfo::all, {0xD9, 0xF6}, {}, N },
790+// END_OPCODES()
791+//END_MNEMONIC()
792+
793+BEGIN_MNEMONIC(FILD, MF_NONE, D_U )
794+BEGIN_OPCODES()
795+ {OpcodeInfo::all, {0xDB, _0}, {FP0S, m32}, D_U },
796+ {OpcodeInfo::all, {0xDF, _5}, {FP0D, m64}, D_U },
798+END_OPCODES()
799+END_MNEMONIC()
800+
801+//BEGIN_MNEMONIC(FINCSTP, MF_NONE, N)
802+// BEGIN_OPCODES()
803+// {OpcodeInfo::all, {0xD9, 0xF7}, {}, N },
804+// END_OPCODES()
805+//END_MNEMONIC()
806+
807+BEGIN_MNEMONIC(FIST, MF_NONE, D_U )
808+BEGIN_OPCODES()
809+ {OpcodeInfo::all, {0xDB, _2}, {m32, FP0S}, D_U },
810+END_OPCODES()
811+END_MNEMONIC()
812+
813+BEGIN_MNEMONIC(FISTP, MF_NONE, D_U )
814+BEGIN_OPCODES()
815+ {OpcodeInfo::all, {0xDB, _3}, {m32, FP0S}, D_U },
816+ {OpcodeInfo::all, {0xDF, _7}, {m64, FP0D}, D_U },
817+END_OPCODES()
818+END_MNEMONIC()
819+
820+BEGIN_MNEMONIC(FISTTP, MF_NONE, D_U )
821+BEGIN_OPCODES()
822+ {OpcodeInfo::all, {0xDD, _1}, {m64, FP0D}, D_U },
823+ {OpcodeInfo::all, {0xDB, _1}, {m32, FP0S}, D_U },
824+END_OPCODES()
825+END_MNEMONIC()
826+
827+BEGIN_MNEMONIC(FRNDINT, MF_NONE, DU )
828+BEGIN_OPCODES()
829+ {OpcodeInfo::all, {0xD9, 0xFC}, {FP0S}, DU },
830+ {OpcodeInfo::all, {0xD9, 0xFC}, {FP0D}, DU },
831+END_OPCODES()
832+END_MNEMONIC()
833+
834+BEGIN_MNEMONIC(FLD, MF_NONE, D_U )
835+BEGIN_OPCODES()
836+ {OpcodeInfo::all, {0xD9, _0}, {FP0S, m32}, D_U },
837+ {OpcodeInfo::all, {0xDD, _0}, {FP0D, m64}, D_U },
838+END_OPCODES()
839+END_MNEMONIC()
840+
841+BEGIN_MNEMONIC(FLDLG2, MF_NONE, U )
842+BEGIN_OPCODES()
843+ {OpcodeInfo::all, {0xD9, 0xEC}, {FP0S}, D },
844+ {OpcodeInfo::all, {0xD9, 0xEC}, {FP0D}, D },
845+END_OPCODES()
846+END_MNEMONIC()
847+
848+BEGIN_MNEMONIC(FLDLN2, MF_NONE, U )
849+BEGIN_OPCODES()
850+ {OpcodeInfo::all, {0xD9, 0xED}, {FP0S}, D },
851+ {OpcodeInfo::all, {0xD9, 0xED}, {FP0D}, D },
852+END_OPCODES()
853+END_MNEMONIC()
854+
855+BEGIN_MNEMONIC(FLD1, MF_NONE, U )
856+BEGIN_OPCODES()
857+ {OpcodeInfo::all, {0xD9, 0xE8}, {FP0S}, D },
858+ {OpcodeInfo::all, {0xD9, 0xE8}, {FP0D}, D },
859+END_OPCODES()
860+END_MNEMONIC()
861+
862+
863+BEGIN_MNEMONIC(FPREM, MF_NONE, N)
864+ BEGIN_OPCODES()
865+ {OpcodeInfo::all, {0xD9, 0xF8}, {}, N },
866+ END_OPCODES()
867+END_MNEMONIC()
868+
869+BEGIN_MNEMONIC(FPREM1, MF_NONE, N)
870+BEGIN_OPCODES()
871+ {OpcodeInfo::all, {0xD9, 0xF5}, {}, N },
872+END_OPCODES()
873+END_MNEMONIC()
874+
875+BEGIN_MNEMONIC(FST, MF_NONE, D_U )
876+BEGIN_OPCODES()
877+ {OpcodeInfo::all, {0xD9, _2}, {m32, FP0S}, D_U },
878+ {OpcodeInfo::all, {0xDD, _2}, {m64, FP0D}, D_U },
879+ // A little trick: these two opcodes actually take only the index of the
880+ // needed register. To keep things uniform with the other instructions,
881+ // we encode them here as if they took an FPREG.
882+ {OpcodeInfo::all, {0xDD, 0xD0|_i}, {fp32}, D },
883+ {OpcodeInfo::all, {0xDD, 0xD0|_i}, {fp64}, D },
884+END_OPCODES()
885+END_MNEMONIC()
886+
887+BEGIN_MNEMONIC(FSTP, MF_NONE, D_U )
888+BEGIN_OPCODES()
889+ {OpcodeInfo::all, {0xD9, _3}, {m32, FP0S}, D_U },
890+ {OpcodeInfo::all, {0xDD, _3}, {m64, FP0D}, D_U },
891+ // A little trick: these two opcodes actually take only the index of the
892+ // needed register. To keep things uniform with the other instructions,
893+ // we encode them here as if they took an FPREG.
894+ {OpcodeInfo::all, {0xDD, 0xD8|_i}, {fp32}, D },
895+ {OpcodeInfo::all, {0xDD, 0xD8|_i}, {fp64}, D },
896+END_OPCODES()
897+END_MNEMONIC()
898+
899+BEGIN_MNEMONIC(FSQRT, MF_NONE, DU)
900+ BEGIN_OPCODES()
901+ {OpcodeInfo::all, {0xD9, 0xFA}, {FP0S}, DU },
902+ {OpcodeInfo::all, {0xD9, 0xFA}, {FP0D}, DU },
903+ END_OPCODES()
904+END_MNEMONIC()
905+
906+
907+BEGIN_MNEMONIC(FYL2X, MF_NONE, DU)
908+ BEGIN_OPCODES()
909+ {OpcodeInfo::all, {0xD9, 0xF1}, {FP0S}, DU },
910+ {OpcodeInfo::all, {0xD9, 0xF1}, {FP0D}, DU },
911+ END_OPCODES()
912+END_MNEMONIC()
913+
914+
915+BEGIN_MNEMONIC(FYL2XP1, MF_NONE, DU)
916+ BEGIN_OPCODES()
917+ {OpcodeInfo::all, {0xD9, 0xF9}, {FP0S}, DU },
918+ {OpcodeInfo::all, {0xD9, 0xF9}, {FP0D}, DU },
919+ END_OPCODES()
920+END_MNEMONIC()
921+
922+BEGIN_MNEMONIC(F2XM1, MF_NONE, DU)
923+ BEGIN_OPCODES()
924+ {OpcodeInfo::all, {0xD9, 0xF0}, {FP0S}, DU },
925+ {OpcodeInfo::all, {0xD9, 0xF0}, {FP0D}, DU },
926+ END_OPCODES()
927+END_MNEMONIC()
928+
929+BEGIN_MNEMONIC(FPATAN, MF_NONE, DU)
930+ BEGIN_OPCODES()
931+ {OpcodeInfo::all, {0xD9, 0xF3}, {FP0S}, DU },
932+ {OpcodeInfo::all, {0xD9, 0xF3}, {FP0D}, DU },
933+ END_OPCODES()
934+END_MNEMONIC()
935+
936+BEGIN_MNEMONIC(FXCH, MF_NONE, DU)
937+ BEGIN_OPCODES()
938+ {OpcodeInfo::all, {0xD9, 0xC9}, {FP0S}, DU },
939+ {OpcodeInfo::all, {0xD9, 0xC9}, {FP0D}, DU },
940+ END_OPCODES()
941+END_MNEMONIC()
942+
943+BEGIN_MNEMONIC(FSCALE, MF_NONE, DU)
944+ BEGIN_OPCODES()
945+ {OpcodeInfo::all, {0xD9, 0xFD}, {FP0S}, DU },
946+ {OpcodeInfo::all, {0xD9, 0xFD}, {FP0D}, DU },
947+ END_OPCODES()
948+END_MNEMONIC()
949+
950+BEGIN_MNEMONIC(FABS, MF_NONE, DU)
951+ BEGIN_OPCODES()
952+ {OpcodeInfo::all, {0xD9, 0xE1}, {FP0S}, DU },
953+ {OpcodeInfo::all, {0xD9, 0xE1}, {FP0D}, DU },
954+ END_OPCODES()
955+END_MNEMONIC()
956+
957+BEGIN_MNEMONIC(FSIN, MF_NONE, DU)
958+ BEGIN_OPCODES()
959+ {OpcodeInfo::all, {0xD9, 0xFE}, {FP0S}, DU },
960+ {OpcodeInfo::all, {0xD9, 0xFE}, {FP0D}, DU },
961+ END_OPCODES()
962+END_MNEMONIC()
963+
964+BEGIN_MNEMONIC(FCOS, MF_NONE, DU)
965+ BEGIN_OPCODES()
966+ {OpcodeInfo::all, {0xD9, 0xFF}, {FP0S}, DU },
967+ {OpcodeInfo::all, {0xD9, 0xFF}, {FP0D}, DU },
968+ END_OPCODES()
969+END_MNEMONIC()
970+
971+BEGIN_MNEMONIC(FPTAN, MF_NONE, DU)
972+ BEGIN_OPCODES()
973+ {OpcodeInfo::all, {0xD9, 0xF2}, {FP0S}, DU },
974+ {OpcodeInfo::all, {0xD9, 0xF2}, {FP0D}, DU },
975+ END_OPCODES()
976+END_MNEMONIC()
977+
978+//
979+// ~ FPU
980+//
981+
982+BEGIN_MNEMONIC(DIV, MF_AFFECTS_FLAGS, DU_DU_U)
983+BEGIN_OPCODES()
984+#if !defined(_EM64T_)
985+ {OpcodeInfo::all, {0xF6, _6}, {AH, AL, r_m8}, DU_DU_U },
986+ {OpcodeInfo::all, {Size16, 0xF7, _6}, {DX, AX, r_m16}, DU_DU_U },
987+#endif
988+ {OpcodeInfo::all, {0xF7, _6}, {EDX, EAX, r_m32}, DU_DU_U },
989+ {OpcodeInfo::em64t, {REX_W, 0xF7, _6}, {RDX, RAX, r_m64}, DU_DU_U },
990+END_OPCODES()
991+END_MNEMONIC()
992+
993+BEGIN_MNEMONIC(IDIV, MF_AFFECTS_FLAGS, DU_DU_U)
994+BEGIN_OPCODES()
995+#if !defined(_EM64T_)
996+ {OpcodeInfo::all, {0xF6, _7}, {AH, AL, r_m8}, DU_DU_U },
997+ {OpcodeInfo::all, {Size16, 0xF7, _7}, {DX, AX, r_m16}, DU_DU_U },
998+#endif
999+ {OpcodeInfo::all, {0xF7, _7}, {EDX, EAX, r_m32}, DU_DU_U },
1000+ {OpcodeInfo::em64t, {REX_W, 0xF7, _7}, {RDX, RAX, r_m64}, DU_DU_U },
1001+END_OPCODES()
1002+END_MNEMONIC()
1003+
1004+
1005+BEGIN_MNEMONIC(IMUL, MF_AFFECTS_FLAGS, D_DU_U)
1006+BEGIN_OPCODES()
1007+ /*{OpcodeInfo::all, {0xF6, _5}, {AH, AL, r_m8}, D_DU_U },
1008+ {OpcodeInfo::all, {Size16, 0xF7, _5}, {DX, AX, r_m16}, D_DU_U },
1009+ */
1010+ //
1011+ {OpcodeInfo::all, {0xF7, _5}, {EDX, EAX, r_m32}, D_DU_U },
1012+ //todo: this opcode's hash conflicts with IMUL r64,r_m64 - they're both 0.
1013+ // This particular one is not currently used, so we may safely drop it, but we
1014+ // need to revisit the hash implementation.
1015+ // {OpcodeInfo::em64t, {REX_W, 0xF7, _5}, {RDX, RAX, r_m64}, D_DU_U },
1016+ //
1017+ {OpcodeInfo::all, {Size16, 0x0F, 0xAF, _r}, {r16,r_m16}, DU_U },
1018+ {OpcodeInfo::all, {0x0F, 0xAF, _r}, {r32,r_m32}, DU_U },
1019+ {OpcodeInfo::em64t, {REX_W, 0x0F, 0xAF, _r}, {r64,r_m64}, DU_U },
1020+ {OpcodeInfo::all, {Size16, 0x6B, _r, ib}, {r16,r_m16,imm8s}, D_DU_U },
1021+ {OpcodeInfo::all, {0x6B, _r, ib}, {r32,r_m32,imm8s}, D_DU_U },
1022+ {OpcodeInfo::em64t, {REX_W, 0x6B, _r, ib}, {r64,r_m64,imm8s}, D_DU_U },
1023+ {OpcodeInfo::all, {Size16, 0x6B, _r, ib}, {r16,imm8s}, DU_U },
1024+ {OpcodeInfo::all, {0x6B, _r, ib}, {r32,imm8s}, DU_U },
1025+ {OpcodeInfo::em64t, {REX_W, 0x6B, _r, ib}, {r64,imm8s}, DU_U },
1026+ {OpcodeInfo::all, {Size16, 0x69, _r, iw}, {r16,r_m16,imm16}, D_U_U },
1027+ {OpcodeInfo::all, {0x69, _r, id}, {r32,r_m32,imm32}, D_U_U },
1028+ {OpcodeInfo::em64t, {REX_W, 0x69, _r, id}, {r64,r_m64,imm32s}, D_U_U },
1029+ {OpcodeInfo::all, {Size16, 0x69, _r, iw}, {r16,imm16}, DU_U },
1030+ {OpcodeInfo::all, {0x69, _r, id}, {r32,imm32}, DU_U },
1031+END_OPCODES()
1032+END_MNEMONIC()
1033+
1034+BEGIN_MNEMONIC(MUL, MF_AFFECTS_FLAGS, U )
1035+BEGIN_OPCODES()
1036+ {OpcodeInfo::all, {0xF6, _4}, {AX, AL, r_m8}, D_DU_U },
1037+ {OpcodeInfo::all, {Size16, 0xF7, _4}, {DX, AX, r_m16}, D_DU_U },
1038+ {OpcodeInfo::all, {0xF7, _4}, {EDX, EAX, r_m32}, D_DU_U },
1039+ {OpcodeInfo::em64t, {REX_W, 0xF7, _4}, {RDX, RAX, r_m64}, D_DU_U },
1040+END_OPCODES()
1041+END_MNEMONIC()
1042+
1043+BEGIN_MNEMONIC(INC, MF_AFFECTS_FLAGS, DU )
1044+BEGIN_OPCODES()
1045+ {OpcodeInfo::all, {0xFE, _0}, {r_m8}, DU },
1046+ {OpcodeInfo::all, {Size16, 0xFF, _0}, {r_m16}, DU },
1047+ {OpcodeInfo::all, {0xFF, _0}, {r_m32}, DU },
1048+ {OpcodeInfo::em64t, {REX_W, 0xFF, _0}, {r_m64}, DU },
1049+ {OpcodeInfo::ia32, {Size16, 0x40|rw}, {r16}, DU },
1050+ {OpcodeInfo::ia32, {0x40|rd}, {r32}, DU },
1051+END_OPCODES()
1052+END_MNEMONIC()
1053+
1054+BEGIN_MNEMONIC(INT3, MF_NONE, N)
1055+BEGIN_OPCODES()
1056+ {OpcodeInfo::all, {0xCC}, {}, N },
1057+END_OPCODES()
1058+END_MNEMONIC()
1059+
1060+#define DEFINE_Jcc_MNEMONIC( cc ) \
1061+ BEGIN_MNEMONIC(J##cc, MF_USES_FLAGS|MF_CONDITIONAL, U ) \
1062+BEGIN_OPCODES() \
1063+ {OpcodeInfo::all, {0x70 + ConditionMnemonic_##cc, cb }, { rel8 }, U }, \
1064+ {OpcodeInfo::ia32, {Size16, 0x0F, 0x80 + ConditionMnemonic_##cc, cw}, { rel16 }, U }, \
1065+ {OpcodeInfo::all, {0x0F, 0x80 + ConditionMnemonic_##cc, cd}, { rel32 }, U }, \
1066+END_OPCODES() \
1067+END_MNEMONIC()
1068+
1069+
1070+DEFINE_Jcc_MNEMONIC(O)
1071+DEFINE_Jcc_MNEMONIC(NO)
1072+DEFINE_Jcc_MNEMONIC(B)
1073+DEFINE_Jcc_MNEMONIC(NB)
1074+DEFINE_Jcc_MNEMONIC(Z)
1075+DEFINE_Jcc_MNEMONIC(NZ)
1076+DEFINE_Jcc_MNEMONIC(BE)
1077+DEFINE_Jcc_MNEMONIC(NBE)
1078+
1079+DEFINE_Jcc_MNEMONIC(S)
1080+DEFINE_Jcc_MNEMONIC(NS)
1081+DEFINE_Jcc_MNEMONIC(P)
1082+DEFINE_Jcc_MNEMONIC(NP)
1083+DEFINE_Jcc_MNEMONIC(L)
1084+DEFINE_Jcc_MNEMONIC(NL)
1085+DEFINE_Jcc_MNEMONIC(LE)
1086+DEFINE_Jcc_MNEMONIC(NLE)
1087+
1088+#undef DEFINE_Jcc_MNEMONIC
1089+
1090+BEGIN_MNEMONIC(JMP, MF_NONE, U )
1091+BEGIN_OPCODES()
1092+ {OpcodeInfo::all, {0xEB, cb}, {rel8}, U },
1093+ {OpcodeInfo::ia32, {Size16, 0xE9, cw}, {rel16}, U },
1094+ {OpcodeInfo::all, {0xE9, cd}, {rel32}, U },
1095+ {OpcodeInfo::ia32, {Size16, 0xFF, _4}, {r_m16}, U },
1096+ {OpcodeInfo::ia32, {0xFF, _4}, {r_m32}, U },
1097+ {OpcodeInfo::em64t, {0xFF, _4}, {r_m64}, U },
1098+END_OPCODES()
1099+END_MNEMONIC()
1100+
1101+BEGIN_MNEMONIC(LEA, MF_NONE, D_U )
1102+BEGIN_OPCODES()
1103+ /*
1104+ A special case: the LEA instruction itself does not care about the size
1105+ of its second operand, which is why The Manual uses a plain 'm' with no
1106+ size attached.
1107+ However, Jitrino's instructions always carry an operand with a size.
1108+ Also, the hashing scheme is not supposed to handle OpndSize_Null, and
1109+ making it do so would complicate the hashing scheme unnecessarily.
1110+ Thus, instead of handling LEA as a special case, we simply make
1111+ copies of the opcodes with the sizes set.
1112+ {OpcodeInfo::all, {0x8D, _r}, {r32, m}, D_U },
1113+ {OpcodeInfo::em64t, {0x8D, _r}, {r64, m}, D_U },
1114+ */
1115+ //Android x86: keep r32, m32 only; otherwise we get a decoding error
1116+ //{OpcodeInfo::all, {0x8D, _r}, {r32, m8}, D_U },
1117+ {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m8}, D_U },
1118+ //{OpcodeInfo::all, {0x8D, _r}, {r32, m16}, D_U },
1119+ {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m16}, D_U },
1120+ {OpcodeInfo::all, {0x8D, _r}, {r32, m32}, D_U },
1121+ {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m32}, D_U },
1122+ {OpcodeInfo::all, {0x8D, _r}, {r32, m64}, D_U },
1123+ {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m64}, D_U },
1124+END_OPCODES()
1125+END_MNEMONIC()
1126+
1127+BEGIN_MNEMONIC(LOOP, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U)
1128+BEGIN_OPCODES()
1129+ {OpcodeInfo::all, {0xE2, cb}, {ECX, rel8}, DU_U },
1130+END_OPCODES()
1131+END_MNEMONIC()
1132+
1133+BEGIN_MNEMONIC(LOOPE, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U)
1134+BEGIN_OPCODES()
1135+ {OpcodeInfo::all, {0xE1, cb}, {ECX, rel8}, DU_U },
1136+END_OPCODES()
1137+END_MNEMONIC()
1138+
1139+BEGIN_MNEMONIC(LOOPNE, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U)
1140+BEGIN_OPCODES()
1141+ {OpcodeInfo::all, {0xE0, cb}, {ECX, rel8}, DU_U },
1142+END_OPCODES()
1143+END_MNEMONIC()
1144+
1145+BEGIN_MNEMONIC(MOV, MF_NONE, D_U)
1146+BEGIN_OPCODES()
1147+ {OpcodeInfo::all, {0x88, _r}, {r_m8,r8}, D_U },
1148+
1149+ {OpcodeInfo::all, {Size16, 0x89, _r}, {r_m16,r16}, D_U },
1150+ {OpcodeInfo::all, {0x89, _r}, {r_m32,r32}, D_U },
1151+ {OpcodeInfo::em64t, {REX_W, 0x89, _r}, {r_m64,r64}, D_U },
1152+ {OpcodeInfo::all, {0x8A, _r}, {r8,r_m8}, D_U },
1153+
1154+ {OpcodeInfo::all, {Size16, 0x8B, _r}, {r16,r_m16}, D_U },
1155+ {OpcodeInfo::all, {0x8B, _r}, {r32,r_m32}, D_U },
1156+ {OpcodeInfo::em64t, {REX_W, 0x8B, _r}, {r64,r_m64}, D_U },
1157+
1158+ {OpcodeInfo::all, {0xB0|rb}, {r8,imm8}, D_U },
1159+
1160+ {OpcodeInfo::all, {Size16, 0xB8|rw}, {r16,imm16}, D_U },
1161+ {OpcodeInfo::all, {0xB8|rd}, {r32,imm32}, D_U },
1162+ {OpcodeInfo::em64t, {REX_W, 0xB8|rd}, {r64,imm64}, D_U },
1163+ {OpcodeInfo::all, {0xC6, _0}, {r_m8,imm8}, D_U },
1164+
1165+ {OpcodeInfo::all, {Size16, 0xC7, _0}, {r_m16,imm16}, D_U },
1166+ {OpcodeInfo::all, {0xC7, _0}, {r_m32,imm32}, D_U },
1167+ {OpcodeInfo::em64t, {REX_W, 0xC7, _0}, {r_m64,imm32s}, D_U },
1168+
1169+ {OpcodeInfo::decoder, {0xA0}, {AL, moff8}, D_U },
1170+ {OpcodeInfo::decoder, {Size16, 0xA1}, {AX, moff16}, D_U },
1171+ {OpcodeInfo::decoder, {0xA1}, {EAX, moff32}, D_U },
1172+ //{OpcodeInfo::decoder64, {REX_W, 0xA1}, {RAX, moff64}, D_U },
1173+
1174+ {OpcodeInfo::decoder, {0xA2}, {moff8, AL}, D_U },
1175+ {OpcodeInfo::decoder, {Size16, 0xA3}, {moff16, AX}, D_U },
1176+ {OpcodeInfo::decoder, {0xA3}, {moff32, EAX}, D_U },
1177+ //{OpcodeInfo::decoder64, {REX_W, 0xA3}, {moff64, RAX}, D_U },
1178+END_OPCODES()
1179+END_MNEMONIC()
1180+
1181+
1182+
1183+BEGIN_MNEMONIC(XCHG, MF_NONE, DU_DU )
1184+BEGIN_OPCODES()
1185+ {OpcodeInfo::all, {0x87, _r}, {r_m32,r32}, DU_DU },
1186+END_OPCODES()
1187+END_MNEMONIC()
1188+
1189+
1190+BEGIN_MNEMONIC(MOVQ, MF_NONE, D_U )
1191+BEGIN_OPCODES()
1192+#ifdef _HAVE_MMX_
1193+ {OpcodeInfo::all, {0x0F, 0x6F, _r}, {mm64, mm_m64}, D_U },
1194+ {OpcodeInfo::all, {0x0F, 0x7F, _r}, {mm_m64, mm64}, D_U },
1195+#endif
1196+ {OpcodeInfo::all, {0xF3, 0x0F, 0x7E }, {xmm64, xmm_m64}, D_U },
1197+ {OpcodeInfo::all, {0x66, 0x0F, 0xD6 }, {xmm_m64, xmm64}, D_U },
1198+// {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x6E, _r}, {xmm64, r_m64}, D_U },
1199+// {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x7E, _r}, {r_m64, xmm64}, D_U },
1200+ {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x6E, _r}, {xmm64, r64}, D_U },
1201+ {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x7E, _r}, {r64, xmm64}, D_U },
1202+END_OPCODES()
1203+END_MNEMONIC()
1204+
1205+
1206+BEGIN_MNEMONIC(MOVD, MF_NONE, D_U )
1207+BEGIN_OPCODES()
1208+ {OpcodeInfo::all, {0x66, 0x0F, 0x6E, _r}, {xmm32, r_m32}, D_U },
1209+ {OpcodeInfo::all, {0x66, 0x0F, 0x7E, _r}, {r_m32, xmm32}, D_U },
1210+END_OPCODES()
1211+END_MNEMONIC()
1212+
1213+//
1214+// A bunch of MMX instructions
1215+//
1216+#ifdef _HAVE_MMX_
1217+
1218+BEGIN_MNEMONIC(EMMS, MF_NONE, N)
1219+BEGIN_OPCODES()
1220+ {OpcodeInfo::all, {0x0F, 0x77}, {}, N },
1221+END_OPCODES()
1222+END_MNEMONIC()
1223+
1224+#endif
1225+
1226+BEGIN_MNEMONIC(PADDQ, MF_NONE, DU_U)
1227+BEGIN_OPCODES()
1228+#ifdef _HAVE_MMX_
1229+ {OpcodeInfo::all, {0x0F, 0xD4, _r}, {mm64, mm_m64}, DU_U },
1230+#endif
1231+ {OpcodeInfo::all, {0x66, 0x0F, 0xD4, _r}, {xmm64, xmm_m64}, DU_U },
1232+END_OPCODES()
1233+END_MNEMONIC()
1234+
1235+BEGIN_MNEMONIC(PAND, MF_NONE, DU_U)
1236+BEGIN_OPCODES()
1237+#ifdef _HAVE_MMX_
1238+ {OpcodeInfo::all, {0x0F, 0xDB, _r}, {mm64, mm_m64}, DU_U },
1239+#endif
1240+ {OpcodeInfo::all, {0x66, 0x0F, 0xDB, _r}, {xmm64, xmm_m64}, DU_U },
1241+END_OPCODES()
1242+END_MNEMONIC()
1243+
1244+BEGIN_MNEMONIC(POR, MF_NONE, DU_U)
1245+BEGIN_OPCODES()
1246+#ifdef _HAVE_MMX_
1247+ {OpcodeInfo::all, {0x0F, 0xEB, _r}, {mm64, mm_m64}, DU_U },
1248+#endif
1249+ {OpcodeInfo::all, {0x66, 0x0F, 0xEB, _r}, {xmm64, xmm_m64}, DU_U },
1250+END_OPCODES()
1251+END_MNEMONIC()
1252+
1253+BEGIN_MNEMONIC(PSUBQ, MF_NONE, DU_U)
1254+BEGIN_OPCODES()
1255+#ifdef _HAVE_MMX_
1256+ {OpcodeInfo::all, {0x0F, 0xFB, _r}, {mm64, mm_m64}, DU_U },
1257+#endif
1258+ {OpcodeInfo::all, {0x66, 0x0F, 0xFB, _r}, {xmm64, xmm_m64}, DU_U },
1259+END_OPCODES()
1260+END_MNEMONIC()
1261+
1262+BEGIN_MNEMONIC(PANDN, MF_NONE, DU_U)
1263+BEGIN_OPCODES()
1264+#ifdef _HAVE_MMX_
1265+ {OpcodeInfo::all, {0x0F, 0xDF, _r}, {mm64, mm_m64}, DU_U },
1266+#endif
1267+ {OpcodeInfo::all, {0x66, 0x0F, 0xDF, _r}, {xmm64, xmm_m64}, DU_U },
1268+END_OPCODES()
1269+END_MNEMONIC()
1270+BEGIN_MNEMONIC(PSLLQ, MF_NONE, DU_U)
1271+BEGIN_OPCODES()
1272+#ifdef _HAVE_MMX_
1273+ {OpcodeInfo::all, {0x0F, 0xF3, _r}, {mm64, mm_m64}, DU_U },
1274+#endif
1275+ {OpcodeInfo::all, {0x66, 0x0F, 0xF3, _r}, {xmm64, xmm_m64}, DU_U },
1276+ {OpcodeInfo::all, {0x66, 0x0F, 0x73, _6, ib}, {xmm64, imm8}, DU_U },
1277+END_OPCODES()
1278+END_MNEMONIC()
1279+BEGIN_MNEMONIC(PSRLQ, MF_NONE, DU_U)
1280+BEGIN_OPCODES()
1281+#ifdef _HAVE_MMX_
1282+ {OpcodeInfo::all, {0x0F, 0xD3, _r}, {mm64, mm_m64}, DU_U },
1283+#endif
1284+ {OpcodeInfo::all, {0x66, 0x0F, 0xD3, _r}, {xmm64, xmm_m64}, DU_U },
1285+ {OpcodeInfo::all, {0x66, 0x0F, 0x73, _2, ib}, {xmm64, imm8}, DU_U },
1286+END_OPCODES()
1287+END_MNEMONIC()
1288+
1289+BEGIN_MNEMONIC(PXOR, MF_NONE, DU_U)
1290+BEGIN_OPCODES()
1291+#ifdef _HAVE_MMX_
1292+ {OpcodeInfo::all, {0x0F, 0xEF, _r}, {mm64, mm_m64}, DU_U },
1293+#endif
1294+ {OpcodeInfo::all, {0x66, 0x0F, 0xEF, _r}, {xmm64, xmm_m64}, DU_U },
1295+END_OPCODES()
1296+END_MNEMONIC()
1297+
1298+
1299+BEGIN_MNEMONIC(MOVAPD, MF_NONE, D_U )
1300+BEGIN_OPCODES()
1301+ {OpcodeInfo::all, {0x66, 0x0F, 0x28, _r}, {xmm64, xmm_m64}, D_U },
1302+ {OpcodeInfo::all, {0x66, 0x0F, 0x29, _r}, {xmm_m64, xmm64}, D_U },
1303+END_OPCODES()
1304+END_MNEMONIC()
1305+
1306+BEGIN_MNEMONIC(MOVAPS, MF_NONE, D_U )
1307+BEGIN_OPCODES()
1308+ {OpcodeInfo::all, {0x0F, 0x28, _r}, {xmm64, xmm_m64}, D_U },
1309+ {OpcodeInfo::all, {0x0F, 0x29, _r}, {xmm_m64, xmm64}, D_U },
1310+END_OPCODES()
1311+END_MNEMONIC()
1312+
1313+BEGIN_MNEMONIC(SHUFPS, MF_NONE, D_U_U )
1314+BEGIN_OPCODES()
1315+ {OpcodeInfo::all, {0x0F, 0xC6, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U },
1316+END_OPCODES()
1317+END_MNEMONIC()
1318+
1319+
1320+BEGIN_MNEMONIC(MOVSD, MF_NONE, D_U )
1321+BEGIN_OPCODES()
1322+ {OpcodeInfo::all, {0xF2, 0x0F, 0x10, _r}, {xmm64, xmm_m64}, D_U },
1323+ {OpcodeInfo::all, {0xF2, 0x0F, 0x11, _r}, {xmm_m64, xmm64}, D_U },
1324+END_OPCODES()
1325+END_MNEMONIC()
1326+
1327+BEGIN_MNEMONIC(MOVSS, MF_NONE, D_U )
1328+BEGIN_OPCODES()
1329+ {OpcodeInfo::all, {0xF3, 0x0F, 0x10, _r}, {xmm32, xmm_m32}, D_U },
1330+ {OpcodeInfo::all, {0xF3, 0x0F, 0x11, _r}, {xmm_m32, xmm32}, D_U },
1331+END_OPCODES()
1332+END_MNEMONIC()
1333+
1334+BEGIN_MNEMONIC(MOVSX, MF_NONE, D_U )
1335+BEGIN_OPCODES()
1336+ {OpcodeInfo::all, {Size16, 0x0F, 0xBE, _r}, {r16, r_m8s}, D_U },
1337+ {OpcodeInfo::all, {0x0F, 0xBE, _r}, {r32, r_m8s}, D_U },
1338+ {OpcodeInfo::em64t, {REX_W, 0x0F, 0xBE, _r}, {r64, r_m8s}, D_U },
1339+
1340+ {OpcodeInfo::all, {0x0F, 0xBF, _r}, {r32, r_m16s}, D_U },
1341+ {OpcodeInfo::em64t, {REX_W, 0x0F, 0xBF, _r}, {r64, r_m16s}, D_U },
1342+
1343+ {OpcodeInfo::em64t, {REX_W, 0x63, _r}, {r64, r_m32s}, D_U },
1344+END_OPCODES()
1345+END_MNEMONIC()
1346+
1347+BEGIN_MNEMONIC(MOVZX, MF_NONE, D_U )
1348+BEGIN_OPCODES()
1349+ {OpcodeInfo::all, {Size16, 0x0F, 0xB6, _r}, {r16, r_m8u}, D_U },
1350+ {OpcodeInfo::all, {0x0F, 0xB6, _r}, {r32, r_m8u}, D_U },
1351+ {OpcodeInfo::em64t, {REX_W, 0x0F, 0xB6, _r}, {r64, r_m8u}, D_U },
1352+
1353+ {OpcodeInfo::all, {0x0F, 0xB7, _r}, {r32, r_m16u}, D_U },
1354+ {OpcodeInfo::em64t, {REX_W, 0x0F, 0xB7, _r}, {r64, r_m16u}, D_U },
1355+ //workaround to get r/m32 -> r64 zero-extending mov functionality:
1356+ //a plain 32-bit register copy zeros the high bits of the 64-bit register
1357+ {OpcodeInfo::em64t, {0x8B, _r}, {r64, r_m32u}, D_U },
1358+END_OPCODES()
1359+END_MNEMONIC()
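+// For illustration: the 0x8B row above emits MOVZX r64, r/m32 as a plain
+// 32-bit MOV (8B /r without REX.W); on x86-64 writing a 32-bit register
+// architecturally clears bits 63:32, so the copy zero-extends for free.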
1360+
1361+BEGIN_MNEMONIC(MULSD, MF_NONE, DU_U)
1362+BEGIN_OPCODES()
1363+ {OpcodeInfo::all, {0xF2, 0x0F, 0x59, _r}, {xmm64, xmm_m64}, DU_U },
1364+END_OPCODES()
1365+END_MNEMONIC()
1366+
1367+BEGIN_MNEMONIC(MULSS, MF_NONE, DU_U)
1368+BEGIN_OPCODES()
1369+ {OpcodeInfo::all, {0xF3, 0x0F, 0x59, _r}, {xmm32, xmm_m32}, DU_U },
1370+END_OPCODES()
1371+END_MNEMONIC()
1372+
1373+BEGIN_MNEMONIC(NEG, MF_AFFECTS_FLAGS, DU )
1374+BEGIN_OPCODES()
1375+ {OpcodeInfo::all, {0xF6, _3}, {r_m8}, DU },
1376+
1377+ {OpcodeInfo::all, {Size16, 0xF7, _3}, {r_m16}, DU },
1378+ {OpcodeInfo::all, {0xF7, _3}, {r_m32}, DU },
1379+ {OpcodeInfo::em64t, {REX_W, 0xF7, _3}, {r_m64}, DU },
1380+END_OPCODES()
1381+END_MNEMONIC()
1382+
1383+BEGIN_MNEMONIC(NOP, MF_NONE, N)
1384+BEGIN_OPCODES()
1385+ {OpcodeInfo::all, {0x90}, {}, N },
1386+END_OPCODES()
1387+END_MNEMONIC()
1388+
1389+BEGIN_MNEMONIC(NOT, MF_AFFECTS_FLAGS, DU )
1390+BEGIN_OPCODES()
1391+ {OpcodeInfo::all, {0xF6, _2}, {r_m8}, DU },
1392+ {OpcodeInfo::all, {Size16, 0xF7, _2}, {r_m16}, DU },
1393+ {OpcodeInfo::all, {0xF7, _2}, {r_m32}, DU },
1394+ {OpcodeInfo::em64t, {REX_W, 0xF7, _2}, {r_m64}, DU },
1395+END_OPCODES()
1396+END_MNEMONIC()
1397+
1398+BEGIN_MNEMONIC(POP, MF_NONE, D)
1399+BEGIN_OPCODES()
1400+ {OpcodeInfo::all, {Size16, 0x8F, _0}, {r_m16}, D },
1401+ {OpcodeInfo::ia32, {0x8F, _0}, {r_m32}, D },
1402+ {OpcodeInfo::em64t, {0x8F, _0}, {r_m64}, D },
1403+
1404+ {OpcodeInfo::all, {Size16, 0x58|rw }, {r16}, D },
1405+ {OpcodeInfo::ia32, {0x58|rd }, {r32}, D },
1406+ {OpcodeInfo::em64t, {0x58|rd }, {r64}, D },
1407+END_OPCODES()
1408+END_MNEMONIC()
1409+
1410+BEGIN_MNEMONIC(POPFD, MF_AFFECTS_FLAGS, N)
1411+BEGIN_OPCODES()
1412+ {OpcodeInfo::all, {0x9D}, {}, N },
1413+END_OPCODES()
1414+END_MNEMONIC()
1415+
1416+BEGIN_MNEMONIC(PREFETCH, MF_NONE, U)
1417+BEGIN_OPCODES()
1418+ {OpcodeInfo::all, {0x0F, 0x18, _0}, {m8}, U },
1419+END_OPCODES()
1420+END_MNEMONIC()
1421+
1422+BEGIN_MNEMONIC(PUSH, MF_NONE, U )
1423+BEGIN_OPCODES()
1424+ {OpcodeInfo::all, {Size16, 0xFF, _6}, {r_m16}, U },
1425+ {OpcodeInfo::ia32, {0xFF, _6}, {r_m32}, U },
1426+ {OpcodeInfo::em64t, {0xFF, _6}, {r_m64}, U },
1427+
1428+ {OpcodeInfo::all, {Size16, 0x50|rw }, {r16}, U },
1429+ {OpcodeInfo::ia32, {0x50|rd }, {r32}, U },
1430+ {OpcodeInfo::em64t, {0x50|rd }, {r64}, U },
1431+
1432+ {OpcodeInfo::all, {0x6A}, {imm8}, U },
1433+ {OpcodeInfo::all, {Size16, 0x68}, {imm16}, U },
1434+ {OpcodeInfo::ia32, {0x68}, {imm32}, U },
1435+// {OpcodeInfo::em64t, {0x68}, {imm64}, U },
1436+END_OPCODES()
1437+END_MNEMONIC()
1438+
1439+BEGIN_MNEMONIC(PUSHFD, MF_USES_FLAGS, N)
1440+BEGIN_OPCODES()
1441+ {OpcodeInfo::all, {0x9C}, {}, N },
1442+END_OPCODES()
1443+END_MNEMONIC()
1444+
1445+
1446+BEGIN_MNEMONIC(RET, MF_NONE, N)
1447+BEGIN_OPCODES()
1448+ {OpcodeInfo::all, {0xC3}, {}, N },
1449+ {OpcodeInfo::all, {0xC2, iw}, {imm16}, U },
1450+END_OPCODES()
1451+END_MNEMONIC()
1452+
1453+#define DEFINE_SETcc_MNEMONIC( cc ) \
1454+ BEGIN_MNEMONIC(SET##cc, MF_USES_FLAGS|MF_CONDITIONAL, DU) \
1455+BEGIN_OPCODES() \
1456+ {OpcodeInfo::all, {0x0F, 0x90 + ConditionMnemonic_##cc}, {r_m8}, DU }, \
1457+END_OPCODES() \
1458+END_MNEMONIC()
1459+
1460+DEFINE_SETcc_MNEMONIC(O)
1461+DEFINE_SETcc_MNEMONIC(NO)
1462+DEFINE_SETcc_MNEMONIC(B)
1463+DEFINE_SETcc_MNEMONIC(NB)
1464+DEFINE_SETcc_MNEMONIC(Z)
1465+DEFINE_SETcc_MNEMONIC(NZ)
1466+DEFINE_SETcc_MNEMONIC(BE)
1467+DEFINE_SETcc_MNEMONIC(NBE)
1468+
1469+DEFINE_SETcc_MNEMONIC(S)
1470+DEFINE_SETcc_MNEMONIC(NS)
1471+DEFINE_SETcc_MNEMONIC(P)
1472+DEFINE_SETcc_MNEMONIC(NP)
1473+DEFINE_SETcc_MNEMONIC(L)
1474+DEFINE_SETcc_MNEMONIC(NL)
1475+DEFINE_SETcc_MNEMONIC(LE)
1476+DEFINE_SETcc_MNEMONIC(NLE)
1477+
1478+#undef DEFINE_SETcc_MNEMONIC
1479+
1480+#define DEFINE_SHIFT_MNEMONIC(nam, slash_num, flags) \
1481+BEGIN_MNEMONIC(nam, flags, DU_U) \
1482+BEGIN_OPCODES()\
1483+ /* The D0 & D1 opcodes are added without the 2nd operand (the constant 1) */\
1484+ /* because they are used for decoding only, where just the length matters */\
1485+ {OpcodeInfo::decoder, {0xD0, slash_num}, {r_m8/*,const_1*/}, DU },\
1486+ {OpcodeInfo::all, {0xD2, slash_num}, {r_m8, CL}, DU_U },\
1487+ {OpcodeInfo::all, {0xC0, slash_num, ib}, {r_m8, imm8}, DU_U },\
1488+\
1489+ {OpcodeInfo::decoder, {Size16, 0xD1, slash_num}, {r_m16/*,const_1*/}, DU },\
1490+ {OpcodeInfo::all, {Size16, 0xD3, slash_num}, {r_m16, CL}, DU_U },\
1491+ {OpcodeInfo::all, {Size16, 0xC1, slash_num, ib}, {r_m16, imm8 }, DU_U },\
1492+\
1493+ {OpcodeInfo::decoder, {0xD1, slash_num}, {r_m32/*,const_1*/}, DU },\
1494+ {OpcodeInfo::decoder64, {REX_W, 0xD1, slash_num}, {r_m64/*,const_1*/}, DU },\
1495+\
1496+ {OpcodeInfo::all, {0xD3, slash_num}, {r_m32, CL}, DU_U },\
1497+ {OpcodeInfo::em64t, {REX_W, 0xD3, slash_num}, {r_m64, CL}, DU_U },\
1498+\
1499+ {OpcodeInfo::all, {0xC1, slash_num, ib}, {r_m32, imm8}, DU_U },\
1500+ {OpcodeInfo::em64t, {REX_W, 0xC1, slash_num, ib}, {r_m64, imm8}, DU_U },\
1501+END_OPCODES()\
1502+END_MNEMONIC()
1503+
1504+
1505+DEFINE_SHIFT_MNEMONIC(ROL, _0, MF_AFFECTS_FLAGS)
1506+DEFINE_SHIFT_MNEMONIC(ROR, _1, MF_AFFECTS_FLAGS)
1507+DEFINE_SHIFT_MNEMONIC(RCL, _2, MF_AFFECTS_FLAGS|MF_USES_FLAGS)
1508+DEFINE_SHIFT_MNEMONIC(RCR, _3, MF_AFFECTS_FLAGS|MF_USES_FLAGS)
1509+
1510+DEFINE_SHIFT_MNEMONIC(SAL, _4, MF_AFFECTS_FLAGS)
1511+DEFINE_SHIFT_MNEMONIC(SHR, _5, MF_AFFECTS_FLAGS)
1512+DEFINE_SHIFT_MNEMONIC(SAR, _7, MF_AFFECTS_FLAGS)
1513+
1514+#undef DEFINE_SHIFT_MNEMONIC
1515+
1516+BEGIN_MNEMONIC(SHLD, MF_AFFECTS_FLAGS, N)
1517+BEGIN_OPCODES()
1518+ {OpcodeInfo::all, {0x0F, 0xA5}, {r_m32, r32, CL}, DU_DU_U },
1519+ {OpcodeInfo::all, {0x0F, 0xA4}, {r_m32, r32, imm8}, DU_DU_U },
1520+END_OPCODES()
1521+END_MNEMONIC()
1522+
1523+BEGIN_MNEMONIC(SHRD, MF_AFFECTS_FLAGS, N)
1524+// TODO: the def/use info is wrong
1525+BEGIN_OPCODES()
1526+ {OpcodeInfo::all, {0x0F, 0xAD}, {r_m32, r32, CL}, DU_DU_U },
1527+END_OPCODES()
1528+END_MNEMONIC()
1529+
1530+
1531+BEGIN_MNEMONIC(SUBSD, MF_NONE, DU_U)
1532+BEGIN_OPCODES()
1533+ {OpcodeInfo::all, {0xF2, 0x0F, 0x5C, _r}, {xmm64, xmm_m64}, DU_U },
1534+END_OPCODES()
1535+END_MNEMONIC()
1536+
1537+BEGIN_MNEMONIC(SUBSS, MF_NONE, DU_U)
1538+BEGIN_OPCODES()
1539+ {OpcodeInfo::all, {0xF3, 0x0F, 0x5C, _r}, {xmm32, xmm_m32}, DU_U },
1540+END_OPCODES()
1541+END_MNEMONIC()
1542+
1543+BEGIN_MNEMONIC(TEST, MF_AFFECTS_FLAGS, U_U)
1544+BEGIN_OPCODES()
1545+
1546+ {OpcodeInfo::decoder, {0xA8, ib}, { AL, imm8}, U_U },
1547+ {OpcodeInfo::decoder, {0xA9, iw}, { AX, imm16}, U_U },
1548+ {OpcodeInfo::decoder, {0xA9, id}, { EAX, imm32}, U_U },
1549+ {OpcodeInfo::decoder64, {REX_W, 0xA9, id}, { RAX, imm32s}, U_U },
1550+
1551+ {OpcodeInfo::all, {0xF6, _0, ib}, {r_m8,imm8}, U_U },
1552+
1553+ {OpcodeInfo::all, {Size16, 0xF7, _0, iw}, {r_m16,imm16}, U_U },
1554+ {OpcodeInfo::all, {0xF7, _0, id}, {r_m32,imm32}, U_U },
1555+ {OpcodeInfo::em64t, {REX_W, 0xF7, _0, id}, {r_m64,imm32s}, U_U },
1556+
1557+ {OpcodeInfo::all, {0x84, _r}, {r_m8,r8}, U_U },
1558+
1559+ {OpcodeInfo::all, {Size16, 0x85, _r}, {r_m16,r16}, U_U },
1560+ {OpcodeInfo::all, {0x85, _r}, {r_m32,r32}, U_U },
1561+ {OpcodeInfo::em64t, {REX_W, 0x85, _r}, {r_m64,r64}, U_U },
1562+END_OPCODES()
1563+END_MNEMONIC()
1564+
1565+
1566+BEGIN_MNEMONIC(UCOMISD, MF_AFFECTS_FLAGS, U_U)
1567+BEGIN_OPCODES()
1568+ {OpcodeInfo::all, {0x66, 0x0F, 0x2E, _r}, {xmm64, xmm_m64}, U_U },
1569+END_OPCODES()
1570+END_MNEMONIC()
1571+
1572+BEGIN_MNEMONIC(UCOMISS, MF_AFFECTS_FLAGS, U_U)
1573+BEGIN_OPCODES()
1574+ {OpcodeInfo::all, {0x0F, 0x2E, _r}, {xmm32, xmm_m32}, U_U },
1575+END_OPCODES()
1576+END_MNEMONIC()
1577+
1578+BEGIN_MNEMONIC(COMISD, MF_AFFECTS_FLAGS, U_U)
1579+BEGIN_OPCODES()
1580+ {OpcodeInfo::all, {0x66, 0x0F, 0x2F, _r}, {xmm64, xmm_m64}, U_U },
1581+END_OPCODES()
1582+END_MNEMONIC()
1583+
1584+BEGIN_MNEMONIC(COMISS, MF_AFFECTS_FLAGS, U_U)
1585+BEGIN_OPCODES()
1586+ {OpcodeInfo::all, {0x0F, 0x2F, _r}, {xmm32, xmm_m32}, U_U },
1587+END_OPCODES()
1588+END_MNEMONIC()
1589+
1590+BEGIN_MNEMONIC(XORPD, MF_SAME_ARG_NO_USE|MF_SYMMETRIC, DU_U)
1591+BEGIN_OPCODES()
1592+ //Note: they're actually 128 bits
1593+ {OpcodeInfo::all, {0x66, 0x0F, 0x57, _r}, {xmm64, xmm_m64}, DU_U },
1594+END_OPCODES()
1595+END_MNEMONIC()
1596+
1597+BEGIN_MNEMONIC(XORPS, MF_SAME_ARG_NO_USE|MF_SYMMETRIC, DU_U)
1598+BEGIN_OPCODES()
1599+ //Note: they're actually 128 bits
1600+ {OpcodeInfo::all, {0x0F, 0x57, _r}, {xmm32, xmm_m32}, DU_U },
1601+END_OPCODES()
1602+END_MNEMONIC()
1603+
1604+BEGIN_MNEMONIC(CVTDQ2PD, MF_NONE, D_U )
1605+BEGIN_OPCODES()
1606+ //Note: they're actually 128 bits
1607+ {OpcodeInfo::all, {0xF3, 0x0F, 0xE6}, {xmm64, xmm_m64}, D_U },
1608+END_OPCODES()
1609+END_MNEMONIC()
1610+
1611+BEGIN_MNEMONIC(CVTDQ2PS, MF_NONE, D_U )
1612+BEGIN_OPCODES()
1613+ //Note: they're actually 128 bits
1614+ {OpcodeInfo::all, {0x0F, 0x5B, _r}, {xmm32, xmm_m32}, D_U },
1615+END_OPCODES()
1616+END_MNEMONIC()
1617+
1618+BEGIN_MNEMONIC(CVTTPD2DQ, MF_NONE, D_U )
1619+BEGIN_OPCODES()
1620+ //Note: they're actually 128 bits
1621+ {OpcodeInfo::all, {0x66, 0x0F, 0xE6}, {xmm64, xmm_m64}, D_U },
1622+END_OPCODES()
1623+END_MNEMONIC()
1624+
1625+BEGIN_MNEMONIC(CVTTPS2DQ, MF_NONE, D_U )
1626+BEGIN_OPCODES()
1627+ //Note: they're actually 128 bits
1628+ {OpcodeInfo::all, {0xF3, 0x0F, 0x5B, _r}, {xmm32, xmm_m32}, D_U },
1629+END_OPCODES()
1630+END_MNEMONIC()
1631+
1632+//
1633+// String operations
1634+//
1635+BEGIN_MNEMONIC(STD, MF_AFFECTS_FLAGS, N)
1636+BEGIN_OPCODES()
1637+ {OpcodeInfo::all, {0xFD}, {}, N },
1638+END_OPCODES()
1639+END_MNEMONIC()
1640+
1641+BEGIN_MNEMONIC(CLD, MF_AFFECTS_FLAGS, N)
1642+BEGIN_OPCODES()
1643+ {OpcodeInfo::all, {0xFC}, {}, N },
1644+END_OPCODES()
1645+END_MNEMONIC()
1646+
1647+BEGIN_MNEMONIC(SCAS, MF_AFFECTS_FLAGS, N)
1648+// To be symmetric, this mnemonic should take either m32 or RegName_EAX,
1649+// but as long as Jitrino's CG does not use the mnemonic, we leave it
1650+// in its natural form.
1651+BEGIN_OPCODES()
1652+ {OpcodeInfo::all, {0xAF}, {}, N },
1653+END_OPCODES()
1654+END_MNEMONIC()
1655+
1656+BEGIN_MNEMONIC(STOS, MF_AFFECTS_FLAGS, DU_DU_U)
1657+BEGIN_OPCODES()
1658+ {OpcodeInfo::all, {0xAB}, {EDI, ECX, EAX}, DU_DU_U },
1659+ {OpcodeInfo::all, {0xAA}, {EDI, ECX, AL}, DU_DU_U },
1660+ {OpcodeInfo::em64t, {REX_W, 0xAB}, {RDI, RCX, RAX}, DU_DU_U },
1661+END_OPCODES()
1662+END_MNEMONIC()
1663+
1664+/*
1665+MOVS and CMPS are special cases.
1666+Most of the code in both the CG and the Encoder does not expect two memory
1667+operands. They are also not supposed to set up constraints on which registers
1668+the memory references must reside in - so m8,m8 or m32,m32 is not an option.
1669+We can't use r8,r8 either - EDI and ESI have no 8-bit forms.
1670+So, as a workaround, we use r32,r32 and convey the operand size through the
1671+specific mnemonic - the codegen does the same.
1672+*/
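+// For illustration: MOVS32 below is just the raw 0xA5 opcode; the r32,r32,ECX
+// operands are bookkeeping for the code generator, while the hardware form
+// always uses ESI/EDI implicitly.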
1673+BEGIN_MNEMONIC(MOVS8, MF_NONE, DU_DU_DU)
1674+BEGIN_OPCODES()
1675+ {OpcodeInfo::ia32, {0xA4}, {r32,r32,ECX}, DU_DU_DU },
1676+ {OpcodeInfo::em64t, {0xA4}, {r64,r64,RCX}, DU_DU_DU },
1677+END_OPCODES()
1678+END_MNEMONIC()
1679+
1680+BEGIN_MNEMONIC(MOVS16, MF_NONE, DU_DU_DU)
1681+BEGIN_OPCODES()
1682+ {OpcodeInfo::ia32, {Size16, 0xA5}, {r32,r32,ECX}, DU_DU_DU },
1683+ {OpcodeInfo::em64t, {Size16, 0xA5}, {r64,r64,RCX}, DU_DU_DU },
1684+END_OPCODES()
1685+END_MNEMONIC()
1686+
1687+BEGIN_MNEMONIC(MOVS32, MF_NONE, DU_DU_DU)
1688+BEGIN_OPCODES()
1689+ {OpcodeInfo::ia32, {0xA5}, {r32,r32,ECX}, DU_DU_DU },
1690+ {OpcodeInfo::em64t, {0xA5}, {r64,r64,RCX}, DU_DU_DU },
1691+END_OPCODES()
1692+END_MNEMONIC()
1693+
1694+BEGIN_MNEMONIC(MOVS64, MF_NONE, DU_DU_DU)
1695+BEGIN_OPCODES()
1696+ {OpcodeInfo::em64t, {REX_W,0xA5}, {r64,r64,RCX}, DU_DU_DU },
1697+END_OPCODES()
1698+END_MNEMONIC()
1699+
1700+BEGIN_MNEMONIC(CMPSB, MF_AFFECTS_FLAGS, DU_DU_DU)
1701+BEGIN_OPCODES()
1702+ {OpcodeInfo::ia32, {0xA6}, {ESI,EDI,ECX}, DU_DU_DU },
1703+ {OpcodeInfo::em64t, {0xA6}, {RSI,RDI,RCX}, DU_DU_DU },
1704+END_OPCODES()
1705+END_MNEMONIC()
1706+
1707+BEGIN_MNEMONIC(CMPSW, MF_AFFECTS_FLAGS, DU_DU_DU)
1708+BEGIN_OPCODES()
1709+ {OpcodeInfo::ia32, {Size16, 0xA7}, {ESI,EDI,ECX}, DU_DU_DU },
1710+ {OpcodeInfo::em64t, {Size16, 0xA7}, {RSI,RDI,RCX}, DU_DU_DU },
1711+END_OPCODES()
1712+END_MNEMONIC()
1713+
1714+BEGIN_MNEMONIC(CMPSD, MF_AFFECTS_FLAGS, DU_DU_DU)
1715+BEGIN_OPCODES()
1716+ {OpcodeInfo::ia32, {0xA7}, {ESI,EDI,ECX}, DU_DU_DU },
1717+ {OpcodeInfo::em64t, {0xA7}, {RSI,RDI,RCX}, DU_DU_DU },
1718+END_OPCODES()
1719+END_MNEMONIC()
1720+
1721+
1722+BEGIN_MNEMONIC(WAIT, MF_AFFECTS_FLAGS, N)
1723+BEGIN_OPCODES()
1724+ {OpcodeInfo::all, {0x9B}, {}, N },
1725+END_OPCODES()
1726+END_MNEMONIC()
1727+
1728+//
1729+// ~String operations
1730+//
1731+
1732+//
1733+//Note: the instructions below were added for the sake of the disassembling routine.
1734+// Their flags, params and param usage need to be defined more precisely.
1735+//
1736+BEGIN_MNEMONIC(LEAVE, MF_NONE, N)
1737+BEGIN_OPCODES()
1738+ {OpcodeInfo::decoder, {0xC9}, {}, N },
1739+END_OPCODES()
1740+END_MNEMONIC()
1741+
1742+BEGIN_MNEMONIC(ENTER, MF_NONE, N)
1743+BEGIN_OPCODES()
1744+ {OpcodeInfo::decoder, {0xC8, iw, ib}, {imm16, imm8}, N },
1745+END_OPCODES()
1746+END_MNEMONIC()
1747+
1748+BEGIN_MNEMONIC(PADDB, MF_NONE, DU_U)
1749+BEGIN_OPCODES()
1750+ {OpcodeInfo::all, {0x66, 0x0F, 0xFC, _r}, {xmm64, xmm_m64}, DU_U },
1751+END_OPCODES()
1752+END_MNEMONIC()
1753+
1754+BEGIN_MNEMONIC(PADDW, MF_NONE, DU_U)
1755+BEGIN_OPCODES()
1756+ {OpcodeInfo::all, {0x66, 0x0F, 0xFD, _r}, {xmm64, xmm_m64}, DU_U },
1757+END_OPCODES()
1758+END_MNEMONIC()
1759+
1760+BEGIN_MNEMONIC(PADDD, MF_NONE, DU_U)
1761+BEGIN_OPCODES()
1762+ {OpcodeInfo::all, {0x66, 0x0F, 0xFE, _r}, {xmm64, xmm_m64}, DU_U },
1763+END_OPCODES()
1764+END_MNEMONIC()
1765+
1766+BEGIN_MNEMONIC(PSUBB, MF_NONE, DU_U)
1767+BEGIN_OPCODES()
1768+ {OpcodeInfo::all, {0x66, 0x0F, 0xF8, _r}, {xmm64, xmm_m64}, DU_U },
1769+END_OPCODES()
1770+END_MNEMONIC()
1771+
1772+BEGIN_MNEMONIC(PSUBW, MF_NONE, DU_U)
1773+BEGIN_OPCODES()
1774+ {OpcodeInfo::all, {0x66, 0x0F, 0xF9, _r}, {xmm64, xmm_m64}, DU_U },
1775+END_OPCODES()
1776+END_MNEMONIC()
1777+
1778+BEGIN_MNEMONIC(PSUBD, MF_NONE, DU_U)
1779+BEGIN_OPCODES()
1780+ {OpcodeInfo::all, {0x66, 0x0F, 0xFA, _r}, {xmm64, xmm_m64}, DU_U },
1781+END_OPCODES()
1782+END_MNEMONIC()
1783+
1784+BEGIN_MNEMONIC(PMULLW, MF_NONE, DU_U)
1785+BEGIN_OPCODES()
1786+ {OpcodeInfo::all, {0x66, 0x0F, 0xD5, _r}, {xmm64, xmm_m64}, DU_U },
1787+END_OPCODES()
1788+END_MNEMONIC()
1789+
1790+BEGIN_MNEMONIC(PMULLD, MF_NONE, DU_U)
1791+BEGIN_OPCODES()
1792+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x40, _r}, {xmm64, xmm_m64}, DU_U },
1793+END_OPCODES()
1794+END_MNEMONIC()
1795+
1796+BEGIN_MNEMONIC(PSLLW, MF_NONE, DU_U)
1797+BEGIN_OPCODES()
1798+ {OpcodeInfo::all, {0x66, 0x0F, 0xF1, _r}, {xmm64, xmm_m64}, DU_U },
1799+ {OpcodeInfo::all, {0x66, 0x0F, 0x71, _6, ib}, {xmm64, imm8}, DU_U },
1800+END_OPCODES()
1801+END_MNEMONIC()
1802+
1803+BEGIN_MNEMONIC(PSLLD, MF_NONE, DU_U)
1804+BEGIN_OPCODES()
1805+ {OpcodeInfo::all, {0x66, 0x0F, 0xF2, _r}, {xmm64, xmm_m64}, DU_U },
1806+ {OpcodeInfo::all, {0x66, 0x0F, 0x72, _6, ib}, {xmm64, imm8}, DU_U },
1807+END_OPCODES()
1808+END_MNEMONIC()
1809+
1810+BEGIN_MNEMONIC(PSRAW, MF_NONE, DU_U)
1811+BEGIN_OPCODES()
1812+ {OpcodeInfo::all, {0x66, 0x0F, 0xE1, _r}, {xmm64, xmm_m64}, DU_U },
1813+ {OpcodeInfo::all, {0x66, 0x0F, 0x71, _4, ib}, {xmm64, imm8}, DU_U },
1814+END_OPCODES()
1815+END_MNEMONIC()
1816+
1817+BEGIN_MNEMONIC(PSRAD, MF_NONE, DU_U)
1818+BEGIN_OPCODES()
1819+ {OpcodeInfo::all, {0x66, 0x0F, 0xE2, _r}, {xmm64, xmm_m64}, DU_U },
1820+ {OpcodeInfo::all, {0x66, 0x0F, 0x72, _4, ib}, {xmm64, imm8}, DU_U },
1821+END_OPCODES()
1822+END_MNEMONIC()
1823+
1824+BEGIN_MNEMONIC(PSRLW, MF_NONE, DU_U)
1825+BEGIN_OPCODES()
1826+ {OpcodeInfo::all, {0x66, 0x0F, 0xD1, _r}, {xmm64, xmm_m64}, DU_U },
1827+ {OpcodeInfo::all, {0x66, 0x0F, 0x71, _2, ib}, {xmm64, imm8}, DU_U },
1828+END_OPCODES()
1829+END_MNEMONIC()
1830+
1831+BEGIN_MNEMONIC(PSRLD, MF_NONE, DU_U)
1832+BEGIN_OPCODES()
1833+ {OpcodeInfo::all, {0x66, 0x0F, 0xD2, _r}, {xmm64, xmm_m64}, DU_U },
1834+ {OpcodeInfo::all, {0x66, 0x0F, 0x72, _2, ib}, {xmm64, imm8}, DU_U },
1835+END_OPCODES()
1836+END_MNEMONIC()
1837+
1838+BEGIN_MNEMONIC(PMOVSXBW, MF_NONE, DU_U)
1839+BEGIN_OPCODES()
1840+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x20, _r}, {xmm64, xmm_m64}, DU_U },
1841+END_OPCODES()
1842+END_MNEMONIC()
1843+
1844+BEGIN_MNEMONIC(PSHUFB, MF_NONE, DU_U)
1845+BEGIN_OPCODES()
1846+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x00, _r}, {xmm64, xmm_m64}, DU_U },
1847+END_OPCODES()
1848+END_MNEMONIC()
1849+
1850+BEGIN_MNEMONIC(PSHUFD, MF_NONE, D_U_U)
1851+BEGIN_OPCODES()
1852+ {OpcodeInfo::all, {0x66, 0x0F, 0x70, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U },
1853+END_OPCODES()
1854+END_MNEMONIC()
1855+
1856+BEGIN_MNEMONIC(PSHUFLW, MF_NONE, D_U_U)
1857+BEGIN_OPCODES()
1858+ {OpcodeInfo::all, {0xF2, 0x0F, 0x70, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U },
1859+END_OPCODES()
1860+END_MNEMONIC()
1861+
1862+BEGIN_MNEMONIC(PSHUFHW, MF_NONE, D_U_U)
1863+BEGIN_OPCODES()
1864+ {OpcodeInfo::all, {0xF3, 0x0F, 0x70, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U },
1865+END_OPCODES()
1866+END_MNEMONIC()
1867+
1868+BEGIN_MNEMONIC(PHADDSW, MF_NONE, DU_U)
1869+BEGIN_OPCODES()
1870+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x03, _r}, {xmm64, xmm_m64}, DU_U },
1871+END_OPCODES()
1872+END_MNEMONIC()
1873+
1874+BEGIN_MNEMONIC(PHADDW, MF_NONE, DU_U)
1875+BEGIN_OPCODES()
1876+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x01, _r}, {xmm64, xmm_m64}, DU_U },
1877+END_OPCODES()
1878+END_MNEMONIC()
1879+
1880+BEGIN_MNEMONIC(PHADDD, MF_NONE, DU_U)
1881+BEGIN_OPCODES()
1882+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x02, _r}, {xmm64, xmm_m64}, DU_U },
1883+END_OPCODES()
1884+END_MNEMONIC()
1885+
1886+BEGIN_MNEMONIC(PHSUBSW, MF_NONE, DU_U)
1887+BEGIN_OPCODES()
1888+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x07, _r}, {xmm64, xmm_m64}, DU_U },
1889+END_OPCODES()
1890+END_MNEMONIC()
1891+
1892+BEGIN_MNEMONIC(PHSUBW, MF_NONE, DU_U)
1893+BEGIN_OPCODES()
1894+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x05, _r}, {xmm64, xmm_m64}, DU_U },
1895+END_OPCODES()
1896+END_MNEMONIC()
1897+
1898+BEGIN_MNEMONIC(PHSUBD, MF_NONE, DU_U)
1899+BEGIN_OPCODES()
1900+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x06, _r}, {xmm64, xmm_m64}, DU_U },
1901+END_OPCODES()
1902+END_MNEMONIC()
1903+
1904+BEGIN_MNEMONIC(PEXTRB, MF_NONE, D_U_U)
1905+BEGIN_OPCODES()
1906+ {OpcodeInfo::all, {0x66, 0x0F, 0x3A, 0x14, _r, ib}, {r32, xmm64, imm8}, D_U_U },
1907+END_OPCODES()
1908+END_MNEMONIC()
1909+
1910+BEGIN_MNEMONIC(PEXTRW, MF_NONE, D_U_U)
1911+BEGIN_OPCODES()
1912+ {OpcodeInfo::all, {0x66, 0x0F, 0xC5, _r, ib}, {r32, xmm64, imm8}, D_U_U },
1913+END_OPCODES()
1914+END_MNEMONIC()
1915+
1916+BEGIN_MNEMONIC(PEXTRD, MF_NONE, D_U_U)
1917+BEGIN_OPCODES()
1918+ {OpcodeInfo::all, {0x66, 0x0F, 0x3A, 0x16, _r, ib}, {r_m32, xmm64, imm8}, D_U_U },
1919+END_OPCODES()
1920+END_MNEMONIC()
1921+
1922+BEGIN_MNEMONIC(MOVDQA, MF_NONE|MF_SYMMETRIC, D_U)
1923+BEGIN_OPCODES()
1924+ {OpcodeInfo::all, {0x66, 0x0F, 0x6F, _r}, {xmm64, xmm_m64}, D_U },
1925+ //The encoder cannot do the lookup properly when the operands are symmetric but the opcode is not:
1926+ //{OpcodeInfo::all, {0x66, 0x0F, 0x7F, _r}, {xmm_m128, xmm128}, D_U },
1927+END_OPCODES()
1928+END_MNEMONIC()
1929+
1930+}; // ~masterEncodingTable[]
1931+
1932+ENCODER_NAMESPACE_END
1933+
1934+ENCODER_NAMESPACE_START
1935+
1936+static int compareMnemonicInfo(const void* info1, const void* info2)
1937+{
1938+ Mnemonic id1, id2;
1939+
1940+ id1 = ((const MnemonicInfo*) info1)->mn;
1941+ id2 = ((const MnemonicInfo*) info2)->mn;
1942+ if (id1 < id2)
1943+ return -1;
1944+ if (id1 > id2)
1945+ return 1;
1946+ return 0;
1947+}
1948+
1949+int EncoderBase::buildTable(void)
1950+{
1951+ // A check: all mnemonics must be covered
1952+ assert(COUNTOF(masterEncodingTable) == Mnemonic_Count);
1953+
1954+ // sort the mnemonics so the list becomes ordered
1955+ qsort(masterEncodingTable, Mnemonic_Count, sizeof(MnemonicInfo), compareMnemonicInfo);
1956+
1957+ //
1958+ // clear the tables
1959+ //
1960+ memset(opcodesHashMap, NOHASH, sizeof(opcodesHashMap));
1961+ memset(opcodes, 0, sizeof(opcodes));
1962+ //
1963+ // and, finally, build it
1964+ for (unsigned i=0; i<Mnemonic_Count; i++) {
1965+ assert((Mnemonic)i == (masterEncodingTable + i)->mn);
1966+ buildMnemonicDesc(masterEncodingTable+i);
1967+ }
1968+ return 0;
1969+}
1970+
1971+void EncoderBase::buildMnemonicDesc(const MnemonicInfo * minfo)
1972+{
1973+ MnemonicDesc& mdesc = mnemonics[minfo->mn];
1974+ mdesc.mn = minfo->mn;
1975+ mdesc.flags = minfo->flags;
1976+ mdesc.roles = minfo->roles;
1977+ mdesc.name = minfo->name;
1978+
1979+ //
1980+ // fill the used opcodes
1981+ //
1982+ for (unsigned i=0, oindex=0; i<COUNTOF(minfo->opcodes); i++) {
1983+
1984+ const OpcodeInfo& oinfo = minfo->opcodes[i];
1985+ OpcodeDesc& odesc = opcodes[minfo->mn][oindex];
1986+ // last opcode ?
1987+ if (oinfo.opcode[0] == OpcodeByteKind_LAST) {
1988+ // mark the opcode 'last', exit
1989+ odesc.opcode_len = 0;
1990+ odesc.last = 1;
1991+ break;
1992+ }
1993+ odesc.last = 0;
1994+#ifdef _EM64T_
1995+ if (oinfo.platf == OpcodeInfo::ia32) { continue; }
1996+ if (oinfo.platf == OpcodeInfo::decoder32) { continue; }
1997+#else
1998+ if (oinfo.platf == OpcodeInfo::em64t) { continue; }
1999+ if (oinfo.platf == OpcodeInfo::decoder64) { continue; }
2000+#endif
2001+ if (oinfo.platf == OpcodeInfo::decoder64 ||
2002+ oinfo.platf == OpcodeInfo::decoder32) {
2003+ odesc.platf = OpcodeInfo::decoder;
2004+ }
2005+ else {
2006+ odesc.platf = (char)oinfo.platf;
2007+ }
2008+ //
2009+ // fill out opcodes
2010+ //
2011+ unsigned j = 0;
2012+ odesc.opcode_len = 0;
2013+ for(; oinfo.opcode[j]; j++) {
2014+ unsigned opcod = oinfo.opcode[j];
2015+ unsigned kind = opcod&OpcodeByteKind_KindMask;
2016+ if (kind == OpcodeByteKind_REX_W) {
2017+ odesc.opcode[odesc.opcode_len++] = (unsigned char)0x48;
2018+ continue;
2019+ }
2020+ else if(kind != 0 && kind != OpcodeByteKind_ZeroOpcodeByte) {
2021+ break;
2022+ }
2023+ unsigned lowByte = (opcod & OpcodeByteKind_OpcodeMask);
2024+ odesc.opcode[odesc.opcode_len++] = (unsigned char)lowByte;
2025+ }
2026+ assert(odesc.opcode_len<5);
2027+ odesc.aux0 = odesc.aux1 = 0;
2028+ if (oinfo.opcode[j] != 0) {
2029+ odesc.aux0 = oinfo.opcode[j];
2030+ assert((odesc.aux0 & OpcodeByteKind_KindMask) != 0);
2031+ ++j;
2032+ if(oinfo.opcode[j] != 0) {
2033+ odesc.aux1 = oinfo.opcode[j];
2034+ assert((odesc.aux1 & OpcodeByteKind_KindMask) != 0);
2035+ }
2036+ }
2037+ else if (oinfo.roles.count>=2) {
2038+ if (((oinfo.opnds[0].kind&OpndKind_Mem) &&
2039+ (isRegKind(oinfo.opnds[1].kind))) ||
2040+ ((oinfo.opnds[1].kind&OpndKind_Mem) &&
2041+ (isRegKind(oinfo.opnds[0].kind)))) {
2042+ // Example: MOVQ xmm1, xmm/m64 lists opcode bytes only (no /r);
2043+ // same with SHRD.
2044+ // Add a fake /r so the ModRM byte still gets emitted.
2045+ odesc.aux0 = _r;
2046+ }
2047+ }
2048+ else if (oinfo.roles.count==1) {
2049+ if (oinfo.opnds[0].kind&OpndKind_Mem) {
2050+ // Example: SETcc r/m8, adding fake /0
2051+ odesc.aux0 = _0;
2052+ }
2053+ }
2054+ // check imm
2055+ if (oinfo.roles.count > 0 &&
2056+ (oinfo.opnds[0].kind == OpndKind_Imm ||
2057+ oinfo.opnds[oinfo.roles.count-1].kind == OpndKind_Imm)) {
2058+ // Example: CALL cd, PUSH imm32 - they fit both opnds[0] and
2059+ // opnds[oinfo.roles.count-1].
2060+ // The A3 opcode fits only opnds[0] - it currently has
2061+ // MOV imm32, EAX. Looks ridiculous, but this is how the
2062+ // moffset is currently implemented. This will need to be fixed
2063+ // together with the other usages of moff.
2064+ // adding fake /cd or fake /id
2065+ unsigned imm_opnd_index =
2066+ oinfo.opnds[0].kind == OpndKind_Imm ? 0 : oinfo.roles.count-1;
2067+ OpndSize sz = oinfo.opnds[imm_opnd_index].size;
2068+ unsigned imm_encode, coff_encode;
2069+ if (sz==OpndSize_8) {imm_encode = ib; coff_encode=cb; }
2070+ else if (sz==OpndSize_16) {imm_encode = iw; coff_encode=cw;}
2071+ else if (sz==OpndSize_32) {imm_encode = id; coff_encode=cd; }
2072+ else if (sz==OpndSize_64) {imm_encode = io; coff_encode=0xCC; }
2073+ else { assert(false); imm_encode=0xCC; coff_encode=0xCC; }
2074+ if (odesc.aux1 == 0) {
2075+ if (odesc.aux0==0) {
2076+ odesc.aux0 = imm_encode;
2077+ }
2078+ else {
2079+ if (odesc.aux0 != imm_encode && odesc.aux0 != coff_encode) {
2080+ odesc.aux1 = imm_encode;
2081+ }
2082+ }
2083+ }
2084+ else {
2085+ assert(odesc.aux1==imm_encode);
2086+ }
2087+
2088+ }
2089+
2090+ assert(sizeof(odesc.opnds) == sizeof(oinfo.opnds));
2091+ memcpy(odesc.opnds, oinfo.opnds,
2092+ sizeof(EncoderBase::OpndDesc)
2093+ * EncoderBase::MAX_NUM_OPCODE_OPERANDS);
2094+ odesc.roles = oinfo.roles;
2095+ odesc.first_opnd = 0;
2096+ if (odesc.opnds[0].reg != RegName_Null) {
2097+ ++odesc.first_opnd;
2098+ if (odesc.opnds[1].reg != RegName_Null) {
2099+ ++odesc.first_opnd;
2100+ }
2101+ }
2102+
2103+ if (odesc.platf == OpcodeInfo::decoder) {
2104+ // if the opcode is only for decoding info, then do not hash it.
2105+ ++oindex;
2106+ continue;
2107+ }
2108+
2109+ //
2110+ // check whether the operand info is a mask (i.e. r_m*).
2111+ // in this case, split the info to have separate entries for 'r'
2112+ // and for 'm'.
2113+ // the good news is that there can be only one such operand.
2114+ //
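+ // For illustration (hypothetical walk-through): in a row such as
+ // {0x8B, _r}, {r32, r_m32} the r_m32 operand is a mask, so the row is
+ // hashed twice - once as {r32, r32} and once as {r32, m32} - with both
+ // hash slots pointing at the same OpcodeDesc index.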
2115+ int opnd2split = -1;
2116+ for (unsigned k=0; k<oinfo.roles.count; k++) {
2117+ if ((oinfo.opnds[k].kind & OpndKind_Mem) &&
2118+ (OpndKind_Mem != oinfo.opnds[k].kind)) {
2119+ opnd2split = k;
2120+ break;
2121+ }
2122+ };
2123+
2124+ if (opnd2split == -1) {
2125+ // not a mask, hash it, store it, continue.
2126+ unsigned short hash = getHash(&oinfo);
2127+ opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex;
2128+ ++oindex;
2129+ continue;
2130+ };
2131+
2132+ OpcodeInfo storeItem = oinfo;
2133+ unsigned short hash;
2134+
2135+ // remove the memory part of the mask, and store only 'r' part
2136+ storeItem.opnds[opnd2split].kind = (OpndKind)(storeItem.opnds[opnd2split].kind & ~OpndKind_Mem);
2137+ hash = getHash(&storeItem);
2138+ if (opcodesHashMap[minfo->mn][hash] == NOHASH) {
2139+ opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex;
2140+ }
2141+ // else {
2142+ // Do not overwrite if there is something there; just check that the operands match.
2143+ // The reason is that for some instructions there are several possibilities:
2144+ // say, 'DEC r' may be encoded as either '48+r' or 'FF /1', and I believe
2145+ // the first one is better for 'dec r'.
2146+ // As we're currently processing an opcode with a memory part in its operand,
2147+ // leave already-filled items intact, so if there is 'OP reg' there, this
2148+ // better choice will be left in the table instead of 'OP r_m'.
2149+ // }
2150+
2151+ // compute hash of memory-based operand, 'm' part in 'r_m'
2152+ storeItem.opnds[opnd2split].kind = OpndKind_Mem;
2153+ hash = getHash(&storeItem);
2154+ // This should not happen: for the r_m opcodes the hash value of the
2155+ // 'r' part may collide with an 'OP r' value, but that is impossible
2156+ // for the 'm' part.
2157+ assert(opcodesHashMap[minfo->mn][hash] == NOHASH);
2158+ opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex;
2159+
2160+ ++oindex;
2161+ }
2162+}
2163+
2164+ENCODER_NAMESPACE_END
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_wrapper.cpp
@@ -0,0 +1,836 @@
1+/*
2+ * Copyright (C) 2012 The Android Open Source Project
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ * See the License for the specific language governing permissions and
14+ * limitations under the License.
15+ */
16+
17+#include <stdio.h>
18+#include <assert.h>
19+#include <limits.h>
20+#include "enc_base.h"
21+#include "enc_wrapper.h"
22+#include "dec_base.h"
23+#include "utils/Log.h"
24+
25+//#define PRINT_ENCODER_STREAM
26+bool dump_x86_inst = false;
27+
28+/**
29+ * @brief Provides the mapping between PhysicalReg and the RegName used by the encoder
30+ * @param physicalReg The physical register
31+ * @return The encoder's register name
32+ */
33+static RegName mapFromPhysicalReg (int physicalReg)
34+{
35+ RegName reg = RegName_Null;
36+
37+ //Get mapping between PhysicalReg and RegName
38+ switch (physicalReg)
39+ {
40+ case PhysicalReg_EAX:
41+ reg = RegName_EAX;
42+ break;
43+ case PhysicalReg_EBX:
44+ reg = RegName_EBX;
45+ break;
46+ case PhysicalReg_ECX:
47+ reg = RegName_ECX;
48+ break;
49+ case PhysicalReg_EDX:
50+ reg = RegName_EDX;
51+ break;
52+ case PhysicalReg_EDI:
53+ reg = RegName_EDI;
54+ break;
55+ case PhysicalReg_ESI:
56+ reg = RegName_ESI;
57+ break;
58+ case PhysicalReg_ESP:
59+ reg = RegName_ESP;
60+ break;
61+ case PhysicalReg_EBP:
62+ reg = RegName_EBP;
63+ break;
64+ case PhysicalReg_XMM0:
65+ reg = RegName_XMM0;
66+ break;
67+ case PhysicalReg_XMM1:
68+ reg = RegName_XMM1;
69+ break;
70+ case PhysicalReg_XMM2:
71+ reg = RegName_XMM2;
72+ break;
73+ case PhysicalReg_XMM3:
74+ reg = RegName_XMM3;
75+ break;
76+ case PhysicalReg_XMM4:
77+ reg = RegName_XMM4;
78+ break;
79+ case PhysicalReg_XMM5:
80+ reg = RegName_XMM5;
81+ break;
82+ case PhysicalReg_XMM6:
83+ reg = RegName_XMM6;
84+ break;
85+ case PhysicalReg_XMM7:
86+ reg = RegName_XMM7;
87+ break;
88+ default:
89+ //We have no mapping
90+ reg = RegName_Null;
91+ break;
92+ }
93+
94+ return reg;
95+}
96+
97+//getRegSize, getAliasReg: helpers used below;
98+//OpndSize, RegName, OpndExt: enums from enc_defs.h
99+inline void add_r(EncoderBase::Operands & args, int physicalReg, OpndSize sz, OpndExt ext = OpndExt_None) {
100+ if (sz == OpndSize_128)
101+ {
102+ //The encoder table lists xmm registers as 64-bit operands. Since the semantics are determined
103+ //by the mnemonic's encoding, we change the size to 64-bit to keep the encoder happy. It still
104+ //generates the 128-bit form, because the true 64-bit variants use a different (MMX) encoding.
105+ sz = OpndSize_64;
106+ }
107+
108+ RegName reg = mapFromPhysicalReg (physicalReg);
109+ if (sz != getRegSize(reg)) {
110+ reg = getAliasReg(reg, sz);
111+ }
112+ args.add(EncoderBase::Operand(reg, ext));
113+}
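+// For illustration: a 128-bit request such as add_r(args, PhysicalReg_XMM0,
+// OpndSize_128) is therefore recorded as a 64-bit XMM0 operand, and the
+// mnemonic (e.g. MOVDQA) still selects the 128-bit encoding.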
114+inline void add_m(EncoderBase::Operands & args, int baseReg, int disp, OpndSize sz, OpndExt ext = OpndExt_None) {
115+ if (sz == OpndSize_128)
116+ {
117+ //See the comment in add_r: 128-bit xmm operands are recorded as 64-bit.
120+ sz = OpndSize_64;
121+ }
122+
123+ args.add(EncoderBase::Operand(sz,
124+ mapFromPhysicalReg (baseReg),
125+ RegName_Null, 0,
126+ disp, ext));
127+}
128+inline void add_m_scale(EncoderBase::Operands & args, int baseReg, int indexReg, int scale,
129+ OpndSize sz, OpndExt ext = OpndExt_None) {
130+ if (sz == OpndSize_128)
131+ {
132+ //For xmm registers, the encoder table lists them as 64-bit operands. Since the semantics
133+ //are determined by the encoding of the mnemonic, we change the size to 64-bit to satisfy the
134+ //encoder. The generated code still operates on 128 bits, because the true 64-bit forms all use distinct MMX encodings.
135+ sz = OpndSize_64;
136+ }
137+
138+ args.add(EncoderBase::Operand(sz,
139+ mapFromPhysicalReg (baseReg),
140+ mapFromPhysicalReg (indexReg), scale,
141+ 0, ext));
142+}
143+inline void add_m_disp_scale(EncoderBase::Operands & args, int baseReg, int disp, int indexReg, int scale,
144+ OpndSize sz, OpndExt ext = OpndExt_None) {
145+ if (sz == OpndSize_128)
146+ {
147+ //For xmm registers, the encoder table lists them as 64-bit operands. Since the semantics
148+ //are determined by the encoding of the mnemonic, we change the size to 64-bit to satisfy the
149+ //encoder. The generated code still operates on 128 bits, because the true 64-bit forms all use distinct MMX encodings.
150+ sz = OpndSize_64;
151+ }
152+
153+ args.add(EncoderBase::Operand(sz,
154+ mapFromPhysicalReg (baseReg),
155+ mapFromPhysicalReg (indexReg), scale,
156+ disp, ext));
157+}
158+
159+inline void add_fp(EncoderBase::Operands & args, unsigned i, bool dbl) {
160+ return args.add((RegName)( (dbl ? RegName_FP0D : RegName_FP0S) + i));
161+}
162+inline void add_imm(EncoderBase::Operands & args, OpndSize sz, int value, bool is_signed) {
163+ //assert(n_size != imm.get_size());
164+ args.add(EncoderBase::Operand(sz, value,
165+ is_signed ? OpndExt_Signed : OpndExt_Zero));
166+}
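// A minimal usage sketch (illustrative only, not part of this patch): the
// helpers above build an operand list destination-first, which EncoderBase
// then encodes. Assuming "stream" points into a writable code buffer,
// "mov eax, 42" could be produced like this:
static char * sketch_mov_eax_imm(char * stream) {
    EncoderBase::Operands args;
    add_r(args, PhysicalReg_EAX, OpndSize_32);           // destination register
    add_imm(args, OpndSize_32, 42, true /*is_signed*/);  // source immediate
    return (char *)EncoderBase::encode(stream, Mnemonic_MOV, args);
}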
167+
168+#define MAX_DECODED_STRING_LEN 1024
169+char tmpBuffer[MAX_DECODED_STRING_LEN];
170+
171+void printOperand(const EncoderBase::Operand & opnd) {
172+ unsigned int sz;
173+ if(!dump_x86_inst) return;
174+ sz = strlen(tmpBuffer);
175+ if(opnd.size() != OpndSize_32) {
176+ const char * opndSizeString = getOpndSizeString(opnd.size());
177+
178+ if (opndSizeString == NULL) {
179+ // If the string that represents the operand size is null, the
180+ // operand size is an invalid value. Although this could be a
181+ // problem if the instruction is corrupted, failing to disassemble
182+ // is technically not fatal. Thus, let's warn but proceed with
183+ // an empty string.
184+ ALOGW("JIT-WARNING: Cannot decode instruction operand size.");
185+ opndSizeString = "";
186+ }
187+
188+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN - sz, "%s ",
189+ opndSizeString);
190+ }
191+ if(opnd.is_mem()) {
192+ if(opnd.scale() != 0) {
193+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz,
194+ "%d(%s,%s,%d)", opnd.disp(),
195+ getRegNameString(opnd.base()),
196+ getRegNameString(opnd.index()), opnd.scale());
197+ } else {
198+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "%d(%s)",
199+ opnd.disp(), getRegNameString(opnd.base()));
200+ }
201+ }
202+ if(opnd.is_imm()) {
203+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "#%x",
204+ (int)opnd.imm());
205+ }
206+ if(opnd.is_reg()) {
207+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "%s",
208+ getRegNameString(opnd.reg()));
209+ }
210+}
211+//TODO: settle the operand order.
212+//To make the printout match the operand order of the assembly in the .S files,
213+//the order is reversed here.
214+void printDecoderInst(Inst & decInst) {
215+ unsigned int sz;
216+ if(!dump_x86_inst) return;
217+ sz = strlen(tmpBuffer);
218+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "%s ",
219+ EncoderBase::toStr(decInst.mn));
220+ for(unsigned int k = 0; k < decInst.argc; k++) {
221+ if(k > 0) {
222+ sz = strlen(tmpBuffer);
223+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, ", ");
224+ }
225+ printOperand(decInst.operands[decInst.argc-1-k]);
226+ }
227+ ALOGE("%s", tmpBuffer);
228+}
229+void printOperands(EncoderBase::Operands& opnds) {
230+ unsigned int sz;
231+ if(!dump_x86_inst) return;
232+ for(unsigned int k = 0; k < opnds.count(); k++) {
233+ if(k > 0) {
234+ sz = strlen(tmpBuffer);
235+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, ", ");
236+ }
237+ printOperand(opnds[opnds.count()-1-k]);
238+ }
239+}
240+void printEncoderInst(Mnemonic m, EncoderBase::Operands& opnds) {
241+ if(!dump_x86_inst) return;
242+ snprintf(tmpBuffer, MAX_DECODED_STRING_LEN, "--- ENC %s ",
243+ EncoderBase::toStr(m));
244+ printOperands(opnds);
245+ ALOGE("%s", tmpBuffer);
246+}
247+int decodeThenPrint(char* stream_start) {
248+ if(!dump_x86_inst) return 0;
249+ snprintf(tmpBuffer, MAX_DECODED_STRING_LEN, "--- INST @ %p: ",
250+ stream_start);
251+ Inst decInst;
252+ unsigned numBytes = DecoderBase::decode(stream_start, &decInst);
253+ printDecoderInst(decInst);
254+ return numBytes;
255+}
256+
257+extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm(Mnemonic m, OpndSize size, int imm, char * stream) {
258+ EncoderBase::Operands args;
259+ //assert(imm.get_size() == size_32);
260+ add_imm(args, size, imm, true/*is_signed*/);
261+#ifdef PRINT_ENCODER_STREAM
262+ char* stream_start = stream;
263+#endif
264+ stream = (char *)EncoderBase::encode(stream, m, args);
265+#ifdef PRINT_ENCODER_STREAM
266+ printEncoderInst(m, args);
267+ decodeThenPrint(stream_start);
268+#endif
269+ return stream;
270+}
271+extern "C" ENCODER_DECLARE_EXPORT unsigned encoder_get_inst_size(char * stream) {
272+ Inst decInst;
273+ unsigned numBytes = DecoderBase::decode(stream, &decInst);
274+ return numBytes;
275+}
276+
277+extern "C" ENCODER_DECLARE_EXPORT uintptr_t encoder_get_cur_operand_offset(int opnd_id)
278+{
279+ return (uintptr_t)EncoderBase::getOpndLocation(opnd_id);
280+}
281+
282+extern "C" ENCODER_DECLARE_EXPORT char * encoder_update_imm(int imm, char * stream) {
283+ Inst decInst;
284+ EncoderBase::Operands args;
285+
286+ //Decode the instruction
287+ DecoderBase::decode(stream, &decInst);
288+
289+ add_imm(args, decInst.operands[0].size(), imm, true/*is_signed*/);
290+ char* stream_next = (char *)EncoderBase::encode(stream, decInst.mn, args);
291+#ifdef PRINT_ENCODER_STREAM
292+ printEncoderInst(decInst.mn, args);
293+ decodeThenPrint(stream);
294+#endif
295+ return stream_next;
296+}
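// Usage sketch (illustrative only): encoder_update_imm lets a caller emit an
// instruction with a placeholder immediate and patch it once the real value
// is known. Assuming Mnemonic_JMP is present in the mnemonic table (as in the
// Dalvik libenc this code derives from), a forward jump could be fixed up so:
static char * sketch_patch_forward_jump(char * stream, int real_disp) {
    char * jump_at = stream;
    stream = encoder_imm(Mnemonic_JMP, OpndSize_32, 0 /*placeholder*/, stream);
    encoder_update_imm(real_disp, jump_at);  // rewrite the immediate in place
    return stream;
}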
297+extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem(Mnemonic m, OpndSize size,
298+ int disp, int base_reg, bool isBasePhysical, char * stream) {
299+ EncoderBase::Operands args;
300+ add_m(args, base_reg, disp, size);
301+#ifdef PRINT_ENCODER_STREAM
302+ char* stream_start = stream;
303+#endif
304+ stream = (char *)EncoderBase::encode(stream, m, args);
305+#ifdef PRINT_ENCODER_STREAM
306+ printEncoderInst(m, args);
307+ decodeThenPrint(stream_start);
308+#endif
309+ return stream;
310+}
311+extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg(Mnemonic m, OpndSize size,
312+ int reg, bool isPhysical, LowOpndRegType type, char * stream) {
313+ EncoderBase::Operands args;
314+ if(m == Mnemonic_DIV || m == Mnemonic_IDIV || m == Mnemonic_MUL || m == Mnemonic_IMUL) {
315+ add_r(args, 0/*eax*/, size);
316+ add_r(args, 3/*edx*/, size);
317+ }
318+ add_r(args, reg, size);
319+#ifdef PRINT_ENCODER_STREAM
320+ char* stream_start = stream;
321+#endif
322+ stream = (char *)EncoderBase::encode(stream, m, args);
323+#ifdef PRINT_ENCODER_STREAM
324+ printEncoderInst(m, args);
325+ decodeThenPrint(stream_start);
326+#endif
327+ return stream;
328+}
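// Usage sketch (illustrative only): because the implicit EAX/EDX pair is
// added above, a caller of the one-operand multiply/divide forms names only
// the explicit operand, e.g. "imul ecx" (EDX:EAX = EAX * ECX):
//   stream = encoder_reg(Mnemonic_IMUL, OpndSize_32,
//                        PhysicalReg_ECX, true, LowOpndRegType_gp, stream);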
329+//! \brief Allows for different operand sizes
330+extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_reg(Mnemonic m, OpndSize size,
331+ int imm, int reg, bool isPhysical, LowOpndRegType type, char * stream) {
332+ return encoder_imm_reg_diff_sizes(m, size, imm, size, reg, isPhysical, type, stream);
333+}
334+extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_reg_diff_sizes(Mnemonic m, OpndSize srcOpndSize,
335+ int reg, bool isPhysical, OpndSize destOpndSize,
336+ int reg2, bool isPhysical2, LowOpndRegType type, char * stream) {
337+ if((m == Mnemonic_MOV || m == Mnemonic_MOVQ || m == Mnemonic_MOVD) && reg == reg2) return stream;
338+ EncoderBase::Operands args;
339+ add_r(args, reg2, destOpndSize); //destination
340+ if(m == Mnemonic_SAL || m == Mnemonic_SHR || m == Mnemonic_SHL || m == Mnemonic_SAR)
341+ add_r(args, reg, OpndSize_8);
342+ else
343+ add_r(args, reg, srcOpndSize);
344+#ifdef PRINT_ENCODER_STREAM
345+ char* stream_start = stream;
346+#endif
347+ stream = (char *)EncoderBase::encode(stream, m, args);
348+#ifdef PRINT_ENCODER_STREAM
349+ printEncoderInst(m, args);
350+ decodeThenPrint(stream_start);
351+#endif
352+ return stream;
353+}
354+//both operands have same size
355+extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_reg(Mnemonic m, OpndSize size,
356+ int reg, bool isPhysical,
357+ int reg2, bool isPhysical2, LowOpndRegType type, char * stream) {
358+ return encoder_reg_reg_diff_sizes(m, size, reg, isPhysical, size, reg2, isPhysical2, type, stream);
359+}
360+//! \brief Allows for different operand sizes
361+extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize,
362+ int disp, int base_reg, bool isBasePhysical, OpndSize regOpndSize,
363+ int reg, bool isPhysical, LowOpndRegType type, char * stream) {
364+ EncoderBase::Operands args;
365+ add_r(args, reg, regOpndSize);
366+ add_m(args, base_reg, disp, memOpndSize);
367+#ifdef PRINT_ENCODER_STREAM
368+ char* stream_start = stream;
369+#endif
370+ stream = (char *)EncoderBase::encode(stream, m, args);
371+#ifdef PRINT_ENCODER_STREAM
372+ printEncoderInst(m, args);
373+ decodeThenPrint(stream_start);
374+#endif
375+ return stream;
376+}
377+extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_reg(Mnemonic m, OpndSize size,
378+ int disp, int base_reg, bool isBasePhysical,
379+ int reg, bool isPhysical, LowOpndRegType type, char * stream) {
380+ return encoder_mem_to_reg_diff_sizes(m, size, disp, base_reg, isBasePhysical, size, reg, isPhysical, type, stream);
381+}
382+extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_scale_reg(Mnemonic m, OpndSize size,
383+ int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale,
384+ int reg, bool isPhysical, LowOpndRegType type, char * stream) {
385+ EncoderBase::Operands args;
386+ add_r(args, reg, size);
387+ add_m_scale(args, base_reg, index_reg, scale, size);
388+#ifdef PRINT_ENCODER_STREAM
389+ char* stream_start = stream;
390+#endif
391+ stream = (char *)EncoderBase::encode(stream, m, args);
392+#ifdef PRINT_ENCODER_STREAM
393+ printEncoderInst(m, args);
394+ decodeThenPrint(stream_start);
395+#endif
396+ return stream;
397+}
398+extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_mem_scale(Mnemonic m, OpndSize size,
399+ int reg, bool isPhysical,
400+ int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale,
401+ LowOpndRegType type, char * stream) {
402+ EncoderBase::Operands args;
403+ add_m_scale(args, base_reg, index_reg, scale, size);
404+ add_r(args, reg, size);
405+#ifdef PRINT_ENCODER_STREAM
406+ char* stream_start = stream;
407+#endif
408+ stream = (char *)EncoderBase::encode(stream, m, args);
409+#ifdef PRINT_ENCODER_STREAM
410+ printEncoderInst(m, args);
411+ decodeThenPrint(stream_start);
412+#endif
413+ return stream;
414+}
415+//! \brief Allows for different operand sizes
416+extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize,
417+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
418+ OpndSize regOpndSize, int reg, bool isPhysical, LowOpndRegType type, char * stream) {
419+ EncoderBase::Operands args;
420+ add_r(args, reg, regOpndSize);
421+ add_m_disp_scale(args, base_reg, disp, index_reg, scale, memOpndSize);
422+#ifdef PRINT_ENCODER_STREAM
423+ char* stream_start = stream;
424+#endif
425+ stream = (char *)EncoderBase::encode(stream, m, args);
426+#ifdef PRINT_ENCODER_STREAM
427+ printEncoderInst(m, args);
428+ decodeThenPrint(stream_start);
429+#endif
430+ return stream;
431+}
432+extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_reg(Mnemonic m, OpndSize size,
433+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
434+ int reg, bool isPhysical, LowOpndRegType type, char * stream) {
435+ return encoder_mem_disp_scale_to_reg_diff_sizes(m, size, base_reg, isBasePhysical,
436+ disp, index_reg, isIndexPhysical, scale, size, reg, isPhysical,
437+ type, stream);
438+}
439+extern "C" ENCODER_DECLARE_EXPORT char * encoder_movzs_mem_disp_scale_reg(Mnemonic m, OpndSize size,
440+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
441+ int reg, bool isPhysical, LowOpndRegType type, char * stream) {
442+ EncoderBase::Operands args;
443+ add_r(args, reg, OpndSize_32);
444+ add_m_disp_scale(args, base_reg, disp, index_reg, scale, size);
445+#ifdef PRINT_ENCODER_STREAM
446+ char* stream_start = stream;
447+#endif
448+ stream = (char *)EncoderBase::encode(stream, m, args);
449+#ifdef PRINT_ENCODER_STREAM
450+ printEncoderInst(m, args);
451+ decodeThenPrint(stream_start);
452+#endif
453+ return stream;
454+}
455+extern "C" ENCODER_DECLARE_EXPORT char* encoder_reg_mem_disp_scale(Mnemonic m, OpndSize size,
456+ int reg, bool isPhysical,
457+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
458+ LowOpndRegType type, char* stream) {
459+ EncoderBase::Operands args;
460+ add_m_disp_scale(args, base_reg, disp, index_reg, scale, size);
461+ add_r(args, reg, size);
462+#ifdef PRINT_ENCODER_STREAM
463+ char* stream_start = stream;
464+#endif
465+ stream = (char *)EncoderBase::encode(stream, m, args);
466+#ifdef PRINT_ENCODER_STREAM
467+ printEncoderInst(m, args);
468+ decodeThenPrint(stream_start);
469+#endif
470+ return stream;
471+}
472+
473+extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_mem(Mnemonic m, OpndSize size,
474+ int reg, bool isPhysical,
475+ int disp, int base_reg, bool isBasePhysical, LowOpndRegType type, char * stream) {
476+ EncoderBase::Operands args;
477+ add_m(args, base_reg, disp, size);
478+ add_r(args, reg, size);
479+#ifdef PRINT_ENCODER_STREAM
480+ char* stream_start = stream;
481+#endif
482+ if (m == Mnemonic_CMPXCHG) {
483+ //CMPXCHG requires EAX as an implicit operand
484+ add_r(args,PhysicalReg_EAX,size);
485+ //Add the lock prefix so that CMPXCHG stays atomic on multi-core platforms
486+ stream = (char *)EncoderBase::prefix(stream, InstPrefix_LOCK);
487+ }
488+ stream = (char *)EncoderBase::encode(stream, m, args);
489+#ifdef PRINT_ENCODER_STREAM
490+ printEncoderInst(m, args);
491+ decodeThenPrint(stream_start);
492+#endif
493+ return stream;
494+}
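// Usage sketch (illustrative only, placeholder names): a 32-bit
// compare-and-swap through this helper. EAX holds the expected old value
// (added implicitly above) and the lock prefix is emitted automatically:
//   stream = encoder_reg_mem(Mnemonic_CMPXCHG, OpndSize_32,
//                            newValueReg, true /*isPhysical*/,
//                            disp, baseReg, true /*isBasePhysical*/,
//                            LowOpndRegType_gp, stream);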
495+extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_reg_diff_sizes (Mnemonic m, OpndSize sizeImm, int imm,
496+ OpndSize sizeReg, int reg, bool isPhysical, LowOpndRegType type, char * stream)
497+{
498+ //Create the operands
499+ EncoderBase::Operands args;
500+ //Add destination register
501+ add_r (args, reg, sizeReg);
502+ //For imul, we need to add implicit register explicitly
503+ if (m == Mnemonic_IMUL)
504+ {
505+ add_r (args, reg, sizeReg);
506+ }
507+ //Finally add the immediate
508+ add_imm (args, sizeImm, imm, true/*is_signed*/);
509+
510+#ifdef PRINT_ENCODER_STREAM
511+ char* stream_start = stream;
512+#endif
513+
514+ //Now do the encoding
515+ stream = EncoderBase::encode (stream, m, args);
516+
517+#ifdef PRINT_ENCODER_STREAM
518+ printEncoderInst(m, args);
519+ decodeThenPrint(stream_start);
520+#endif
521+
522+ return stream;
523+}
524+extern "C" ENCODER_DECLARE_EXPORT char * encoder_update_imm_rm(int imm, char * stream) {
525+ Inst decInst;
526+ EncoderBase::Operands args;
527+
528+ //Decode the instruction
529+ DecoderBase::decode(stream, &decInst);
530+
531+ args.add(decInst.operands[0]);
532+ add_imm(args, decInst.operands[1].size(), imm, true/*is_signed*/);
533+ char* stream_next = (char *)EncoderBase::encode(stream, decInst.mn, args);
534+#ifdef PRINT_ENCODER_STREAM
535+ printEncoderInst(decInst.mn, args);
536+ decodeThenPrint(stream);
537+#endif
538+ return stream_next;
539+}
540+
541+extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_mem(Mnemonic m, OpndSize size,
542+ int imm,
543+ int disp, int base_reg, bool isBasePhysical, char * stream) {
544+ return encoder_imm_mem_diff_sizes(m, size, imm, size, disp, base_reg, isBasePhysical, stream);
545+}
546+
547+extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_mem_diff_sizes (Mnemonic m, OpndSize immOpndSize, int imm,
548+ OpndSize memOpndSize, int disp, int baseRegister, bool isBasePhysical, char * stream)
549+{
550+ //Add operands
551+ EncoderBase::Operands args;
552+ add_m (args, baseRegister, disp, memOpndSize);
553+ add_imm (args, immOpndSize, imm, true);
554+
555+#ifdef PRINT_ENCODER_STREAM
556+ char* stream_start = stream;
557+#endif
558+
559+ //Do the encoding
560+ stream = EncoderBase::encode (stream, m, args);
561+
562+#ifdef PRINT_ENCODER_STREAM
563+ printEncoderInst(m, args);
564+ decodeThenPrint(stream_start);
565+#endif
566+
567+ return stream;
568+}
569+
570+extern "C" ENCODER_DECLARE_EXPORT char * encoder_fp_mem(Mnemonic m, OpndSize size, int reg,
571+ int disp, int base_reg, bool isBasePhysical, char * stream) {
572+ EncoderBase::Operands args;
573+ add_m(args, base_reg, disp, size);
574+ // a fake FP register as operand
575+ add_fp(args, reg, size == OpndSize_64/*is_double*/);
576+#ifdef PRINT_ENCODER_STREAM
577+ char* stream_start = stream;
578+#endif
579+ stream = (char *)EncoderBase::encode(stream, m, args);
580+#ifdef PRINT_ENCODER_STREAM
581+ printEncoderInst(m, args);
582+ decodeThenPrint(stream_start);
583+#endif
584+ return stream;
585+}
586+extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_fp(Mnemonic m, OpndSize size,
587+ int disp, int base_reg, bool isBasePhysical,
588+ int reg, char * stream) {
589+ EncoderBase::Operands args;
590+ // a fake FP register as operand
591+ add_fp(args, reg, size == OpndSize_64/*is_double*/);
592+ add_m(args, base_reg, disp, size);
593+#ifdef PRINT_ENCODER_STREAM
594+ char* stream_start = stream;
595+#endif
596+ stream = (char *)EncoderBase::encode(stream, m, args);
597+#ifdef PRINT_ENCODER_STREAM
598+ printEncoderInst(m, args);
599+ decodeThenPrint(stream_start);
600+#endif
601+ return stream;
602+}
603+
604+extern "C" ENCODER_DECLARE_EXPORT char * encoder_return(char * stream) {
605+ EncoderBase::Operands args;
606+#ifdef PRINT_ENCODER_STREAM
607+ char* stream_start = stream;
608+#endif
609+ stream = (char *)EncoderBase::encode(stream, Mnemonic_RET, args);
610+#ifdef PRINT_ENCODER_STREAM
611+ printEncoderInst(Mnemonic_RET, args);
612+ decodeThenPrint(stream_start);
613+#endif
614+ return stream;
615+}
616+extern "C" ENCODER_DECLARE_EXPORT char * encoder_compare_fp_stack(bool pop, int reg, bool isDouble, char * stream) {
617+ Mnemonic m = pop ? Mnemonic_FUCOMIP : Mnemonic_FUCOMI;
618+ //one operand or two operands?
619+ //FST ST(i) appears to take a single operand in encoder.inl
620+ EncoderBase::Operands args;
621+ add_fp(args, reg, isDouble);
622+#ifdef PRINT_ENCODER_STREAM
623+ char* stream_start = stream;
624+#endif
625+ stream = (char *)EncoderBase::encode(stream, m, args);
626+#ifdef PRINT_ENCODER_STREAM
627+ printEncoderInst(m, args);
628+ decodeThenPrint(stream_start);
629+#endif
630+ return stream;
631+}
632+extern "C" ENCODER_DECLARE_EXPORT char * encoder_movez_mem_to_reg(OpndSize size,
633+ int disp, int base_reg, bool isBasePhysical,
634+ int reg, bool isPhysical, char * stream) {
635+ EncoderBase::Operands args;
636+ add_r(args, reg, OpndSize_32);
637+ add_m(args, base_reg, disp, size);
638+#ifdef PRINT_ENCODER_STREAM
639+ char* stream_start = stream;
640+#endif
641+ stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVZX, args);
642+#ifdef PRINT_ENCODER_STREAM
643+ printEncoderInst(Mnemonic_MOVZX, args);
644+ decodeThenPrint(stream_start);
645+#endif
646+ return stream;
647+}
648+extern "C" ENCODER_DECLARE_EXPORT char * encoder_moves_mem_to_reg(OpndSize size,
649+ int disp, int base_reg, bool isBasePhysical,
650+ int reg, bool isPhysical, char * stream) {
651+ EncoderBase::Operands args;
652+ add_r(args, reg, OpndSize_32);
653+ add_m(args, base_reg, disp, size);
654+#ifdef PRINT_ENCODER_STREAM
655+ char* stream_start = stream;
656+#endif
657+ stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVSX, args);
658+#ifdef PRINT_ENCODER_STREAM
659+ printEncoderInst(Mnemonic_MOVSX, args);
660+ decodeThenPrint(stream_start);
661+#endif
662+ return stream;
663+}
664+extern "C" ENCODER_DECLARE_EXPORT char * encoder_movez_reg_to_reg(OpndSize size,
665+ int reg, bool isPhysical, int reg2,
666+ bool isPhysical2, LowOpndRegType type, char * stream) {
667+ EncoderBase::Operands args;
668+ add_r(args, reg2, OpndSize_32); //destination
669+ add_r(args, reg, size);
670+#ifdef PRINT_ENCODER_STREAM
671+ char* stream_start = stream;
672+#endif
673+ stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVZX, args);
674+#ifdef PRINT_ENCODER_STREAM
675+ printEncoderInst(Mnemonic_MOVZX, args);
676+ decodeThenPrint(stream_start);
677+#endif
678+ return stream;
679+}
680+extern "C" ENCODER_DECLARE_EXPORT char * encoder_moves_reg_to_reg(OpndSize size,
681+ int reg, bool isPhysical,int reg2,
682+ bool isPhysical2, LowOpndRegType type, char * stream) {
683+ EncoderBase::Operands args;
684+ add_r(args, reg2, OpndSize_32); //destination
685+ add_r(args, reg, size);
686+#ifdef PRINT_ENCODER_STREAM
687+ char* stream_start = stream;
688+#endif
689+ stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVSX, args);
690+#ifdef PRINT_ENCODER_STREAM
691+ printEncoderInst(Mnemonic_MOVSX, args);
692+ decodeThenPrint(stream_start);
693+#endif
694+ return stream;
695+}
696+
697+extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_reg_reg (Mnemonic m, int imm, OpndSize immediateSize,
698+ int sourceReg, OpndSize sourceRegSize, int destReg, OpndSize destRegSize, char * stream)
699+{
700+ EncoderBase::Operands args;
701+
702+ //Add the source and destination registers
703+ add_r (args, destReg, destRegSize);
704+ add_r (args, sourceReg, sourceRegSize);
705+
706+ //Now add the immediate; in the three-operand form it is expected to be the last argument
707+ add_imm (args, immediateSize, imm, true/*is_signed*/);
708+
709+#ifdef PRINT_ENCODER_STREAM
710+ char* stream_start = stream;
711+#endif
712+
713+ //Do the actual encoding
714+ stream = EncoderBase::encode (stream, m, args);
715+
716+#ifdef PRINT_ENCODER_STREAM
717+ printEncoderInst (m, args);
718+ decodeThenPrint (stream_start);
719+#endif
720+
721+ //Return the updated stream pointer
722+ return stream;
723+}
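// Usage sketch (illustrative only): the classic use of the three-operand
// form is IMUL, e.g. "imul edx, ecx, 100" (EDX = ECX * 100):
//   stream = encoder_imm_reg_reg(Mnemonic_IMUL, 100, OpndSize_32,
//                                PhysicalReg_ECX, OpndSize_32,
//                                PhysicalReg_EDX, OpndSize_32, stream);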
724+
725+/**
726+ * @brief Generates variable sized nop instructions.
727+ * @param numBytes Number of bytes for the nop instruction. If this value is
728+ * larger than 9 bytes, more than one nop instruction will be generated.
729+ * @param stream Instruction stream where to place the nops
730+ * @return Updated instruction stream pointer after generating the nops
731+ */
732+extern "C" ENCODER_DECLARE_EXPORT char * encoder_nops(unsigned numBytes, char * stream) {
733+ return EncoderBase::nops(stream, numBytes);
734+}
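// Usage sketch (illustrative only): pad the code stream to a 16-byte
// boundary, e.g. before a branch target; per the comment above, padding
// larger than 9 bytes is split into multiple nop instructions:
static char * sketch_align16(char * stream) {
    unsigned pad = (16 - ((uintptr_t)stream & 15)) & 15;
    return encoder_nops(pad, stream);
}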
735+
736+// Disassemble the operand "opnd" and put the readable format in "strbuf"
737+// up to a string length of "len".
738+unsigned int DisassembleOperandToBuf(const EncoderBase::Operand& opnd, char* strbuf, unsigned int len)
739+{
740+ unsigned int sz = 0;
741+ if(opnd.size() != OpndSize_32) {
742+ const char * opndSizeString = getOpndSizeString(opnd.size());
743+
744+ if (opndSizeString == NULL) {
745+ // If the string that represents the operand size is null, the
746+ // operand size is an invalid value. Although this could be a
747+ // problem if the instruction is corrupted, failing to disassemble
748+ // is technically not fatal. Thus, let's warn but proceed with
749+ // an empty string.
750+ ALOGW("JIT-WARNING: Cannot decode instruction operand size.");
751+ opndSizeString = "";
752+ }
753+
754+ sz += snprintf(&strbuf[sz], len-sz, "%s ", opndSizeString);
755+ }
756+ if(opnd.is_mem()) {
757+ if(opnd.scale() != 0) {
758+ sz += snprintf(&strbuf[sz], len-sz, "%d(%s,%s,%d)", opnd.disp(),
759+ getRegNameString(opnd.base()),
760+ getRegNameString(opnd.index()), opnd.scale());
761+ } else {
762+ sz += snprintf(&strbuf[sz], len-sz, "%d(%s)",
763+ opnd.disp(), getRegNameString(opnd.base()));
764+ }
765+ } else if(opnd.is_imm()) {
766+ sz += snprintf(&strbuf[sz], len-sz, "#%x", (int)opnd.imm());
767+ } else if(opnd.is_reg()) {
768+ sz += snprintf(&strbuf[sz], len-sz, "%s",
769+ getRegNameString(opnd.reg()));
770+ }
771+ return sz;
772+}
773+
774+// Disassemble the instruction "decInst" and put the readable format
775+// in "strbuf" up to a string length of "len".
776+void DisassembleInstToBuf(Inst& decInst, char* strbuf, unsigned int len)
777+{
778+ unsigned int sz = 0;
779+ int k;
780+ sz += snprintf(&strbuf[sz], len-sz, "%s ", EncoderBase::toStr(decInst.mn));
781+ if (decInst.argc > 0) {
782+ sz += DisassembleOperandToBuf(decInst.operands[decInst.argc-1],
783+ &strbuf[sz], len-sz);
784+ for(k = decInst.argc-2; k >= 0; k--) {
785+ sz += snprintf(&strbuf[sz], len-sz, ", ");
786+ sz += DisassembleOperandToBuf(decInst.operands[k], &strbuf[sz], len-sz);
787+ }
788+ }
789+}
790+
791+// Disassemble the x86 instruction pointed to by the code pointer "stream".
792+// Put the disassembled text in "strbuf", up to a string length of "len".
793+// Return the code pointer just past the disassembled x86 instruction.
794+extern "C" ENCODER_DECLARE_EXPORT
795+char* decoder_disassemble_instr(char* stream, char* strbuf, unsigned int len)
796+{
797+ Inst decInst;
798+ unsigned numBytes = DecoderBase::decode(stream, &decInst);
799+ DisassembleInstToBuf(decInst, strbuf, len);
800+ return (stream + numBytes);
801+}
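// Usage sketch (illustrative only): walk a freshly generated fragment and
// log it one instruction per line, reusing this file's buffer size and
// logging style:
static void sketch_dump_fragment(char * start, char * end) {
    char line[MAX_DECODED_STRING_LEN];
    while (start < end) {
        start = decoder_disassemble_instr(start, line, sizeof(line));
        ALOGE("%s", line);
    }
}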
802+
803+/**
804+ * @brief Physical register char* counterparts
805+ */
806+static const char * PhysicalRegString[] = { "eax", "ebx", "ecx", "edx", "edi",
807+ "esi", "esp", "ebp", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
808+ "xmm6", "xmm7", "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7",
809+ "null"
810+ };
811+
812+/**
813+ * @brief Scratch register char* counterparts
814+ */
815+static const char * ScratchRegString[] = { "scratch1", "scratch2", "scratch3",
816+ "scratch4", "scratch5", "scratch6", "scratch7", "scratch8", "scratch9",
817+ "scratch10" };
818+
819+extern "C" ENCODER_DECLARE_EXPORT
820+/**
821+ * @brief Transform a physical register into its char* counterpart
822+ * @param reg the PhysicalReg we want to have a char* equivalent
823+ * @return the register reg in char* form
824+ */
825+const char * physicalRegToString(PhysicalReg reg)
826+{
827+ if (reg < PhysicalReg_Null) {
828+ return PhysicalRegString[reg];
829+ } else if (reg >= PhysicalReg_SCRATCH_1 && reg <= PhysicalReg_SCRATCH_10) {
830+ return ScratchRegString[reg - PhysicalReg_SCRATCH_1];
831+ } else if (reg == PhysicalReg_Null) {
832+ return "null";
833+ } else {
834+ return "corrupted-data";
835+ }
836+}
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_wrapper.h
@@ -0,0 +1,283 @@
1+/*
2+ * Copyright (C) 2012 The Android Open Source Project
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ * See the License for the specific language governing permissions and
14+ * limitations under the License.
15+ */
16+
17+#ifndef _VM_ENC_WRAPPER_H_
18+#define _VM_ENC_WRAPPER_H_
19+
20+#include "enc_defs_ext.h"
21+
22+extern bool dump_x86_inst;
23+typedef enum PhysicalReg {
24+ // StartOfGPMarker is currently initialized to 0 so that it matches the
25+ // register indices in Reg_No. Ideally, however, PhysicalReg_Null would
26+ // be 0 and the rest moved over.
27+ PhysicalReg_StartOfGPMarker = 0,
28+ PhysicalReg_EAX = PhysicalReg_StartOfGPMarker,
29+ PhysicalReg_EBX, PhysicalReg_ECX, PhysicalReg_EDX,
30+ PhysicalReg_EDI, PhysicalReg_ESI, PhysicalReg_ESP, PhysicalReg_EBP,
31+ PhysicalReg_EndOfGPMarker = PhysicalReg_EBP,
32+
33+ PhysicalReg_StartOfXmmMarker,
34+ PhysicalReg_XMM0 = PhysicalReg_StartOfXmmMarker,
35+ PhysicalReg_XMM1, PhysicalReg_XMM2, PhysicalReg_XMM3,
36+ PhysicalReg_XMM4, PhysicalReg_XMM5, PhysicalReg_XMM6, PhysicalReg_XMM7,
37+ PhysicalReg_EndOfXmmMarker = PhysicalReg_XMM7,
38+
39+ PhysicalReg_StartOfX87Marker,
40+ PhysicalReg_ST0 = PhysicalReg_StartOfX87Marker, PhysicalReg_ST1,
41+ PhysicalReg_ST2, PhysicalReg_ST3, PhysicalReg_ST4, PhysicalReg_ST5,
42+ PhysicalReg_ST6, PhysicalReg_ST7,
43+ PhysicalReg_EndOfX87Marker = PhysicalReg_ST7,
44+
45+ PhysicalReg_Null,
46+ //used as scratch logical registers in NCG O1;
47+ //they must not overlap the regular logical registers, so they start at 100
48+ PhysicalReg_SCRATCH_1 = 100, PhysicalReg_SCRATCH_2, PhysicalReg_SCRATCH_3, PhysicalReg_SCRATCH_4,
49+ PhysicalReg_SCRATCH_5, PhysicalReg_SCRATCH_6, PhysicalReg_SCRATCH_7, PhysicalReg_SCRATCH_8,
50+ PhysicalReg_SCRATCH_9, PhysicalReg_SCRATCH_10,
51+
52+ //This should be the last entry
53+ PhysicalReg_Last = PhysicalReg_SCRATCH_10
54+} PhysicalReg;
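/* Usage sketch (illustrative only): the scratch entries are purely logical
 * and deliberately disjoint from the physical range, e.g. via
 * physicalRegToString (declared at the bottom of this header):
 *   physicalRegToString(PhysicalReg_EAX)       -> "eax"
 *   physicalRegToString(PhysicalReg_SCRATCH_1) -> "scratch1"
 */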
55+
56+typedef enum Reg_No {
57+#ifdef _EM64T_
58+ rax_reg = 0,rbx_reg, rcx_reg, rdx_reg,
59+ rdi_reg, rsi_reg, rsp_reg, rbp_reg,
60+ r8_reg, r9_reg, r10_reg, r11_reg,
61+ r12_reg, r13_reg, r14_reg, r15_reg,
62+ xmm0_reg, xmm1_reg, xmm2_reg, xmm3_reg,
63+ xmm4_reg, xmm5_reg, xmm6_reg, xmm7_reg,
64+ xmm8_reg, xmm9_reg, xmm10_reg, xmm11_reg,
65+ xmm12_reg, xmm13_reg, xmm14_reg, xmm15_reg,
66+
67+#else // !defined(_EM64T_)
68+
69+ eax_reg = 0,ebx_reg, ecx_reg, edx_reg,
70+ edi_reg, esi_reg, esp_reg, ebp_reg,
71+ xmm0_reg, xmm1_reg, xmm2_reg, xmm3_reg,
72+ xmm4_reg, xmm5_reg, xmm6_reg, xmm7_reg,
73+ fs_reg,
74+#endif
75+ /** @brief Total number of registers.*/
76+ n_reg
77+} Reg_No;
78+//
79+// instruction operand sizes: 8,16,32,64 bits
80+//
81+typedef enum Opnd_Size {
82+ size_8 = 0,
83+ size_16,
84+ size_32,
85+ size_64,
86+ n_size,
87+#ifdef _EM64T_
88+ size_platf = size_64
89+#else
90+ size_platf = size_32
91+#endif
92+} Opnd_Size;
93+
94+//
95+// opcodes for alu instructions
96+//
97+typedef enum ALU_Opcode {
98+ add_opc = 0,or_opc, adc_opc, sbb_opc,
99+ and_opc, sub_opc, xor_opc, cmp_opc,
100+ mul_opc, imul_opc, div_opc, idiv_opc,
101+ sll_opc, srl_opc, sra_opc, //shift right arithmetic
102+ shl_opc, shr_opc,
103+ sal_opc, sar_opc,
104+ neg_opc, not_opc, andn_opc,
105+ n_alu
106+} ALU_Opcode;
107+
108+typedef enum ConditionCode {
109+ Condition_O = 0,
110+ Condition_NO = 1,
111+ Condition_B = 2,
112+ Condition_NAE = Condition_B,
113+ Condition_C = Condition_B,
114+ Condition_NB = 3,
115+ Condition_AE = Condition_NB,
116+ Condition_NC = Condition_NB,
117+ Condition_Z = 4,
118+ Condition_E = Condition_Z,
119+ Condition_NZ = 5,
120+ Condition_NE = Condition_NZ,
121+ Condition_BE = 6,
122+ Condition_NA = Condition_BE,
123+ Condition_NBE = 7,
124+ Condition_A = Condition_NBE,
125+
126+ Condition_S = 8,
127+ Condition_NS = 9,
128+ Condition_P = 10,
129+ Condition_PE = Condition_P,
130+ Condition_NP = 11,
131+ Condition_PO = Condition_NP,
132+ Condition_L = 12,
133+ Condition_NGE = Condition_L,
134+ Condition_NL = 13,
135+ Condition_GE = Condition_NL,
136+ Condition_LE = 14,
137+ Condition_NG = Condition_LE,
138+ Condition_NLE = 15,
139+ Condition_G = Condition_NLE,
140+ Condition_Count = 16
141+} ConditionCode;
142+
143+//
144+// prefix code
145+//
146+typedef enum InstrPrefix {
147+ no_prefix,
148+ lock_prefix = 0xF0,
149+ hint_branch_taken_prefix = 0x2E,
150+ hint_branch_not_taken_prefix = 0x3E,
151+ prefix_repne = 0xF2,
152+ prefix_repnz = prefix_repne,
153+ prefix_repe = 0xF3,
154+ prefix_repz = prefix_repe,
155+ prefix_rep = 0xF3,
156+ prefix_cs = 0x2E,
157+ prefix_ss = 0x36,
158+ prefix_ds = 0x3E,
159+ prefix_es = 0x26,
160+ prefix_fs = 0x64,
161+ prefix_gs = 0x65
162+} InstrPrefix;
163+
164+enum LowOpndRegType
165+{
166+ LowOpndRegType_gp = 0,
167+ LowOpndRegType_fs = 1,
168+ LowOpndRegType_xmm = 2,
169+ LowOpndRegType_fs_s = 3,
170+ LowOpndRegType_ss = 4,
171+ LowOpndRegType_invalid = 256,
172+};
173+
174+enum LogicalRegType
175+{
176+ LogicalType_invalid = 0,
177+ LowOpndRegType_scratch = 8,
178+ LowOpndRegType_temp = 16,
179+ LowOpndRegType_hard = 32,
180+ LowOpndRegType_virtual = 64,
181+};
182+
183+//If inlining is enabled, enc_wrapper.cpp is separated into two files, one of them the .inl;
184+//enc_wrapper.cpp needs to handle both cases
185+#ifdef ENCODER_INLINE
186+ #define ENCODER_DECLARE_EXPORT inline
187+ #include "enc_wrapper.inl"
188+#else
189+ #define ENCODER_DECLARE_EXPORT
190+#endif
191+
192+#ifdef __cplusplus
193+extern "C"
194+{
195+#endif
196+ENCODER_DECLARE_EXPORT char* encoder_imm(Mnemonic m, OpndSize size,
197+ int imm, char* stream);
198+ENCODER_DECLARE_EXPORT unsigned encoder_get_inst_size(char * stream);
199+ENCODER_DECLARE_EXPORT char* encoder_update_imm(int imm, char * stream);
200+ENCODER_DECLARE_EXPORT char* encoder_mem(Mnemonic m, OpndSize size,
201+ int disp, int base_reg, bool isBasePhysical, char* stream);
202+ENCODER_DECLARE_EXPORT char* encoder_reg(Mnemonic m, OpndSize size,
203+ int reg, bool isPhysical, LowOpndRegType type, char* stream);
204+ENCODER_DECLARE_EXPORT char* encoder_reg_reg(Mnemonic m, OpndSize size,
205+ int reg, bool isPhysical,
206+ int reg2, bool isPhysical2, LowOpndRegType type, char* stream);
207+ENCODER_DECLARE_EXPORT char* encoder_reg_reg_diff_sizes(Mnemonic m, OpndSize srcOpndSize,
208+ int reg, bool isPhysical, OpndSize destOpndSize,
209+ int reg2, bool isPhysical2, LowOpndRegType type, char* stream);
210+ENCODER_DECLARE_EXPORT char* encoder_mem_reg(Mnemonic m, OpndSize size,
211+ int disp, int base_reg, bool isBasePhysical,
212+ int reg, bool isPhysical, LowOpndRegType type, char* stream);
213+ENCODER_DECLARE_EXPORT char* encoder_mem_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize,
214+ int disp, int base_reg, bool isBasePhysical, OpndSize regOpndSize,
215+ int reg, bool isPhysical, LowOpndRegType type, char* stream);
216+ENCODER_DECLARE_EXPORT char* encoder_mem_scale_reg(Mnemonic m, OpndSize size,
217+ int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale,
218+ int reg, bool isPhysical, LowOpndRegType type, char* stream);
219+ENCODER_DECLARE_EXPORT char* encoder_reg_mem_scale(Mnemonic m, OpndSize size,
220+ int reg, bool isPhysical,
221+ int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale,
222+ LowOpndRegType type, char* stream);
223+ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_reg(Mnemonic m, OpndSize size,
224+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
225+ int reg, bool isPhysical, LowOpndRegType type, char * stream);
226+ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize,
227+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
228+ OpndSize regOpndSize, int reg, bool isPhysical, LowOpndRegType type, char * stream);
229+ENCODER_DECLARE_EXPORT char * encoder_movzs_mem_disp_scale_reg(Mnemonic m, OpndSize size,
230+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
231+ int reg, bool isPhysical, LowOpndRegType type, char * stream);
232+ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_to_reg_2(Mnemonic m, OpndSize memOpndSize,
233+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
234+ OpndSize regOpndSize, int reg, bool isPhysical, LowOpndRegType type, char * stream);
235+ENCODER_DECLARE_EXPORT char* encoder_reg_mem_disp_scale(Mnemonic m, OpndSize size,
236+ int reg, bool isPhysical,
237+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
238+ LowOpndRegType type, char* stream);
239+ENCODER_DECLARE_EXPORT char* encoder_reg_mem(Mnemonic m, OpndSize size,
240+ int reg, bool isPhysical,
241+ int disp, int base_reg, bool isBasePhysical, LowOpndRegType type, char* stream);
242+ENCODER_DECLARE_EXPORT char* encoder_imm_reg(Mnemonic m, OpndSize size,
243+ int imm, int reg, bool isPhysical, LowOpndRegType type, char* stream);
244+ENCODER_DECLARE_EXPORT char * encoder_imm_reg_diff_sizes(Mnemonic m, OpndSize sizeImm,
245+ int imm, OpndSize sizeReg, int reg, bool isPhysical, LowOpndRegType type, char * stream);
246+ENCODER_DECLARE_EXPORT char * encoder_update_imm_rm(int imm, char * stream);
247+ENCODER_DECLARE_EXPORT char* encoder_imm_mem(Mnemonic m, OpndSize size,
248+ int imm,
249+ int disp, int base_reg, bool isBasePhysical, char* stream);
250+ENCODER_DECLARE_EXPORT char * encoder_imm_mem_diff_sizes (Mnemonic m, OpndSize immOpndSize, int imm,
251+ OpndSize memOpndSize, int disp, int baseRegister, bool isBasePhysical, char * stream);
252+ENCODER_DECLARE_EXPORT char* encoder_fp_mem(Mnemonic m, OpndSize size, int reg,
253+ int disp, int base_reg, bool isBasePhysical, char* stream);
254+ENCODER_DECLARE_EXPORT char* encoder_mem_fp(Mnemonic m, OpndSize size,
255+ int disp, int base_reg, bool isBasePhysical,
256+ int reg, char* stream);
257+ENCODER_DECLARE_EXPORT char* encoder_return(char* stream);
258+ENCODER_DECLARE_EXPORT char* encoder_compare_fp_stack(bool pop, int reg, bool isDouble, char* stream);
259+ENCODER_DECLARE_EXPORT char* encoder_movez_mem_to_reg(OpndSize size,
260+ int disp, int base_reg, bool isBasePhysical,
261+ int reg, bool isPhysical, char* stream);
262+ENCODER_DECLARE_EXPORT char* encoder_moves_mem_to_reg(OpndSize size,
263+ int disp, int base_reg, bool isBasePhysical,
264+ int reg, bool isPhysical, char* stream);
265+ENCODER_DECLARE_EXPORT char * encoder_movez_reg_to_reg(OpndSize size,
266+ int reg, bool isPhysical, int reg2,
267+ bool isPhysical2, LowOpndRegType type, char * stream);
268+ENCODER_DECLARE_EXPORT char * encoder_moves_reg_to_reg(OpndSize size,
269+ int reg, bool isPhysical, int reg2,
270+ bool isPhysical2, LowOpndRegType type, char * stream);
271+ENCODER_DECLARE_EXPORT char * encoder_imm_reg_reg (Mnemonic m, int imm, OpndSize immediateSize,
272+ int sourceReg, OpndSize sourceRegSize, int destReg,
273+ OpndSize destRegSize, char * stream);
274+ENCODER_DECLARE_EXPORT char * encoder_nops(unsigned numBytes, char * stream);
275+ENCODER_DECLARE_EXPORT int decodeThenPrint(char* stream_start);
276+ENCODER_DECLARE_EXPORT char* decoder_disassemble_instr(char* stream, char* strbuf, unsigned int len);
277+
278+//Provide a char* equivalent to a PhysicalReg type
279+ENCODER_DECLARE_EXPORT const char * physicalRegToString(PhysicalReg reg);
280+#ifdef __cplusplus
281+}
282+#endif
283+#endif // _VM_ENC_WRAPPER_H_
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/encoder.h
@@ -0,0 +1,717 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+/**
21+ * @file
22+ * @brief Simple interface for generating processor instructions.
23+ *
24+ * The interface works for both IA32 and EM64T. By default, only IA32
25+ * capabilities are exposed. To enable the EM64T features, the _EM64T_ macro
26+ * must be defined (and, of course, a proper library version must be used).
27+ *
28+ * The interface is based on the original ia32.h encoder interface,
29+ * with some simplifications and add-ons - EM64T-specific, SSE and SSE2.
30+ *
31+ * The interface is mostly intended for existing legacy code such as the LIL
32+ * code generator. From the implementation point of view, it is just a wrapper
33+ * around the EncoderBase functionality.
34+ */
35+
36+#ifndef _VM_ENCODER_H_
37+#define _VM_ENCODER_H_
38+
39+#include <limits.h>
40+#include "enc_base.h"
41+//#include "open/types.h"
42+
43+#ifdef _EM64T_
44+// size of general-purpose value on the stack in bytes
45+#define GR_STACK_SIZE 8
46+// size of floating-point value on the stack in bytes
47+#define FR_STACK_SIZE 8
48+
49+#if defined(WIN32) || defined(_WIN64)
50+ // maximum number of GP registers for inputs
51+ const int MAX_GR = 4;
52+ // maximum number of FP registers for inputs
53+ const int MAX_FR = 4;
54+ // WIN64 reserves 4 words for shadow space
55+ const int SHADOW = 4 * GR_STACK_SIZE;
56+#else
57+ // maximum number of GP registers for inputs
58+ const int MAX_GR = 6;
59+ // maximum number of FP registers for inputs
60+ const int MAX_FR = 8;
61+ // Linux x64 doesn't reserve shadow space
62+ const int SHADOW = 0;
63+#endif
64+
65+#else
66+// size of general-purpose value on the stack in bytes
67+#define GR_STACK_SIZE 4
68+// size of floating-point value on the stack in bytes
69+#define FR_STACK_SIZE 8
70+
71+// maximum number of GP registers for inputs
72+const int MAX_GR = 0;
73+// maximum number of FP registers for inputs
74+const int MAX_FR = 0;
75+#endif
76+
77+typedef enum Reg_No {
78+#ifdef _EM64T_
79+ rax_reg = 0,rbx_reg, rcx_reg, rdx_reg,
80+ rdi_reg, rsi_reg, rsp_reg, rbp_reg,
81+ r8_reg, r9_reg, r10_reg, r11_reg,
82+ r12_reg, r13_reg, r14_reg, r15_reg,
83+ xmm0_reg, xmm1_reg, xmm2_reg, xmm3_reg,
84+ xmm4_reg, xmm5_reg, xmm6_reg, xmm7_reg,
85+ xmm8_reg, xmm9_reg, xmm10_reg, xmm11_reg,
86+ xmm12_reg, xmm13_reg, xmm14_reg, xmm15_reg,
87+
88+#else // !defined(_EM64T_)
89+
90+ eax_reg = 0,ebx_reg, ecx_reg, edx_reg,
91+ edi_reg, esi_reg, esp_reg, ebp_reg,
92+ xmm0_reg, xmm1_reg, xmm2_reg, xmm3_reg,
93+ xmm4_reg, xmm5_reg, xmm6_reg, xmm7_reg,
94+ fs_reg,
95+#endif
96+ /** @brief Total number of registers.*/
97+ n_reg
98+} Reg_No;
99+//
100+// instruction operand sizes: 8,16,32,64 bits
101+//
102+typedef enum Opnd_Size {
103+ size_8 = 0,
104+ size_16,
105+ size_32,
106+ size_64,
107+ n_size,
108+#ifdef _EM64T_
109+ size_platf = size_64
110+#else
111+ size_platf = size_32
112+#endif
113+} Opnd_Size;
114+
115+//
116+// opcodes for alu instructions
117+//
118+typedef enum ALU_Opcode {
119+ add_opc = 0,or_opc, adc_opc, sbb_opc,
120+ and_opc, sub_opc, xor_opc, cmp_opc,
121+ n_alu
122+} ALU_Opcode;
123+
124+//
125+// opcodes for shift instructions
126+//
127+typedef enum Shift_Opcode {
128+ shld_opc, shrd_opc, shl_opc, shr_opc,
129+ sar_opc, ror_opc, max_shift_opcode=6, n_shift = 6
130+} Shift_Opcode;
131+
132+typedef enum ConditionCode {
133+ Condition_O = 0,
134+ Condition_NO = 1,
135+ Condition_B = 2,
136+ Condition_NAE = Condition_B,
137+ Condition_C = Condition_B,
138+ Condition_NB = 3,
139+ Condition_AE = Condition_NB,
140+ Condition_NC = Condition_NB,
141+ Condition_Z = 4,
142+ Condition_E = Condition_Z,
143+ Condition_NZ = 5,
144+ Condition_NE = Condition_NZ,
145+ Condition_BE = 6,
146+ Condition_NA = Condition_BE,
147+ Condition_NBE = 7,
148+ Condition_A = Condition_NBE,
149+
150+ Condition_S = 8,
151+ Condition_NS = 9,
152+ Condition_P = 10,
153+ Condition_PE = Condition_P,
154+ Condition_NP = 11,
155+ Condition_PO = Condition_NP,
156+ Condition_L = 12,
157+ Condition_NGE = Condition_L,
158+ Condition_NL = 13,
159+ Condition_GE = Condition_NL,
160+ Condition_LE = 14,
161+ Condition_NG = Condition_LE,
162+ Condition_NLE = 15,
163+ Condition_G = Condition_NLE,
164+ Condition_Count = 16
165+} ConditionCode;
166+
167+//
168+// prefix code
169+//
170+typedef enum InstrPrefix {
171+ no_prefix,
172+ lock_prefix = 0xF0,
173+ hint_branch_taken_prefix = 0x2E,
174+ hint_branch_not_taken_prefix = 0x3E,
175+ prefix_repne = 0xF2,
176+ prefix_repnz = prefix_repne,
177+ prefix_repe = 0xF3,
178+ prefix_repz = prefix_repe,
179+ prefix_rep = 0xF3,
180+ prefix_cs = 0x2E,
181+ prefix_ss = 0x36,
182+ prefix_ds = 0x3E,
183+ prefix_es = 0x26,
184+ prefix_fs = 0x64,
185+ prefix_gs = 0x65
186+} InstrPrefix;
187+
188+
189+//
190+// an instruction operand
191+//
192+class Opnd {
193+
194+protected:
195+ enum Tag { SignedImm, UnsignedImm, Reg, Mem, FP, XMM };
196+
197+ const Tag tag;
198+
199+ Opnd(Tag t): tag(t) {}
200+
201+public:
202+ void * operator new(size_t, void * mem) {
203+ return mem;
204+ }
205+
206+ void operator delete(void *) {}
207+
208+ void operator delete(void *, void *) {}
209+
210+private:
211+ // disallow copying
212+ Opnd(const Opnd &): tag(Mem) { assert(false); }
213+ Opnd& operator=(const Opnd &) { assert(false); return *this; }
214+};
215+typedef int I_32;
216+class Imm_Opnd: public Opnd {
217+
218+protected:
219+ union {
220+#ifdef _EM64T_
221+ int64 value;
222+ unsigned char bytes[8];
223+#else
224+ I_32 value;
225+ unsigned char bytes[4];
226+#endif
227+ };
228+ Opnd_Size size;
229+
230+public:
231+ Imm_Opnd(I_32 val, bool isSigned = true):
232+ Opnd(isSigned ? SignedImm : UnsignedImm), value(val), size(size_32) {
233+ if (isSigned) {
234+ if (CHAR_MIN <= val && val <= CHAR_MAX) {
235+ size = size_8;
236+ } else if (SHRT_MIN <= val && val <= SHRT_MAX) {
237+ size = size_16;
238+ }
239+ } else {
240+ assert(val >= 0);
241+ if (val <= UCHAR_MAX) {
242+ size = size_8;
243+ } else if (val <= USHRT_MAX) {
244+ size = size_16;
245+ }
246+ }
247+ }
248+ Imm_Opnd(const Imm_Opnd& that): Opnd(that.tag), value(that.value), size(that.size) {};
249+
250+#ifdef _EM64T_
251+ Imm_Opnd(Opnd_Size sz, int64 val, bool isSigned = true):
252+ Opnd(isSigned ? SignedImm : UnsignedImm), value(val), size(sz) {
253+#ifndef NDEBUG
254+ switch (size) {
255+ case size_8:
256+ assert(val == (int64)(I_8)val);
257+ break;
258+ case size_16:
259+ assert(val == (int64)(int16)val);
260+ break;
261+ case size_32:
262+ assert(val == (int64)(I_32)val);
263+ break;
264+ case size_64:
265+ break;
266+ case n_size:
267+ assert(false);
268+ break;
269+ }
270+#endif // NDEBUG
271+ }
272+
273+ int64 get_value() const { return value; }
274+
275+#else
276+
277+ Imm_Opnd(Opnd_Size sz, I_32 val, int isSigned = true):
278+ Opnd(isSigned ? SignedImm : UnsignedImm), value(val), size(sz) {
279+#ifndef NDEBUG
280+ switch (size) {
281+ case size_8:
282+ assert((I_32)val == (I_32)(I_8)val);
283+ break;
284+ case size_16:
285+ assert((I_32)val == (I_32)(int16)val);
286+ break;
287+ case size_32:
288+ break;
289+ case size_64:
290+ case n_size:
291+ assert(false);
292+ break;
293+ }
294+#endif // NDEBUG
295+ }
296+
297+ I_32 get_value() const { return value; }
298+
299+#endif
300+ Opnd_Size get_size() const { return size; }
301+ bool is_signed() const { return tag == SignedImm; }
302+};
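// Usage sketch (illustrative only): the one-argument constructor narrows the
// operand size automatically, which lets the encoder pick the short
// instruction forms:
//   Imm_Opnd(5);       // size_8
//   Imm_Opnd(1000);    // size_16
//   Imm_Opnd(100000);  // size_32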
303+
304+class RM_Opnd: public Opnd {
305+
306+public:
307+ bool is_reg() const { return tag != SignedImm && tag != UnsignedImm && tag != Mem; }
308+
309+protected:
310+ RM_Opnd(Tag t): Opnd(t) {}
311+
312+private:
313+ // disallow copying
314+ RM_Opnd(const RM_Opnd &): Opnd(Reg) { assert(false); }
315+};
316+
317+class R_Opnd: public RM_Opnd {
318+
319+protected:
320+ Reg_No _reg_no;
321+
322+public:
323+ R_Opnd(Reg_No r): RM_Opnd(Reg), _reg_no(r) {}
324+ Reg_No reg_no() const { return _reg_no; }
325+
326+private:
327+ // disallow copying
328+ R_Opnd(const R_Opnd &): RM_Opnd(Reg) { assert(false); }
329+};
330+
331+//
332+// a memory operand with displacement
333+// Can also serve as a full memory operand with base, index, displacement and scale.
334+// Use n_reg to specify 'no register', say, for index.
335+class M_Opnd: public RM_Opnd {
336+
337+protected:
338+ Imm_Opnd m_disp;
339+ Imm_Opnd m_scale;
340+ R_Opnd m_index;
341+ R_Opnd m_base;
342+
343+public:
344+ //M_Opnd(Opnd_Size sz): RM_Opnd(Mem, K_M, sz), m_disp(0), m_scale(0), m_index(n_reg), m_base(n_reg) {}
345+ M_Opnd(I_32 disp):
346+ RM_Opnd(Mem), m_disp(disp), m_scale(0), m_index(n_reg), m_base(n_reg) {}
347+ M_Opnd(Reg_No rbase, I_32 rdisp):
348+ RM_Opnd(Mem), m_disp(rdisp), m_scale(0), m_index(n_reg), m_base(rbase) {}
349+ M_Opnd(I_32 disp, Reg_No rbase, Reg_No rindex, unsigned scale):
350+ RM_Opnd(Mem), m_disp(disp), m_scale(scale), m_index(rindex), m_base(rbase) {}
351+ M_Opnd(const M_Opnd & that) : RM_Opnd(Mem),
352+ m_disp((int)that.m_disp.get_value()), m_scale((int)that.m_scale.get_value()),
353+ m_index(that.m_index.reg_no()), m_base(that.m_base.reg_no())
354+ {}
355+ //
356+ inline const R_Opnd & base(void) const { return m_base; }
357+ inline const R_Opnd & index(void) const { return m_index; }
358+ inline const Imm_Opnd & scale(void) const { return m_scale; }
359+ inline const Imm_Opnd & disp(void) const { return m_disp; }
360+};
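// Usage sketch (illustrative only, IA32 register names): the general form
// addresses base + index*scale + disp, with n_reg standing for "no register":
//   M_Opnd(eax_reg, 8);               // 8(%eax)
//   M_Opnd(16, esi_reg, ecx_reg, 4);  // 16(%esi,%ecx,4)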
361+
362+//
363+// a memory operand with base register and displacement
364+//
365+class M_Base_Opnd: public M_Opnd {
366+
367+public:
368+ M_Base_Opnd(Reg_No base, I_32 disp) : M_Opnd(disp, base, n_reg, 0) {}
369+
370+private:
371+ // disallow copying - but it leads to ICC errors #734 in encoder.inl
372+ // M_Base_Opnd(const M_Base_Opnd &): M_Opnd(0) { assert(false); }
373+};
374+
375+//
376+// a memory operand with base register, scaled index register
377+// and displacement.
378+//
379+class M_Index_Opnd : public M_Opnd {
380+
381+public:
382+ M_Index_Opnd(Reg_No base, Reg_No index, I_32 disp, unsigned scale):
383+ M_Opnd(disp, base, index, scale) {}
384+
385+private:
386+ // disallow copying - but it leads to ICC errors #734 in encoder.inl
387+ // M_Index_Opnd(const M_Index_Opnd &): M_Opnd(0) { assert(false); }
388+};
389+
390+class XMM_Opnd : public Opnd {
391+
392+protected:
393+ unsigned m_idx;
394+
395+public:
396+ XMM_Opnd(unsigned _idx): Opnd(XMM), m_idx(_idx) {};
397+ unsigned get_idx( void ) const { return m_idx; };
398+
399+private:
400+ // disallow copying
401+ XMM_Opnd(const XMM_Opnd &): Opnd(XMM) { assert(false); }
402+};
403+
404+//
405+// operand structures for ia32 registers
406+//
407+#ifdef _EM64T_
408+
409+extern R_Opnd rax_opnd;
410+extern R_Opnd rcx_opnd;
411+extern R_Opnd rdx_opnd;
412+extern R_Opnd rbx_opnd;
413+extern R_Opnd rdi_opnd;
414+extern R_Opnd rsi_opnd;
415+extern R_Opnd rsp_opnd;
416+extern R_Opnd rbp_opnd;
417+
418+extern R_Opnd r8_opnd;
419+extern R_Opnd r9_opnd;
420+extern R_Opnd r10_opnd;
421+extern R_Opnd r11_opnd;
422+extern R_Opnd r12_opnd;
423+extern R_Opnd r13_opnd;
424+extern R_Opnd r14_opnd;
425+extern R_Opnd r15_opnd;
426+
427+extern XMM_Opnd xmm8_opnd;
428+extern XMM_Opnd xmm9_opnd;
429+extern XMM_Opnd xmm10_opnd;
430+extern XMM_Opnd xmm11_opnd;
431+extern XMM_Opnd xmm12_opnd;
432+extern XMM_Opnd xmm13_opnd;
433+extern XMM_Opnd xmm14_opnd;
434+extern XMM_Opnd xmm15_opnd;
435+#else
436+
437+extern R_Opnd eax_opnd;
438+extern R_Opnd ecx_opnd;
439+extern R_Opnd edx_opnd;
440+extern R_Opnd ebx_opnd;
441+extern R_Opnd esp_opnd;
442+extern R_Opnd ebp_opnd;
443+extern R_Opnd esi_opnd;
444+extern R_Opnd edi_opnd;
445+
446+#endif // _EM64T_
447+
448+extern XMM_Opnd xmm0_opnd;
449+extern XMM_Opnd xmm1_opnd;
450+extern XMM_Opnd xmm2_opnd;
451+extern XMM_Opnd xmm3_opnd;
452+extern XMM_Opnd xmm4_opnd;
453+extern XMM_Opnd xmm5_opnd;
454+extern XMM_Opnd xmm6_opnd;
455+extern XMM_Opnd xmm7_opnd;
456+
457+#ifdef NO_ENCODER_INLINE
458+ #define ENCODER_DECLARE_EXPORT
459+#else
460+ #define ENCODER_DECLARE_EXPORT inline
461+ #include "encoder.inl"
462+#endif
463+
464+// prefix
465+ENCODER_DECLARE_EXPORT char * prefix(char * stream, InstrPrefix p);
466+
467+// stack push and pop instructions
468+ENCODER_DECLARE_EXPORT char * push(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
469+ENCODER_DECLARE_EXPORT char * push(char * stream, const Imm_Opnd & imm);
470+ENCODER_DECLARE_EXPORT char * pop(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
471+
472+// cmpxchg or xchg
473+ENCODER_DECLARE_EXPORT char * cmpxchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf);
474+ENCODER_DECLARE_EXPORT char * xchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf);
475+
476+// inc(rement), dec(rement), not, neg(ate) instructions
477+ENCODER_DECLARE_EXPORT char * inc(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
478+ENCODER_DECLARE_EXPORT char * dec(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
479+ENCODER_DECLARE_EXPORT char * _not(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
480+ENCODER_DECLARE_EXPORT char * neg(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
481+ENCODER_DECLARE_EXPORT char * nop(char * stream);
482+ENCODER_DECLARE_EXPORT char * int3(char * stream);
483+
484+// alu instructions: add, or, adc, sbb, and, sub, xor, cmp
485+ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz = size_platf);
486+ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz = size_platf);
487+ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf);
488+
489+// test instruction
490+ENCODER_DECLARE_EXPORT char * test(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz = size_platf);
491+ENCODER_DECLARE_EXPORT char * test(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf);
492+
493+// shift instructions: shl, shr, sar, shld, shrd, ror
494+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz = size_platf);
495+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, Opnd_Size sz = size_platf);
496+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz = size_platf);
497+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf);
498+
499+// multiply instructions: mul, imul
500+ENCODER_DECLARE_EXPORT char * mul(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
501+ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf);
502+ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz = size_platf);
503+ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm, const Imm_Opnd& imm, Opnd_Size sz = size_platf);
504+
505+// divide instructions: div, idiv
506+ENCODER_DECLARE_EXPORT char * idiv(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
507+ENCODER_DECLARE_EXPORT char * div(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
508+
509+// data movement: mov
510+ENCODER_DECLARE_EXPORT char * mov(char * stream, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz = size_platf);
511+ENCODER_DECLARE_EXPORT char * mov(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf);
512+ENCODER_DECLARE_EXPORT char * mov(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz = size_platf);
513+
514+ENCODER_DECLARE_EXPORT char * movsx( char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf);
515+ENCODER_DECLARE_EXPORT char * movzx( char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf);
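// Usage sketch of this legacy typed interface (illustrative only, using the
// IA32 operand objects declared above): load, modify and store a stack slot,
// assuming "s" points into a writable code buffer:
//   s = mov(s, eax_opnd, M_Base_Opnd(esp_reg, 4));  // mov eax, 4(%esp)
//   s = alu(s, add_opc, eax_opnd, Imm_Opnd(1));     // add eax, 1
//   s = mov(s, M_Base_Opnd(esp_reg, 4), eax_opnd);  // mov 4(%esp), eax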
516+
517+ENCODER_DECLARE_EXPORT char * movd(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm);
518+ENCODER_DECLARE_EXPORT char * movd(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm);
519+ENCODER_DECLARE_EXPORT char * movq(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm);
520+ENCODER_DECLARE_EXPORT char * movq(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm);
521+
522+// sse mov
523+ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl);
524+ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const M_Opnd & mem, const XMM_Opnd & xmm, bool dbl);
525+ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
526+
527+// sse add, sub, mul, div
528+ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl);
529+ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
530+
531+ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl);
532+ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
533+
534+ENCODER_DECLARE_EXPORT char * sse_mul(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl);
535+ENCODER_DECLARE_EXPORT char * sse_mul(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
536+
537+ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl);
538+ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
539+
540+// xor, compare
541+ENCODER_DECLARE_EXPORT char * sse_xor(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1);
542+
543+ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
544+ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem, bool dbl);
545+
546+// sse conversions
547+ENCODER_DECLARE_EXPORT char * sse_cvt_si(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl);
548+ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const M_Opnd & mem, bool dbl);
549+ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const XMM_Opnd & xmm, bool dbl);
550+ENCODER_DECLARE_EXPORT char * sse_cvt_fp2dq(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
551+ENCODER_DECLARE_EXPORT char * sse_cvt_dq2fp(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
552+ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem64);
553+ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1);
554+ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem32);
555+ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1);
556+
557+// conditional operations: cmov, setcc
558+ENCODER_DECLARE_EXPORT char * cmov(char * stream, ConditionCode cc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf);
559+ENCODER_DECLARE_EXPORT char * setcc(char * stream, ConditionCode cc, const RM_Opnd & rm8);
560+
561+// load effective address: lea
562+ENCODER_DECLARE_EXPORT char * lea(char * stream, const R_Opnd & r, const M_Opnd & m, Opnd_Size sz = size_platf);
563+ENCODER_DECLARE_EXPORT char * cdq(char * stream);
564+ENCODER_DECLARE_EXPORT char * wait(char * stream);
565+
566+// control-flow instructions
567+ENCODER_DECLARE_EXPORT char * loop(char * stream, const Imm_Opnd & imm);
568+
569+// jump with 8-bit relative
570+ENCODER_DECLARE_EXPORT char * jump8(char * stream, const Imm_Opnd & imm);
571+
572+// jump with 32-bit relative
573+ENCODER_DECLARE_EXPORT char * jump32(char * stream, const Imm_Opnd & imm);
574+
575+// register indirect jump
576+ENCODER_DECLARE_EXPORT char * jump(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
577+
578+// jump to target address
579+ENCODER_DECLARE_EXPORT char *jump(char * stream, char *target);
580+
581+// jump with displacement
582+//char * jump(char * stream, I_32 disp);
583+
584+// conditional branch with 8-bit branch offset
585+ENCODER_DECLARE_EXPORT char * branch8(char * stream, ConditionCode cc, const Imm_Opnd & imm, InstrPrefix prefix = no_prefix);
586+
587+// conditional branch with 32-bit branch offset
588+ENCODER_DECLARE_EXPORT char * branch32(char * stream, ConditionCode cc, const Imm_Opnd & imm, InstrPrefix prefix = no_prefix);
589+
590+// conditional branch with target label address
591+//char * branch(char * stream, ConditionCode cc, const char * target, InstrPrefix prefix = no_prefix);
592+
593+// conditional branch with displacement immediate
594+ENCODER_DECLARE_EXPORT char * branch(char * stream, ConditionCode cc, I_32 disp, InstrPrefix prefix = no_prefix);
595+
596+// call with displacement
597+ENCODER_DECLARE_EXPORT char * call(char * stream, const Imm_Opnd & imm);
598+
599+// indirect call through register or memory location
600+ENCODER_DECLARE_EXPORT char * call(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
601+
602+// call target address
603+ENCODER_DECLARE_EXPORT char * call(char * stream, const char * target);
604+
605+// return instruction
606+ENCODER_DECLARE_EXPORT char * ret(char * stream);
607+ENCODER_DECLARE_EXPORT char * ret(char * stream, unsigned short pop);
608+ENCODER_DECLARE_EXPORT char * ret(char * stream, const Imm_Opnd & imm);
609+
610+// string operations
611+ENCODER_DECLARE_EXPORT char * set_d(char * stream, bool set);
612+ENCODER_DECLARE_EXPORT char * scas(char * stream, unsigned char prefix);
613+ENCODER_DECLARE_EXPORT char * stos(char * stream, unsigned char prefix);
614+
615+// floating-point instructions
616+
617+// st(0) = st(0) fp_op m{32,64}real
618+//!char * fp_op_mem(char * stream, FP_Opcode opc,const M_Opnd& mem,int is_double);
619+
620+// st(0) = st(0) fp_op st(i)
621+//!char *fp_op(char * stream, FP_Opcode opc,unsigned i);
622+
623+// st(i) = st(i) fp_op st(0) ; optionally pop stack
624+//!char * fp_op(char * stream, FP_Opcode opc,unsigned i,unsigned pop_stk);
625+
626+// compare st(0),st(1) and pop stack twice
627+//!char * fcompp(char * stream);
628+ENCODER_DECLARE_EXPORT char * fldcw(char * stream, const M_Opnd & mem);
629+ENCODER_DECLARE_EXPORT char * fnstcw(char * stream, const M_Opnd & mem);
630+ENCODER_DECLARE_EXPORT char * fnstsw(char * stream);
631+//!char * fchs(char * stream);
632+//!char * frem(char * stream);
633+//!char * fxch(char * stream,unsigned i);
634+//!char * fcomip(char * stream, unsigned i);
635+
636+// load from memory (as fp) into fp register stack
637+ENCODER_DECLARE_EXPORT char * fld(char * stream, const M_Opnd & m, bool is_double);
638+//!char *fld80(char * stream,const M_Opnd& mem);
639+
640+// load from memory (as int) into fp register stack
641+//!char * fild(char * stream,const M_Opnd& mem,int is_long);
642+
643+// push st(i) onto fp register stack
644+//!char * fld(char * stream,unsigned i);
645+
646+// push the constants 0.0 and 1.0 onto the fp register stack
647+//!char * fldz(char * stream);
648+//!char * fld1(char * stream);
649+
650+// store stack to memory (as int), optionally popping the stack
651+ENCODER_DECLARE_EXPORT char * fist(char * stream, const M_Opnd & mem, bool is_long, bool pop_stk);
652+// store stack to memory (as fp), optionally popping the stack
653+ENCODER_DECLARE_EXPORT char * fst(char * stream, const M_Opnd & m, bool is_double, bool pop_stk);
654+// store ST(0) to ST(i), optionally popping the stack. Takes 1 clock
655+ENCODER_DECLARE_EXPORT char * fst(char * stream, unsigned i, bool pop_stk);
656+
657+//!char * pushad(char * stream);
658+//!char * pushfd(char * stream);
659+//!char * popad(char * stream);
660+//!char * popfd(char * stream);
661+
662+// stack frame allocation instructions: enter & leave
663+//
664+// enter frame_size
665+//
666+// is equivalent to:
667+//
668+// push ebp
669+// mov ebp,esp
670+// sub esp,frame_size
671+//
672+//!char *enter(char * stream,const Imm_Opnd& imm);
673+
674+// leave
675+// is equivalent to:
676+//
677+// mov esp,ebp
678+// pop ebp
679+//!char *leave(char * stream);
680+
681+// sahf loads the SF, ZF, AF, PF, and CF flags from AH
682+//!char *sahf(char * stream);
683+
684+// Intrinsic FP math functions
685+
686+//!char *math_fsin(char * stream);
687+//!char *math_fcos(char * stream);
688+//!char *math_fabs(char * stream);
689+//!char *math_fpatan(char * stream);
690+ENCODER_DECLARE_EXPORT char * fprem(char * stream);
691+ENCODER_DECLARE_EXPORT char * fprem1(char * stream);
692+//!char *math_frndint(char * stream);
693+//!char *math_fptan(char * stream);
694+
695+//
696+// Add 1-7 bytes padding, with as few instructions as possible,
697+// with no effect on the processor state (e.g., registers, flags)
698+//
699+//!char *padding(char * stream, unsigned num);
700+
701+// prolog and epilog code generation
702+//- char *prolog(char * stream,unsigned frame_size,unsigned reg_save_mask);
703+//- char *epilog(char * stream,unsigned reg_save_mask);
704+
705+//!extern R_Opnd reg_operand_array[];
706+
707+// fsave and frstor
708+//!char *fsave(char * stream);
709+//!char *frstor(char * stream);
710+
711+// lahf : Load Status Flags into AH Register
712+//!char *lahf(char * stream);
713+
714+// mfence : Memory Fence
715+//!char *mfence(char * stream);
716+
717+#endif // _VM_ENCODER_H_
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/encoder.inl
@@ -0,0 +1,863 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+#include <stdio.h>
21+#include <assert.h>
22+#include <limits.h>
23+
24+extern const RegName map_of_regno_2_regname[];
25+extern const OpndSize map_of_EncoderOpndSize_2_RealOpndSize[];
26+extern const Mnemonic map_of_alu_opcode_2_mnemonic[];
27+extern const Mnemonic map_of_shift_opcode_2_mnemonic[];
28+
29+// S_ stands for 'Signed'
30+extern const Mnemonic S_map_of_condition_code_2_branch_mnemonic[];
31+// U_ stands for 'Unsigned'
32+extern const Mnemonic U_map_of_condition_code_2_branch_mnemonic[];
33+
34+inline static RegName map_reg(Reg_No r) {
35+ assert(r >= 0 && r <= n_reg);
36+ return map_of_regno_2_regname[r];
37+}
38+
39+inline static OpndSize map_size(Opnd_Size o_size) {
40+ assert(o_size >= 0 && o_size <= n_size);
41+ return map_of_EncoderOpndSize_2_RealOpndSize[o_size];
42+}
43+
44+inline static Mnemonic map_alu(ALU_Opcode alu) {
45+ assert(alu >= 0 && alu < n_alu);
46+ return map_of_alu_opcode_2_mnemonic[alu];
47+}
48+
49+inline static Mnemonic map_shift(Shift_Opcode shc) {
50+ assert(shc >= 0 && shc < n_shift);
51+ return map_of_shift_opcode_2_mnemonic[shc];
52+}
53+
54+inline bool fit8(int64 val) {
55+ return (CHAR_MIN <= val) && (val <= CHAR_MAX);
56+}
57+
58+inline bool fit32(int64 val) {
59+ return (INT_MIN <= val) && (val <= INT_MAX);
60+}
61+
62+inline static void add_r(EncoderBase::Operands & args, const R_Opnd & r, Opnd_Size sz, OpndExt ext = OpndExt_None) {
63+ RegName reg = map_reg(r.reg_no());
64+ if (sz != n_size) {
65+ OpndSize size = map_size(sz);
66+ if (size != getRegSize(reg)) {
67+ reg = getAliasReg(reg, size);
68+ }
69+ }
70+ args.add(EncoderBase::Operand(reg, ext));
71+}
72+
73+inline static void add_m(EncoderBase::Operands & args, const M_Opnd & m, Opnd_Size sz, OpndExt ext = OpndExt_None) {
74+ assert(n_size != sz);
75+ args.add(EncoderBase::Operand(map_size(sz),
76+ map_reg(m.base().reg_no()), map_reg(m.index().reg_no()),
77+ (unsigned)m.scale().get_value(), (int)m.disp().get_value(), ext));
78+}
79+
80+inline static void add_rm(EncoderBase::Operands & args, const RM_Opnd & rm, Opnd_Size sz, OpndExt ext = OpndExt_None) {
81+ rm.is_reg() ? add_r(args, (R_Opnd &)rm, sz, ext) : add_m(args, (M_Opnd &)rm, sz, ext);
82+}
83+
84+inline static void add_xmm(EncoderBase::Operands & args, const XMM_Opnd & xmm, bool dbl) {
85+    // Gregory -
86+    // XMM register indexes in the Reg_No enum are shifted by xmm0_reg (they
87+    // do not start at 0), so the xmm0_reg index must be subtracted from the
88+    // xmm.get_idx() value
89+ assert(xmm.get_idx() >= xmm0_reg);
90+ return args.add((RegName)( (dbl ? RegName_XMM0D : RegName_XMM0S) + xmm.get_idx() -
91+ xmm0_reg));
92+}
93+
94+inline static void add_fp(EncoderBase::Operands & args, unsigned i, bool dbl) {
95+ return args.add((RegName)( (dbl ? RegName_FP0D : RegName_FP0S) + i));
96+}
97+
98+inline static void add_imm(EncoderBase::Operands & args, const Imm_Opnd & imm) {
99+ assert(n_size != imm.get_size());
100+ args.add(EncoderBase::Operand(map_size(imm.get_size()), imm.get_value(),
101+ imm.is_signed() ? OpndExt_Signed : OpndExt_Zero));
102+}
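// Usage sketch for the operand helpers above: each add_* call appends one
// operand and a single EncoderBase::encode() consumes the list. A minimal,
// hedged example, assuming the add_opc ALU_Opcode value and the eax_opnd
// register operand this library defines:
//
//   char buf[16];
//   char* next = alu(buf, add_opc, eax_opnd, Imm_Opnd(size_32, 1)); // add eax, 1
//   size_t len = next - buf; // every encoder returns the advanced stream pointer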
103+
104+ENCODER_DECLARE_EXPORT char * prefix(char * stream, InstrPrefix p) {
105+ *stream = (char)p;
106+ return stream + 1;
107+}
108+
109+// stack push and pop instructions
110+ENCODER_DECLARE_EXPORT char * push(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
111+ EncoderBase::Operands args;
112+ add_rm(args, rm, sz);
113+ return (char*)EncoderBase::encode(stream, Mnemonic_PUSH, args);
114+}
115+
116+ENCODER_DECLARE_EXPORT char * push(char * stream, const Imm_Opnd & imm) {
117+ EncoderBase::Operands args;
118+#ifdef _EM64T_
119+ add_imm(args, imm);
120+#else
121+ // we need this workaround to be compatible with the former ia32 encoder implementation
122+ add_imm(args, Imm_Opnd(size_32, imm.get_value()));
123+#endif
124+ return EncoderBase::encode(stream, Mnemonic_PUSH, args);
125+}
126+
127+ENCODER_DECLARE_EXPORT char * pop(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
128+ EncoderBase::Operands args;
129+ add_rm(args, rm, sz);
130+ return (char*)EncoderBase::encode(stream, Mnemonic_POP, args);
131+}
132+
133+// cmpxchg or xchg
134+ENCODER_DECLARE_EXPORT char * cmpxchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz) {
135+ EncoderBase::Operands args;
136+ add_rm(args, rm, sz);
137+ add_r(args, r, sz);
138+ RegName implicitReg = getAliasReg(RegName_EAX, map_size(sz));
139+ args.add(implicitReg);
140+ return (char*)EncoderBase::encode(stream, Mnemonic_CMPXCHG, args);
141+}
142+
143+ENCODER_DECLARE_EXPORT char * xchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz) {
144+ EncoderBase::Operands args;
145+ add_rm(args, rm, sz);
146+ add_r(args, r, sz);
147+ return (char*)EncoderBase::encode(stream, Mnemonic_XCHG, args);
148+}
149+
150+// inc(rement), dec(rement), not, neg(ate) instructions
151+ENCODER_DECLARE_EXPORT char * inc(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
152+ EncoderBase::Operands args;
153+ add_rm(args, rm, sz);
154+ return (char*)EncoderBase::encode(stream, Mnemonic_INC, args);
155+}
156+
157+ENCODER_DECLARE_EXPORT char * dec(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
158+ EncoderBase::Operands args;
159+ add_rm(args, rm, sz);
160+ return (char*)EncoderBase::encode(stream, Mnemonic_DEC, args);
161+}
162+
163+ENCODER_DECLARE_EXPORT char * _not(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
164+ EncoderBase::Operands args;
165+ add_rm(args, rm, sz);
166+ return (char*)EncoderBase::encode(stream, Mnemonic_NOT, args);
167+}
168+
169+ENCODER_DECLARE_EXPORT char * neg(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
170+ EncoderBase::Operands args;
171+ add_rm(args, rm, sz);
172+ return (char*)EncoderBase::encode(stream, Mnemonic_NEG, args);
173+}
174+
175+ENCODER_DECLARE_EXPORT char * nop(char * stream) {
176+ EncoderBase::Operands args;
177+ return (char*)EncoderBase::encode(stream, Mnemonic_NOP, args);
178+}
179+
180+ENCODER_DECLARE_EXPORT char * int3(char * stream) {
181+ EncoderBase::Operands args;
182+ return (char*)EncoderBase::encode(stream, Mnemonic_INT3, args);
183+}
184+
185+// alu instructions: add, or, adc, sbb, and, sub, xor, cmp
186+ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) {
187+ EncoderBase::Operands args;
188+ add_rm(args, rm, sz);
189+ add_imm(args, imm);
190+ return (char*)EncoderBase::encode(stream, map_alu(opc), args);
191+};
192+
193+ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz) {
194+ EncoderBase::Operands args;
195+ add_rm(args, m, sz);
196+ add_rm(args, r, sz);
197+ return (char*)EncoderBase::encode(stream, map_alu(opc), args);
198+}
199+
200+ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) {
201+ EncoderBase::Operands args;
202+ add_rm(args, r, sz);
203+ add_rm(args, rm, sz);
204+ return (char*)EncoderBase::encode(stream, map_alu(opc), args);
205+}
206+
207+// test instruction
208+ENCODER_DECLARE_EXPORT char * test(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) {
209+ EncoderBase::Operands args;
210+ add_rm(args, rm, sz);
211+ assert(imm.get_size() <= sz);
212+ add_imm(args, imm);
213+ return (char*)EncoderBase::encode(stream, Mnemonic_TEST, args);
214+}
215+
216+ENCODER_DECLARE_EXPORT char * test(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz) {
217+ EncoderBase::Operands args;
218+ add_rm(args, rm, sz);
219+ add_r(args, r, sz);
220+ return (char*)EncoderBase::encode(stream, Mnemonic_TEST, args);
221+}
222+
223+// shift instructions: shl, shr, sar, shld, shrd
224+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) {
225+ EncoderBase::Operands args;
226+ add_rm(args, rm, sz);
227+ add_imm(args, imm);
228+ return (char*)EncoderBase::encode(stream, map_shift(shc), args);
229+}
230+
231+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm, Opnd_Size sz) {
232+ EncoderBase::Operands args;
233+ add_rm(args, rm, sz);
234+ args.add(RegName_CL);
235+ return (char*)EncoderBase::encode(stream, map_shift(shc), args);
236+}
237+
238+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm,
239+ const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz) {
240+ EncoderBase::Operands args;
241+ assert(shc == shld_opc || shc == shrd_opc);
242+ add_rm(args, rm, sz);
243+ add_r(args, r, sz);
244+ add_imm(args, imm);
245+ return (char*)EncoderBase::encode(stream, map_shift(shc), args);
246+}
247+
248+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm,
249+ const R_Opnd & r, Opnd_Size sz) {
250+ EncoderBase::Operands args;
251+ assert(shc == shld_opc || shc == shrd_opc);
252+ add_rm(args, rm, sz);
253+ add_r(args, r, sz);
254+ args.add(RegName_CL);
255+ return (char*)EncoderBase::encode(stream, map_shift(shc), args);
256+}
257+
258+// multiply instructions: mul, imul
259+ENCODER_DECLARE_EXPORT char * mul(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
260+ EncoderBase::Operands args;
261+ args.add(RegName_EDX);
262+ args.add(RegName_EAX);
263+ add_rm(args, rm, sz);
264+ return (char*)EncoderBase::encode(stream, Mnemonic_MUL, args);
265+}
266+
267+ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) {
268+ EncoderBase::Operands args;
269+ add_r(args, r, sz);
270+ add_rm(args, rm, sz);
271+ return (char*)EncoderBase::encode(stream, Mnemonic_IMUL, args);
272+}
273+
274+ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz) {
275+ EncoderBase::Operands args;
276+ add_r(args, r, sz);
277+ add_imm(args, imm);
278+ return (char*)EncoderBase::encode(stream, Mnemonic_IMUL, args);
279+}
280+
281+ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm,
282+ const Imm_Opnd & imm, Opnd_Size sz) {
283+ EncoderBase::Operands args;
284+ add_r(args, r, sz);
285+ add_rm(args, rm, sz);
286+ add_imm(args, imm);
287+ return (char*)EncoderBase::encode(stream, Mnemonic_IMUL, args);
288+}
289+
290+// divide instructions: div, idiv
291+ENCODER_DECLARE_EXPORT char * idiv(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
292+ EncoderBase::Operands args;
293+#ifdef _EM64T_
294+ add_r(args, rdx_opnd, sz);
295+ add_r(args, rax_opnd, sz);
296+#else
297+ add_r(args, edx_opnd, sz);
298+ add_r(args, eax_opnd, sz);
299+#endif
300+ add_rm(args, rm, sz);
301+ return (char*)EncoderBase::encode(stream, Mnemonic_IDIV, args);
302+}
303+
304+ENCODER_DECLARE_EXPORT char * div(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
305+ EncoderBase::Operands args;
306+#ifdef _EM64T_
307+ add_r(args, rdx_opnd, sz);
308+ add_r(args, rax_opnd, sz);
309+#else
310+ add_r(args, edx_opnd, sz);
311+ add_r(args, eax_opnd, sz);
312+#endif
313+ add_rm(args, rm, sz);
314+ return (char*)EncoderBase::encode(stream, Mnemonic_DIV, args);
315+}
316+
317+// data movement: mov
318+ENCODER_DECLARE_EXPORT char * mov(char * stream, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz) {
319+ EncoderBase::Operands args;
320+ add_m(args, m, sz);
321+ add_r(args, r, sz);
322+ return (char*)EncoderBase::encode(stream, Mnemonic_MOV, args);
323+}
324+
325+ENCODER_DECLARE_EXPORT char * mov(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) {
326+ EncoderBase::Operands args;
327+ add_r(args, r, sz);
328+ add_rm(args, rm, sz);
329+ return (char*)EncoderBase::encode(stream, Mnemonic_MOV, args);
330+}
331+
332+ENCODER_DECLARE_EXPORT char * mov(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) {
333+ EncoderBase::Operands args;
334+ add_rm(args, rm, sz);
335+ add_imm(args, imm);
336+ return (char*)EncoderBase::encode(stream, Mnemonic_MOV, args);
337+}
338+
339+ENCODER_DECLARE_EXPORT char * movd(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm) {
340+ EncoderBase::Operands args;
341+ add_rm(args, rm, size_32);
342+ add_xmm(args, xmm, false);
343+ return (char*)EncoderBase::encode(stream, Mnemonic_MOVD, args);
344+}
345+
346+ENCODER_DECLARE_EXPORT char * movd(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm) {
347+ EncoderBase::Operands args;
348+ add_xmm(args, xmm, false);
349+ add_rm(args, rm, size_32);
350+ return (char*)EncoderBase::encode(stream, Mnemonic_MOVD, args);
351+}
352+
353+ENCODER_DECLARE_EXPORT char * movq(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm) {
354+ EncoderBase::Operands args;
355+ add_rm(args, rm, size_64);
356+ add_xmm(args, xmm, true);
357+ return (char*)EncoderBase::encode(stream, Mnemonic_MOVQ, args);
358+}
359+
360+ENCODER_DECLARE_EXPORT char * movq(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm) {
361+ EncoderBase::Operands args;
362+ add_xmm(args, xmm, true);
363+ add_rm(args, rm, size_64);
364+ return (char*)EncoderBase::encode(stream, Mnemonic_MOVQ, args);
365+}
366+
367+ENCODER_DECLARE_EXPORT char * movsx(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) {
368+ EncoderBase::Operands args;
369+ add_r(args, r, n_size);
370+ add_rm(args, rm, sz, OpndExt_Signed);
371+ return (char*)EncoderBase::encode(stream, Mnemonic_MOVSX, args);
372+}
373+
374+ENCODER_DECLARE_EXPORT char * movzx(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) {
375+ EncoderBase::Operands args;
376+ add_r(args, r, n_size);
377+ // movzx r64, r/m32 is not available on em64t
378+ // mov r32, r/m32 should zero out upper bytes
379+ assert(sz <= size_16);
380+ add_rm(args, rm, sz, OpndExt_Zero);
381+ return (char*)EncoderBase::encode(stream, Mnemonic_MOVZX, args);
382+}
383+
384+// sse mov
385+ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) {
386+ EncoderBase::Operands args;
387+ add_xmm(args, xmm, dbl);
388+ add_m(args, mem, dbl ? size_64 : size_32);
389+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MOVSD : Mnemonic_MOVSS, args);
390+}
391+
392+ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const M_Opnd & mem, const XMM_Opnd & xmm, bool dbl) {
393+ EncoderBase::Operands args;
394+ add_m(args, mem, dbl ? size_64 : size_32);
395+ add_xmm(args, xmm, dbl);
396+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MOVSD : Mnemonic_MOVSS, args);
397+}
398+
399+ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) {
400+ EncoderBase::Operands args;
401+ add_xmm(args, xmm0, dbl);
402+ add_xmm(args, xmm1, dbl);
403+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MOVSD : Mnemonic_MOVSS, args );
404+}
405+
406+// sse add, sub, mul, div
407+ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) {
408+ EncoderBase::Operands args;
409+ add_xmm(args, xmm, dbl);
410+ add_m(args, mem, dbl ? size_64 : size_32);
411+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_ADDSD : Mnemonic_ADDSS, args);
412+}
413+
414+ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) {
415+ EncoderBase::Operands args;
416+ add_xmm(args, xmm0, dbl);
417+ add_xmm(args, xmm1, dbl);
418+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_ADDSD : Mnemonic_ADDSS, args);
419+}
420+
421+ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) {
422+ EncoderBase::Operands args;
423+ add_xmm(args, xmm, dbl);
424+ add_m(args, mem, dbl ? size_64 : size_32);
425+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_SUBSD : Mnemonic_SUBSS, args);
426+}
427+
428+ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) {
429+ EncoderBase::Operands args;
430+ add_xmm(args, xmm0, dbl);
431+ add_xmm(args, xmm1, dbl);
432+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_SUBSD : Mnemonic_SUBSS, args);
433+}
434+
435+ENCODER_DECLARE_EXPORT char * sse_mul( char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) {
436+ EncoderBase::Operands args;
437+ add_xmm(args, xmm, dbl);
438+ add_m(args, mem, dbl ? size_64 : size_32);
439+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MULSD : Mnemonic_MULSS, args);
440+}
441+
442+ENCODER_DECLARE_EXPORT char * sse_mul(char * stream, const XMM_Opnd& xmm0, const XMM_Opnd& xmm1, bool dbl) {
443+ EncoderBase::Operands args;
444+ add_xmm(args, xmm0, dbl);
445+ add_xmm(args, xmm1, dbl);
446+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MULSD : Mnemonic_MULSS, args);
447+}
448+
449+ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) {
450+ EncoderBase::Operands args;
451+ add_xmm(args, xmm, dbl);
452+ add_m(args, mem, dbl ? size_64 : size_32);
453+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_DIVSD : Mnemonic_DIVSS, args);
454+}
455+
456+ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) {
457+ EncoderBase::Operands args;
458+ add_xmm(args, xmm0, dbl);
459+ add_xmm(args, xmm1, dbl);
460+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_DIVSD : Mnemonic_DIVSS, args);
461+}
462+
463+ENCODER_DECLARE_EXPORT char * sse_xor(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1) {
464+ EncoderBase::Operands args;
465+ add_xmm(args, xmm0, true);
466+ add_xmm(args, xmm1, true);
467+ return (char*)EncoderBase::encode(stream, Mnemonic_PXOR, args);
468+}
469+
470+ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) {
471+ EncoderBase::Operands args;
472+ add_xmm(args, xmm0, true);
473+ add_xmm(args, xmm1, true);
474+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_COMISD : Mnemonic_COMISS, args);
475+}
476+
477+ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem, bool dbl) {
478+ EncoderBase::Operands args;
479+ add_xmm(args, xmm0, dbl);
480+ add_m(args, mem, dbl ? size_64 : size_32);
481+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_COMISD : Mnemonic_COMISS, args);
482+}
483+
484+// sse conversions
485+ENCODER_DECLARE_EXPORT char * sse_cvt_si(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) {
486+ EncoderBase::Operands args;
487+ add_xmm(args, xmm, dbl);
488+ add_m(args, mem, size_32);
489+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTSI2SD : Mnemonic_CVTSI2SS, args);
490+}
491+
492+ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const M_Opnd & mem, bool dbl) {
493+ EncoderBase::Operands args;
494+ add_rm(args, reg, size_32);
495+ add_m(args, mem, dbl ? size_64 : size_32);
496+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTTSD2SI : Mnemonic_CVTTSS2SI, args);
497+}
498+
499+ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const XMM_Opnd & xmm, bool dbl) {
500+ EncoderBase::Operands args;
501+ add_rm(args, reg, size_32);
502+ add_xmm(args, xmm, dbl);
503+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTTSD2SI : Mnemonic_CVTTSS2SI, args);
504+}
505+
506+ENCODER_DECLARE_EXPORT char * sse_cvt_fp2dq(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) {
507+ EncoderBase::Operands args;
508+ add_xmm(args, xmm0, dbl);
509+ add_xmm(args, xmm1, dbl);
510+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTTPD2DQ : Mnemonic_CVTTPS2DQ, args);
511+}
512+
513+ENCODER_DECLARE_EXPORT char * sse_cvt_dq2fp(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) {
514+ EncoderBase::Operands args;
515+ add_xmm(args, xmm0, dbl);
516+ add_xmm(args, xmm1, dbl);
517+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTDQ2PD : Mnemonic_CVTDQ2PS, args);
518+}
519+
520+ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem64) {
521+ EncoderBase::Operands args;
522+ add_xmm(args, xmm0, false);
523+ add_m(args, mem64, size_64);
524+ return (char*)EncoderBase::encode(stream, Mnemonic_CVTSD2SS, args);
525+}
526+
527+ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1) {
528+ EncoderBase::Operands args;
529+ add_xmm(args, xmm0, false);
530+ add_xmm(args, xmm1, true);
531+ return (char*)EncoderBase::encode(stream, Mnemonic_CVTSD2SS, args);
532+}
533+
534+ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem32) {
535+ EncoderBase::Operands args;
536+ add_xmm(args, xmm0, true);
537+ add_m(args, mem32, size_32);
538+ return (char*)EncoderBase::encode(stream, Mnemonic_CVTSS2SD, args);
539+}
540+
541+ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1) {
542+ EncoderBase::Operands args;
543+ add_xmm(args, xmm0, true);
544+ add_xmm(args, xmm1, false);
545+ return (char*)EncoderBase::encode(stream, Mnemonic_CVTSS2SD, args);
546+}
547+
548+// conditional operations: cmov, setcc
549+ENCODER_DECLARE_EXPORT char *cmov(char * stream, ConditionCode cc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) {
550+ EncoderBase::Operands args;
551+ add_r(args, r, sz);
552+ add_rm(args, rm, sz);
553+ return (char*)EncoderBase::encode(stream, (Mnemonic)(Mnemonic_CMOVcc + cc), args);
554+}
555+
556+ENCODER_DECLARE_EXPORT char * setcc(char * stream, ConditionCode cc, const RM_Opnd & rm8) {
557+ EncoderBase::Operands args;
558+ add_rm(args, rm8, size_8);
559+ return (char*)EncoderBase::encode(stream, (Mnemonic)(Mnemonic_SETcc + cc), args);
560+}
561+
562+// load effective address: lea
563+ENCODER_DECLARE_EXPORT char * lea(char * stream, const R_Opnd & r, const M_Opnd & m, Opnd_Size sz) {
564+ EncoderBase::Operands args;
565+ add_r(args, r, sz);
566+ add_m(args, m, sz);
567+ return (char*)EncoderBase::encode(stream, Mnemonic_LEA, args);
568+}
569+
570+ENCODER_DECLARE_EXPORT char * cdq(char * stream) {
571+ EncoderBase::Operands args;
572+ args.add(RegName_EDX);
573+ args.add(RegName_EAX);
574+ return (char*)EncoderBase::encode(stream, Mnemonic_CDQ, args);
575+}
576+
577+ENCODER_DECLARE_EXPORT char * wait(char * stream) {
578+ return (char*)EncoderBase::encode(stream, Mnemonic_WAIT, EncoderBase::Operands());
579+}
580+
581+// control-flow instructions
582+
583+// loop
584+ENCODER_DECLARE_EXPORT char * loop(char * stream, const Imm_Opnd & imm) {
585+ EncoderBase::Operands args;
586+ assert(imm.get_size() == size_8);
587+ args.add(RegName_ECX);
588+ add_imm(args, imm);
589+ return (char*)EncoderBase::encode(stream, Mnemonic_LOOP, args);
590+}
591+
592+// jump
593+ENCODER_DECLARE_EXPORT char * jump8(char * stream, const Imm_Opnd & imm) {
594+ EncoderBase::Operands args;
595+ assert(imm.get_size() == size_8);
596+ add_imm(args, imm);
597+ return (char*)EncoderBase::encode(stream, Mnemonic_JMP, args);
598+}
599+
600+ENCODER_DECLARE_EXPORT char * jump32(char * stream, const Imm_Opnd & imm) {
601+ EncoderBase::Operands args;
602+ assert(imm.get_size() == size_32);
603+ add_imm(args, imm);
604+ return (char*)EncoderBase::encode(stream, Mnemonic_JMP, args);
605+}
606+
607+ENCODER_DECLARE_EXPORT char * jump(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
608+ EncoderBase::Operands args;
609+ add_rm(args, rm, sz);
610+ return (char*)EncoderBase::encode(stream, Mnemonic_JMP, args);
611+}
612+
613+/**
614+ * @note On EM64T: if target lies beyond 2G (does not fit into 32 bit
615+ * offset) then generates indirect jump using RAX (whose content is
616+ * destroyed).
617+ */
618+ENCODER_DECLARE_EXPORT char * jump(char * stream, char * target) {
619+#ifdef _EM64T_
620+ int64 offset = target - stream;
621+ // sub 2 bytes for the short version
622+ offset -= 2;
623+ if (fit8(offset)) {
624+ // use 8-bit signed relative form
625+ return jump8(stream, Imm_Opnd(size_8, offset));
626+ } else if (fit32(offset)) {
627+        // sub 5 (3 + 2) bytes for the long version
628+ offset -= 3;
629+ // use 32-bit signed relative form
630+ return jump32(stream, Imm_Opnd(size_32, offset));
631+ }
632+ // need to use absolute indirect jump
633+ stream = mov(stream, rax_opnd, Imm_Opnd(size_64, (int64)target), size_64);
634+ return jump(stream, rax_opnd, size_64);
635+#else
636+ I_32 offset = target - stream;
637+ // sub 2 bytes for the short version
638+ offset -= 2;
639+ if (fit8(offset)) {
640+ // use 8-bit signed relative form
641+ return jump8(stream, Imm_Opnd(size_8, offset));
642+ }
643+ // sub 5 (3 + 2) bytes for the long version
644+ offset -= 3;
645+ // use 32-bit signed relative form
646+ return jump32(stream, Imm_Opnd(size_32, offset));
647+#endif
648+}
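// Note on the offset arithmetic above: relative jumps are counted from the
// end of the instruction. The short form (EB rel8) is 2 bytes, hence the
// initial "offset -= 2"; the near form (E9 rel32) is 5 bytes, 3 more than
// the short form, hence the extra "offset -= 3" when falling back to the
// 32-bit encoding. A jump to the byte immediately following a short jmp
// therefore encodes rel8 == 0.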
649+
650+// branch
651+ENCODER_DECLARE_EXPORT char * branch8(char * stream, ConditionCode cond,
652+ const Imm_Opnd & imm,
653+ InstrPrefix pref)
654+{
655+ if (pref != no_prefix) {
656+        assert(pref == hint_branch_taken_prefix || pref == hint_branch_not_taken_prefix);
657+ stream = prefix(stream, pref);
658+ }
659+ Mnemonic m = (Mnemonic)(Mnemonic_Jcc + cond);
660+ EncoderBase::Operands args;
661+ assert(imm.get_size() == size_8);
662+ add_imm(args, imm);
663+ return (char*)EncoderBase::encode(stream, m, args);
664+}
665+
666+ENCODER_DECLARE_EXPORT char * branch32(char * stream, ConditionCode cond,
667+ const Imm_Opnd & imm,
668+ InstrPrefix pref)
669+{
670+ if (pref != no_prefix) {
671+        assert(pref == hint_branch_taken_prefix || pref == hint_branch_not_taken_prefix);
672+ stream = prefix(stream, pref);
673+ }
674+ Mnemonic m = (Mnemonic)(Mnemonic_Jcc + cond);
675+ EncoderBase::Operands args;
676+ assert(imm.get_size() == size_32);
677+ add_imm(args, imm);
678+ return (char*)EncoderBase::encode(stream, m, args);
679+}
680+
681+/*
682+ENCODER_DECLARE_EXPORT char * branch(char * stream, ConditionCode cc, const char * target, InstrPrefix prefix) {
683+    // sub 2 bytes for the short version
684+    int64 offset = stream - target - 2;
685+    if (fit8(offset)) {
686+        return branch8(stream, cc, Imm_Opnd(size_8, (char)offset), is_signed);
687+    }
688+    return branch32(stream, cc, Imm_Opnd(size_32, (int)offset), is_signed);
689+}
690+*/
691+
692+// call
693+ENCODER_DECLARE_EXPORT char * call(char * stream, const Imm_Opnd & imm)
694+{
695+ EncoderBase::Operands args;
696+ add_imm(args, imm);
697+ return (char*)EncoderBase::encode(stream, Mnemonic_CALL, args);
698+}
699+
700+ENCODER_DECLARE_EXPORT char * call(char * stream, const RM_Opnd & rm,
701+ Opnd_Size sz)
702+{
703+ EncoderBase::Operands args;
704+ add_rm(args, rm, sz);
705+ return (char*)EncoderBase::encode(stream, Mnemonic_CALL, args);
706+}
707+
708+/**
709+* @note On EM64T: if target lies beyond 2G (does not fit into 32 bit
710+* offset) then generates indirect jump using RAX (whose content is
711+* destroyed).
712+*/
713+ENCODER_DECLARE_EXPORT char * call(char * stream, const char * target)
714+{
715+#ifdef _EM64T_
716+ int64 offset = target - stream;
717+ if (fit32(offset)) {
718+ offset -= 5; // sub 5 bytes for this instruction
719+ Imm_Opnd imm(size_32, offset);
720+ return call(stream, imm);
721+ }
722+ // need to use absolute indirect call
723+ stream = mov(stream, rax_opnd, Imm_Opnd(size_64, (int64)target), size_64);
724+ return call(stream, rax_opnd, size_64);
725+#else
726+ I_32 offset = target - stream;
727+ offset -= 5; // sub 5 bytes for this instruction
728+ Imm_Opnd imm(size_32, offset);
729+ return call(stream, imm);
730+#endif
731+}
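// Same displacement scheme as jump(): call rel32 (opcode E8) is 5 bytes,
// so the offset is measured from the end of the instruction; on EM64T a
// target beyond +/-2GB is loaded into RAX and called indirectly instead.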
732+
733+// return instruction
734+ENCODER_DECLARE_EXPORT char * ret(char * stream)
735+{
736+ EncoderBase::Operands args;
737+ return (char*)EncoderBase::encode(stream, Mnemonic_RET, args);
738+}
739+
740+ENCODER_DECLARE_EXPORT char * ret(char * stream, const Imm_Opnd & imm)
741+{
742+ EncoderBase::Operands args;
743+    // The manual says imm can be 16-bit only
744+ //assert(imm.get_size() <= size_16);
745+ args.add(EncoderBase::Operand(map_size(size_16), imm.get_value()));
746+ return (char*)EncoderBase::encode(stream, Mnemonic_RET, args);
747+}
748+
749+ENCODER_DECLARE_EXPORT char * ret(char * stream, unsigned short pop)
750+{
751+    // The manual says it can only be imm16
752+ EncoderBase::Operands args(EncoderBase::Operand(OpndSize_16, pop, OpndExt_Zero));
753+ return (char*)EncoderBase::encode(stream, Mnemonic_RET, args);
754+}
755+
756+// floating-point instructions
757+ENCODER_DECLARE_EXPORT char * fld(char * stream, const M_Opnd & m,
758+ bool is_double) {
759+ EncoderBase::Operands args;
760+ // a fake FP register as operand
761+ add_fp(args, 0, is_double);
762+ add_m(args, m, is_double ? size_64 : size_32);
763+ return (char*)EncoderBase::encode(stream, Mnemonic_FLD, args);
764+}
765+
766+ENCODER_DECLARE_EXPORT char * fist(char * stream, const M_Opnd & mem,
767+ bool is_long, bool pop_stk)
768+{
769+ EncoderBase::Operands args;
770+ if (pop_stk) {
771+ add_m(args, mem, is_long ? size_64 : size_32);
772+ // a fake FP register as operand
773+ add_fp(args, 0, is_long);
774+ return (char*)EncoderBase::encode(stream, Mnemonic_FISTP, args);
775+ }
776+ // only 32-bit operands are supported
777+ assert(is_long == false);
778+ add_m(args, mem, size_32);
779+ add_fp(args, 0, false);
780+ return (char*)EncoderBase::encode(stream, Mnemonic_FIST, args);
781+}
782+
783+ENCODER_DECLARE_EXPORT char * fst(char * stream, const M_Opnd & m,
784+ bool is_double, bool pop_stk)
785+{
786+ EncoderBase::Operands args;
787+ add_m(args, m, is_double ? size_64 : size_32);
788+ // a fake FP register as operand
789+ add_fp(args, 0, is_double);
790+ return (char*)EncoderBase::encode(stream,
791+ pop_stk ? Mnemonic_FSTP : Mnemonic_FST,
792+ args);
793+}
794+
795+ENCODER_DECLARE_EXPORT char * fst(char * stream, unsigned i, bool pop_stk)
796+{
797+ EncoderBase::Operands args;
798+ add_fp(args, i, true);
799+ return (char*)EncoderBase::encode(stream,
800+ pop_stk ? Mnemonic_FSTP : Mnemonic_FST,
801+ args);
802+}
803+
804+ENCODER_DECLARE_EXPORT char * fldcw(char * stream, const M_Opnd & mem) {
805+ EncoderBase::Operands args;
806+ add_m(args, mem, size_16);
807+ return (char*)EncoderBase::encode(stream, Mnemonic_FLDCW, args);
808+}
809+
810+ENCODER_DECLARE_EXPORT char * fnstcw(char * stream, const M_Opnd & mem) {
811+ EncoderBase::Operands args;
812+ add_m(args, mem, size_16);
813+ return (char*)EncoderBase::encode(stream, Mnemonic_FNSTCW, args);
814+}
815+
816+ENCODER_DECLARE_EXPORT char * fnstsw(char * stream)
817+{
818+    return (char*)EncoderBase::encode(stream, Mnemonic_FNSTSW,
819+ EncoderBase::Operands());
820+}
821+
822+// string operations
823+ENCODER_DECLARE_EXPORT char * set_d(char * stream, bool set) {
824+ EncoderBase::Operands args;
825+ return (char*)EncoderBase::encode(stream,
826+ set ? Mnemonic_STD : Mnemonic_CLD,
827+ args);
828+}
829+
830+ENCODER_DECLARE_EXPORT char * scas(char * stream, unsigned char prefix)
831+{
832+ EncoderBase::Operands args;
833+ if (prefix != no_prefix) {
834+ assert(prefix == prefix_repnz || prefix == prefix_repz);
835+ *stream = prefix;
836+ ++stream;
837+ }
838+ return (char*)EncoderBase::encode(stream, Mnemonic_SCAS, args);
839+}
840+
841+ENCODER_DECLARE_EXPORT char * stos(char * stream, unsigned char prefix)
842+{
843+ if (prefix != no_prefix) {
844+ assert(prefix == prefix_rep);
845+ *stream = prefix;
846+ ++stream;
847+ }
848+
849+ EncoderBase::Operands args;
850+ return (char*)EncoderBase::encode(stream, Mnemonic_STOS, args);
851+}
852+
853+// Intrinsic FP math functions
854+
855+ENCODER_DECLARE_EXPORT char * fprem(char * stream) {
856+ return (char*)EncoderBase::encode(stream, Mnemonic_FPREM,
857+ EncoderBase::Operands());
858+}
859+
860+ENCODER_DECLARE_EXPORT char * fprem1(char * stream) {
861+ return (char*)EncoderBase::encode(stream, Mnemonic_FPREM1,
862+ EncoderBase::Operands());
863+}
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/load_store.cpp
@@ -0,0 +1,458 @@
1+/* libs/pixelflinger/codeflinger/x86/load_store.cpp
2+**
3+** Copyright 2006, The Android Open Source Project
4+**
5+** Licensed under the Apache License, Version 2.0 (the "License");
6+** you may not use this file except in compliance with the License.
7+** You may obtain a copy of the License at
8+**
9+** http://www.apache.org/licenses/LICENSE-2.0
10+**
11+** Unless required by applicable law or agreed to in writing, software
12+** distributed under the License is distributed on an "AS IS" BASIS,
13+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+** See the License for the specific language governing permissions and
15+** limitations under the License.
16+*/
17+
18+#include <assert.h>
19+#include <stdio.h>
20+#include <cutils/log.h>
21+
22+#include "codeflinger/x86/GGLX86Assembler.h"
23+
24+namespace android {
25+
26+// ----------------------------------------------------------------------------
27+
28+void GGLX86Assembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
29+{
30+ const int bits = addr.size;
31+ const int inc = (flags & WRITE_BACK)?1:0;
32+ switch (bits) {
33+ case 32:
34+ if (inc) {
35+ MOV_REG_TO_MEM(s.reg, 0, addr.reg);
36+ ADD_IMM_TO_REG(4, addr.reg);
37+ } else {
38+ MOV_REG_TO_MEM(s.reg, 0, addr.reg);
39+ }
40+ break;
41+ case 24:
42+        // 24-bit formats are a little special and are used only for RGB;
43+        // 0x00BBGGRR is unpacked as the bytes R, G, B
44+ MOV_REG_TO_MEM(s.reg, 0, addr.reg, OpndSize_8);
45+ ROR(8, s.reg);
46+ MOV_REG_TO_MEM(s.reg, 1, addr.reg, OpndSize_8);
47+ ROR(8, s.reg);
48+ MOV_REG_TO_MEM(s.reg, 2, addr.reg, OpndSize_8);
49+ if (!(s.flags & CORRUPTIBLE)) {
50+ ROR(16, s.reg);
51+ }
52+ if (inc) {
53+ ADD_IMM_TO_REG(3, addr.reg);
54+ }
55+ break;
56+ case 16:
57+ if (inc) {
58+ MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_16);
59+ ADD_IMM_TO_REG(2, addr.reg);
60+ } else {
61+ MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_16);
62+ }
63+ break;
64+ case 8:
65+ if (inc) {
66+ MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_8);
67+ ADD_IMM_TO_REG(1, addr.reg);
68+ } else {
69+ MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_8);
70+ }
71+ break;
72+ }
73+}
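// Worked example of the 24-bit store path above, with s.reg holding
// 0x00BBGGRR: the first byte store writes RR at addr+0; ROR(8) rotates the
// register to 0xRR00BBGG so GG lands at addr+1; another ROR(8) gives
// 0xGGRR00BB so BB lands at addr+2; the closing ROR(16) restores the
// original value when the caller still needs s.reg intact.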
74+
75+void GGLX86Assembler::load(pointer_t& addr, const pixel_t& s, uint32_t flags)
76+{
77+ Scratch scratches(registerFile());
78+ int s0;
79+
80+ const int bits = addr.size;
81+ // WRITE_BACK indicates that the base register will also be updated after loading the data
82+ const int inc = (flags & WRITE_BACK)?1:0;
83+ switch (bits) {
84+ case 32:
85+ if (inc) {
86+ MOV_MEM_TO_REG(0, addr.reg, s.reg);
87+            ADD_IMM_TO_REG(4, addr.reg);
88+        } else
89+            MOV_MEM_TO_REG(0, addr.reg, s.reg);
90+ break;
91+ case 24:
92+        // 24-bit formats are a little special and are used only for RGB;
93+        // the bytes R, G, B are packed as 0x00BBGGRR
94+ s0 = scratches.obtain();
95+ if (s.reg != addr.reg) {
96+ MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s.reg); //R
97+ MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 1, s0); //G
98+ SHL(8, s0);
99+ OR_REG_TO_REG(s0, s.reg);
100+ MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 2, s0); //B
101+ SHL(16, s0);
102+ OR_REG_TO_REG(s0, s.reg);
103+ } else {
104+ int s1 = scratches.obtain();
105+ MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s1); //R
106+ MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 1, s0); //G
107+ SHL(8, s0);
108+ OR_REG_TO_REG(s0, s1);
109+ MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 2, s0); //B
110+ SHL(16, s0);
111+ OR_REG_TO_REG(s0, s1);
112+ MOV_REG_TO_REG(s1, s.reg);
113+ scratches.recycle(s1);
114+
115+ }
116+ scratches.recycle(s0);
117+ if (inc)
118+ ADD_IMM_TO_REG(3, addr.reg);
119+ break;
120+ case 16:
121+ if (inc) {
122+ MOVZX_MEM_TO_REG(OpndSize_16, addr.reg, 0, s.reg);
123+ ADD_IMM_TO_REG(2, addr.reg);
124+ }
125+ else MOVZX_MEM_TO_REG(OpndSize_16, addr.reg, 0, s.reg);
126+ break;
127+ case 8:
128+ if (inc) {
129+ MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s.reg);
130+ ADD_IMM_TO_REG(1, addr.reg);
131+ }
132+ else MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s.reg);
133+ break;
134+ }
135+ if (inc) MOV_REG_TO_MEM(addr.reg, addr.offset_ebp, PhysicalReg_EBP);
136+}
137+
138+void GGLX86Assembler::extract(integer_t& d, int s, int h, int l, int bits)
139+{
140+ const int maskLen = h-l;
141+
142+ assert(maskLen<=8);
143+ assert(h);
144+
145+
146+ if (h != bits) {
147+ const int mask = ((1<<maskLen)-1) << l;
148+ MOV_REG_TO_REG(s, d.reg);
149+ AND_IMM_TO_REG(mask, d.reg);// component = packed & mask;
150+ s = d.reg;
151+ }
152+
153+ if (l) {
154+ MOV_REG_TO_REG(s, d.reg);
155+ SHR(l, d.reg);// component = packed >> l;
156+ s = d.reg;
157+ }
158+
159+ if (s != d.reg) {
160+ MOV_REG_TO_REG(s, d.reg);
161+ }
162+
163+ d.s = maskLen;
164+}
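// Worked example: for a 16-bit 565 pixel with green in bits [10:5],
// extract(d, s, 11, 5, 16) computes maskLen = 6, applies the immediate
// mask 0x07E0, shifts right by 5, and records d.s = 6, i.e.
// component = (packed & 0x07E0) >> 5.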
165+
166+void GGLX86Assembler::extract(integer_t& d, const pixel_t& s, int component)
167+{
168+ extract(d, s.reg,
169+ s.format.c[component].h,
170+ s.format.c[component].l,
171+ s.size());
172+}
173+
174+void GGLX86Assembler::extract(component_t& d, const pixel_t& s, int component)
175+{
176+ integer_t r(d.reg, 32, d.flags, d.offset_ebp);
177+ extract(r, s.reg,
178+ s.format.c[component].h,
179+ s.format.c[component].l,
180+ s.size());
181+ d = component_t(r);
182+}
183+
184+
185+void GGLX86Assembler::expand(integer_t& d, const component_t& s, int dbits)
186+{
187+ if (s.l || (s.flags & CLEAR_HI)) {
188+ extract(d, s.reg, s.h, s.l, 32);
189+ expand(d, d, dbits);
190+ } else {
191+ expand(d, integer_t(s.reg, s.size(), s.flags, s.offset_ebp), dbits);
192+ }
193+}
194+
195+void GGLX86Assembler::expand(component_t& d, const component_t& s, int dbits)
196+{
197+ integer_t r(d.reg, 32, d.flags, d.offset_ebp);
198+ expand(r, s, dbits);
199+ d = component_t(r);
200+}
201+
202+void GGLX86Assembler::expand(integer_t& dst, const integer_t& src, int dbits)
203+{
204+ assert(src.size());
205+
206+ Scratch scratches(registerFile());
207+ int sbits = src.size();
208+ int s = src.reg;
209+ int d = dst.reg;
210+
211+ // be sure to set 'dst' after we read 'src' as they may be identical
212+ dst.s = dbits;
213+ dst.flags = 0;
214+
215+ if (dbits<=sbits) {
216+ if (s != d) {
217+ MOV_REG_TO_REG(s, d);
218+ }
219+ return;
220+ }
221+
222+ if (sbits == 1) {
223+ MOV_REG_TO_REG(s, d);
224+ SHL(dbits, d);
225+ SUB_REG_TO_REG(s, d);
226+ // d = (s<<dbits) - s;
227+ return;
228+ }
229+
230+ if (dbits % sbits) {
231+ MOV_REG_TO_REG(s, d);
232+ SHL(dbits-sbits, d);
233+ // d = s << (dbits-sbits);
234+ dbits -= sbits;
235+ int temp = scratches.obtain();
236+ do {
237+ MOV_REG_TO_REG(d, temp);
238+ SHR(sbits, temp);
239+ OR_REG_TO_REG(temp, d);
240+ // d |= d >> sbits;
241+ dbits -= sbits;
242+ sbits *= 2;
243+ } while(dbits>0);
244+ return;
245+ }
246+
247+ dbits -= sbits;
248+ do {
249+ MOV_REG_TO_REG(s, d);
250+ SHL(sbits, d);
251+ OR_REG_TO_REG(s, d);
252+ // d |= d<<sbits;
253+ s = d;
254+ dbits -= sbits;
255+ if (sbits*2 < dbits) {
256+ sbits *= 2;
257+ }
258+ } while(dbits>0);
259+}
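// Worked example: expand() from sbits = 5 to dbits = 8 takes the
// (dbits % sbits) branch, producing d = s << 3 followed by d |= d >> 5,
// which is the classic bit replication (s << 3) | (s >> 2); e.g. 0x1F
// expands to 0xFF and 0x00 stays 0x00, preserving the full output range.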
260+
261+void GGLX86Assembler::downshift(
262+ pixel_t& d, int component, component_t s, reg_t& dither)
263+{
264+ const needs_t& needs = mBuilderContext.needs;
265+ Scratch scratches(registerFile());
266+ // s(temp) is loaded in build_blending
267+ s.reg = scratches.obtain();
268+ MOV_MEM_TO_REG(s.offset_ebp, EBP, s.reg);
269+
270+ int sh = s.h;
271+ int sl = s.l;
272+ int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
273+ int maskLoBits = (sl!=0) ? ((s.flags & CLEAR_LO)?1:0) : 0;
274+ int sbits = sh - sl;
275+
276+ int dh = d.format.c[component].h;
277+ int dl = d.format.c[component].l;
278+ int dbits = dh - dl;
279+ int dithering = 0;
280+
281+ ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
282+
283+ if (sbits>dbits) {
284+ // see if we need to dither
285+ dithering = mDithering;
286+ }
287+
288+ int ireg = d.reg;
289+ if (!(d.flags & FIRST)) {
290+ if (s.flags & CORRUPTIBLE) {
291+ ireg = s.reg;
292+ } else {
293+ ireg = scratches.obtain();
294+ }
295+ }
296+ d.flags &= ~FIRST;
297+
298+ if (maskHiBits) {
299+ // we need to mask the high bits (and possibly the lowbits too)
300+ // and we might be able to use immediate mask.
301+ if (!dithering) {
302+ // we don't do this if we only have maskLoBits because we can
303+ // do it more efficiently below (in the case where dl=0)
304+ const int offset = sh - dbits;
305+ if (dbits<=8 && offset >= 0) {
306+ const uint32_t mask = ((1<<dbits)-1) << offset;
307+ build_and_immediate(ireg, s.reg, mask, 32);
308+ s.reg = ireg;
309+ sl = offset;
310+ sbits = dbits;
311+ maskLoBits = maskHiBits = 0;
312+ }
313+ } else {
314+ // in the dithering case though, we need to preserve the lower bits
315+ const uint32_t mask = ((1<<sbits)-1) << sl;
316+ build_and_immediate(ireg, s.reg, mask, 32);
317+ s.reg = ireg;
318+ maskLoBits = maskHiBits = 0;
319+ }
320+ }
321+
322+ // XXX: we could special case (maskHiBits & !maskLoBits)
323+ // like we do for maskLoBits below, but it happens very rarely
324+ // that we have maskHiBits only and the conditions necessary to lead
325+ // to better code (like doing d |= s << 24)
326+
327+ if (maskHiBits) {
328+ MOV_REG_TO_REG(s.reg, ireg);
329+ SHL(32-sh, ireg);
330+ sl += 32-sh;
331+ sh = 32;
332+ s.reg = ireg;
333+ maskHiBits = 0;
334+ }
335+
336+ // Downsampling should be performed as follows:
337+ // V * ((1<<dbits)-1) / ((1<<sbits)-1)
338+ // V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)]
339+ // V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
340+ // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
341+ // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
342+ //
343+ // By approximating (1>>dbits) and (1>>sbits) to 0:
344+ //
345+ // V>>(sbits-dbits) - V>>sbits
346+ //
347+ // A good approximation is V>>(sbits-dbits),
348+ // but better one (needed for dithering) is:
349+ //
350+ // (V>>(sbits-dbits)<<sbits - V)>>sbits
351+ // (V<<dbits - V)>>sbits
352+ // (V - V>>dbits)>>(sbits-dbits)
353+
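    // Concrete case, 8 bits down to 5 (sbits = 8, dbits = 5): the simple
    // approximation is V >> 3; the dithered path below instead computes
    // V - (V >> 5), adds the dither value shifted right by
    // GGL_DITHER_BITS - 3, and relies on the (maskLoBits|dithering) branch
    // further down to apply the final >> 3.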
354+ // Dithering is done here
355+ if (dithering) {
356+ comment("dithering");
357+ if (sl) {
358+ MOV_REG_TO_REG(s.reg, ireg);
359+ SHR(sl, ireg);
360+ sh -= sl;
361+ sl = 0;
362+ s.reg = ireg;
363+ }
364+ // scaling (V-V>>dbits)
365+ int temp_reg = scratches.obtain();
366+ MOV_REG_TO_REG(s.reg, temp_reg);
367+ SHR(dbits, temp_reg);
368+ MOV_REG_TO_REG(s.reg, ireg);
369+ SUB_REG_TO_REG(temp_reg, ireg);
370+ scratches.recycle(temp_reg);
371+ const int shift = (GGL_DITHER_BITS - (sbits-dbits));
372+ dither.reg = scratches.obtain();
373+ MOV_MEM_TO_REG(dither.offset_ebp, EBP, dither.reg);
374+ if (shift>0) {
375+ temp_reg = scratches.obtain();
376+ MOV_REG_TO_REG(dither.reg, temp_reg);
377+ SHR(shift, temp_reg);
378+ ADD_REG_TO_REG(temp_reg, ireg);
379+ scratches.recycle(temp_reg);
380+ }
381+ else if (shift<0) {
382+ temp_reg = scratches.obtain();
383+ MOV_REG_TO_REG(dither.reg, temp_reg);
384+ SHL(-shift, temp_reg);
385+ ADD_REG_TO_REG(temp_reg, ireg);
386+ scratches.recycle(temp_reg);
387+ }
388+ else {
389+ ADD_REG_TO_REG(dither.reg, ireg);
390+ }
391+ scratches.recycle(dither.reg);
392+ s.reg = ireg;
393+ }
394+
395+ if ((maskLoBits|dithering) && (sh > dbits)) {
396+ int shift = sh-dbits;
397+ if (dl) {
398+ MOV_REG_TO_REG(s.reg, ireg);
399+ SHR(shift, ireg);
400+ if (ireg == d.reg) {
401+ MOV_REG_TO_REG(ireg, d.reg);
402+ SHL(dl, d.reg);
403+ } else {
404+ int temp_reg = scratches.obtain();
405+ MOV_REG_TO_REG(ireg, temp_reg);
406+ SHL(dl, temp_reg);
407+ OR_REG_TO_REG(temp_reg, d.reg);
408+ scratches.recycle(temp_reg);
409+ }
410+ } else {
411+ if (ireg == d.reg) {
412+ MOV_REG_TO_REG(s.reg, d.reg);
413+ SHR(shift, d.reg);
414+ } else {
415+ int temp_reg = scratches.obtain();
416+ MOV_REG_TO_REG(s.reg, temp_reg);
417+ SHR(shift, temp_reg);
418+ OR_REG_TO_REG(temp_reg, d.reg);
419+ scratches.recycle(temp_reg);
420+ }
421+ }
422+ } else {
423+ int shift = sh-dh;
424+ if (shift>0) {
425+ if (ireg == d.reg) {
426+ MOV_REG_TO_REG(s.reg, d.reg);
427+ SHR(shift, d.reg);
428+ } else {
429+ int temp_reg = scratches.obtain();
430+ MOV_REG_TO_REG(s.reg, temp_reg);
431+ SHR(shift, temp_reg);
432+ OR_REG_TO_REG(temp_reg, d.reg);
433+ scratches.recycle(temp_reg);
434+ }
435+ } else if (shift<0) {
436+ if (ireg == d.reg) {
437+ MOV_REG_TO_REG(s.reg, d.reg);
438+ SHL(-shift, d.reg);
439+ } else {
440+ int temp_reg = scratches.obtain();
441+ MOV_REG_TO_REG(s.reg, temp_reg);
442+ SHL(-shift, temp_reg);
443+ OR_REG_TO_REG(temp_reg, d.reg);
444+ scratches.recycle(temp_reg);
445+ }
446+ } else {
447+ if (ireg == d.reg) {
448+ if (s.reg != d.reg) {
449+ MOV_REG_TO_REG(s.reg, d.reg);
450+ }
451+ } else {
452+ OR_REG_TO_REG(s.reg, d.reg);
453+ }
454+ }
455+ }
456+}
457+
458+}; // namespace android
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/texturing.cpp
@@ -0,0 +1,1799 @@
1+/* libs/pixelflinger/codeflinger/x86/texturing.cpp
2+**
3+** Copyright 2006, The Android Open Source Project
4+**
5+** Licensed under the Apache License, Version 2.0 (the "License");
6+** you may not use this file except in compliance with the License.
7+** You may obtain a copy of the License at
8+**
9+** http://www.apache.org/licenses/LICENSE-2.0
10+**
11+** Unless required by applicable law or agreed to in writing, software
12+** distributed under the License is distributed on an "AS IS" BASIS,
13+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+** See the License for the specific language governing permissions and
15+** limitations under the License.
16+*/
17+
18+#include <assert.h>
19+#include <stdint.h>
20+#include <stdlib.h>
21+#include <stdio.h>
22+#include <sys/types.h>
23+
24+#include <cutils/log.h>
25+
26+#include "codeflinger/x86/GGLX86Assembler.h"
27+
28+
29+namespace android {
30+
31+// ---------------------------------------------------------------------------
32+
33+// iterators are initialized like this:
34+// (intToFixedCenter(x) * dx)>>16 + x0
35+// ((x<<16 + 0x8000) * dx)>>16 + x0
36+// ((x<<16)*dx + (0x8000*dx))>>16 + x0
37+// ( (x*dx) + dx>>1 ) + x0
38+// (x*dx) + (dx>>1 + x0)
39+
40+void GGLX86Assembler::init_iterated_color(fragment_parts_t& parts, const reg_t& x)
41+{
42+ context_t const* c = mBuilderContext.c;
43+ const needs_t& needs = mBuilderContext.needs;
44+ int temp_reg;
45+
46+ if (mSmooth) {
47+ // NOTE: we could take this case in the mDithering + !mSmooth case,
48+ // but this would use up to 4 more registers for the color components
49+ // for only a little added quality.
50+ // Currently, this causes the system to run out of registers in
51+ // some case (see issue #719496)
52+
53+ comment("compute initial iterated color (smooth and/or dither case)");
54+
55+ parts.iterated_packed = 0;
56+ parts.packed = 0;
57+
58+ // 0x1: color component
59+ // 0x2: iterators
60+ //parts.reload = 3;
61+ const int optReload = mOptLevel >> 1;
62+ if (optReload >= 3) parts.reload = 0; // reload nothing
63+ else if (optReload == 2) parts.reload = 2; // reload iterators
64+ else if (optReload == 1) parts.reload = 1; // reload colors
65+ else if (optReload <= 0) parts.reload = 3; // reload both
66+
67+ if (!mSmooth) {
68+ // we're not smoothing (just dithering), we never have to
69+ // reload the iterators
70+ parts.reload &= ~2;
71+ }
72+
73+ Scratch scratches(registerFile());
74+ const int t0 = (parts.reload & 1) ? scratches.obtain() : 0;
75+ const int t1 = (parts.reload & 2) ? scratches.obtain() : 0;
76+ for (int i=0 ; i<4 ; i++) {
77+ if (!mInfo[i].iterated)
78+ continue;
79+ // this component exists in the destination and is not replaced
80+ // by a texture unit.
81+ const int c = (parts.reload & 1) ? t0 : obtainReg();
82+ if (i==0) CONTEXT_LOAD(c, iterators.ydady);
83+ if (i==1) CONTEXT_LOAD(c, iterators.ydrdy);
84+ if (i==2) CONTEXT_LOAD(c, iterators.ydgdy);
85+ if (i==3) CONTEXT_LOAD(c, iterators.ydbdy);
86+ parts.argb[i].reg = c;
87+
88+ if (mInfo[i].smooth) {
89+ parts.argb_dx[i].reg = (parts.reload & 2) ? t1 : obtainReg();
90+ const int dvdx = parts.argb_dx[i].reg;
91+ temp_reg = scratches.obtain();
92+ CONTEXT_LOAD(dvdx, generated_vars.argb[i].dx);
93+ MOV_REG_TO_REG(dvdx, temp_reg);
94+ IMUL(x.reg, temp_reg);
95+ ADD_REG_TO_REG(temp_reg, c);
96+ scratches.recycle(temp_reg);
97+
98+ // adjust the color iterator to make sure it won't overflow
99+ if (!mAA) {
100+ // this is not needed when we're using anti-aliasing
101+ // because we will (have to) clamp the components
102+ // anyway.
103+ int end = scratches.obtain();
104+ MOV_MEM_TO_REG(parts.count.offset_ebp, PhysicalReg_EBP, end);
105+ SHR(16, end);
106+ IMUL(end, dvdx);
107+ temp_reg = end;
108+ // c - (dvdx*end + c) = -(dvdx*end)
109+ MOV_REG_TO_REG(dvdx, temp_reg);
110+ NEG(temp_reg);
111+ ADD_REG_TO_REG(c, dvdx);
112+ CMOV_REG_TO_REG(Mnemonic_CMOVS, temp_reg, c);
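+                    // branchless fix-up: dvdx now holds the end-of-span value
+                    // (c + dvdx*count) and temp_reg holds -(dvdx*count); if the
+                    // ADD set the sign flag, CMOVS rewinds c to -(dvdx*count)
+                    // so the last pixel of the span lands exactly at zero.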
113+ /*
114+ SUB_REG_TO_REG(dvdx, temp_reg);
115+ switch(i) {
116+ case 0:
117+ JCC(Mnemonic_JNS, "1f_init_iterated_color");
118+ SUB_REG_TO_REG(dvdx, c);
119+ label("1f_init_iterated_color");
120+ break;
121+ case 1:
122+ JCC(Mnemonic_JNS, "2f_init_iterated_color");
123+ SUB_REG_TO_REG(dvdx, c);
124+ label("2f_init_iterated_color");
125+ break;
126+ case 2:
127+ JCC(Mnemonic_JNS, "3f_init_iterated_color");
128+ SUB_REG_TO_REG(dvdx, c);
129+ label("3f_init_iterated_color");
130+ break;
131+ case 3:
132+ JCC(Mnemonic_JNS, "4f_init_iterated_color");
133+ SUB_REG_TO_REG(dvdx, c);
134+ label("4f_init_iterated_color");
135+ break;
136+ }
137+ */
138+
139+ MOV_REG_TO_REG(c, temp_reg);
140+ SAR(31, temp_reg);
141+ NOT(temp_reg);
142+ AND_REG_TO_REG(temp_reg, c);
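+                    // the MOV/SAR/NOT/AND sequence above clamps c to zero
+                    // without a branch: temp_reg = ~(c >> 31) is all-ones when
+                    // c >= 0 and zero when c < 0, so the AND keeps c or zeroes it.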
143+ scratches.recycle(end);
144+ }
145+ if(parts.reload & 2)
146+ scratches.recycle(dvdx);
147+ else
148+ recycleReg(dvdx);
149+ }
150+ CONTEXT_STORE(c, generated_vars.argb[i].c);
151+ if(parts.reload & 1)
152+ scratches.recycle(parts.argb[i].reg);
153+ else
154+ recycleReg(parts.argb[i].reg);
155+
156+ parts.argb[i].reg = -1;
157+ //if (parts.reload & 1) {
158+ // //MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
159+ //}
160+ }
161+ } else {
162+        // We're not smooth-shading, so we can just use a packed
163+        // version of the color and extract the components as needed
164+        // (or not at all if we don't blend)
165+
166+ // figure out if we need the iterated color
167+ int load = 0;
168+ for (int i=0 ; i<4 ; i++) {
169+ component_info_t& info = mInfo[i];
170+ if ((info.inDest || info.needed) && !info.replaced)
171+ load |= 1;
172+ }
173+
174+ parts.iterated_packed = 1;
175+ parts.packed = (!mTextureMachine.mask && !mBlending
176+ && !mFog && !mDithering);
177+ parts.reload = 0;
178+ if (load || parts.packed) {
179+ if (mBlending || mDithering || mInfo[GGLFormat::ALPHA].needed) {
180+ comment("load initial iterated color (8888 packed)");
181+ parts.iterated.setTo(obtainReg(),
182+ &(c->formats[GGL_PIXEL_FORMAT_RGBA_8888]));
183+ CONTEXT_LOAD(parts.iterated.reg, packed8888);
184+ } else {
185+ comment("load initial iterated color (dest format packed)");
186+
187+ parts.iterated.setTo(obtainReg(), &mCbFormat);
188+
189+ // pre-mask the iterated color
190+ const int bits = parts.iterated.size();
191+ const uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
192+ uint32_t mask = 0;
193+ if (mMasking) {
194+ for (int i=0 ; i<4 ; i++) {
195+ const int component_mask = 1<<i;
196+ const int h = parts.iterated.format.c[i].h;
197+ const int l = parts.iterated.format.c[i].l;
198+ if (h && (!(mMasking & component_mask))) {
199+ mask |= ((1<<(h-l))-1) << l;
200+ }
201+ }
202+ }
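+                // 'mask' now covers the bits of every component that is NOT
+                // write-masked; if none of those bits exist in the destination
+                // format, the load below can be skipped entirely.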
203+
204+ if (mMasking && ((mask & size)==0)) {
205+ // none of the components are present in the mask
206+ } else {
207+ CONTEXT_LOAD(parts.iterated.reg, packed);
208+ if (mCbFormat.size == 1) {
209+ int imm = 0xFF;
210+ AND_IMM_TO_REG(imm, parts.iterated.reg);
211+ } else if (mCbFormat.size == 2) {
212+ SHR(16, parts.iterated.reg);
213+ }
214+ }
215+
216+ // pre-mask the iterated color
217+ if (mMasking) {
218+ //AND_IMM_TO_REG(mask, parts.iterated.reg);
219+ build_and_immediate(parts.iterated.reg, parts.iterated.reg,
220+ mask, bits);
221+ }
222+ }
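+            // spill the packed color into the scratch area of the stack frame
+            // (mCurSp tracks the next free slot below EBP) so the register can
+            // be recycled; build_iterated_color reloads it from offset_ebp.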
223+ mCurSp = mCurSp - 4;
224+ parts.iterated.offset_ebp = mCurSp;
225+ MOV_REG_TO_MEM(parts.iterated.reg, parts.iterated.offset_ebp, EBP);
226+ //PUSH(parts.iterated.reg);
227+ recycleReg(parts.iterated.reg);
228+ parts.iterated.reg=-1;
229+ }
230+ }
231+}
232+
233+void GGLX86Assembler::build_iterated_color(
234+ component_t& fragment,
235+ fragment_parts_t& parts,
236+ int component,
237+ Scratch& regs)
238+{
239+
240+ if (!mInfo[component].iterated)
241+ return;
242+
243+ if (parts.iterated_packed) {
244+ // iterated colors are packed, extract the one we need
245+ parts.iterated.reg = regs.obtain();
246+ MOV_MEM_TO_REG(parts.iterated.offset_ebp, EBP, parts.iterated.reg);
247+ extract(fragment, parts.iterated, component);
248+ regs.recycle(parts.iterated.reg);
249+ } else {
250+ fragment.h = GGL_COLOR_BITS;
251+ fragment.l = GGL_COLOR_BITS - 8;
252+ fragment.flags |= CLEAR_LO;
253+ // iterated colors are held in their own register,
254+ // (smooth and/or dithering case)
255+ Scratch scratches(registerFile());
256+ mBuilderContext.Rctx = scratches.obtain();
257+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
258+ if (parts.reload==3) {
259+ // this implies mSmooth
260+ int dx = scratches.obtain();
261+ CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
262+ CONTEXT_LOAD(dx, generated_vars.argb[component].dx);
263+ ADD_REG_TO_REG(fragment.reg, dx);
264+ CONTEXT_STORE(dx, generated_vars.argb[component].c);
265+ scratches.recycle(dx);
266+ } else if (parts.reload & 1) {
267+ //MOV_MEM_TO_REG(parts.argb[component].offset_ebp, EBP, fragment.reg);
268+ CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
269+ } else {
270+ // we don't reload, so simply rename the register and mark as
271+ // non CORRUPTIBLE so that the texture env or blending code
272+ // won't modify this (renamed) register
273+ //regs.recycle(fragment.reg);
274+ //MOV_MEM_TO_REG(parts.argb[component].offset_ebp, EBP, fragment.reg);
275+ // it will also be used in build_smooth_shade
276+ CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
277+ //fragment.reg = parts.argb[component].reg;
278+ //fragment.flags &= ~CORRUPTIBLE;
279+ }
280+ scratches.recycle(mBuilderContext.Rctx);
281+ if (mInfo[component].smooth && mAA) {
282+ // when using smooth shading AND anti-aliasing, we need to clamp
283+ // the iterators because there is always an extra pixel on the
284+ // edges, which most of the time will cause an overflow
285+            // (since technically it's outside of the domain).
286+ int temp = scratches.obtain();
287+ MOV_REG_TO_REG(fragment.reg, temp);
288+ SAR(31, temp);
289+ NOT(temp);
290+            AND_REG_TO_REG(temp, fragment.reg);
291+ component_sat(fragment, temp);
292+ scratches.recycle(temp);
293+ }
294+ }
295+}
296+
297+// ---------------------------------------------------------------------------
298+
299+void GGLX86Assembler::decodeLogicOpNeeds(const needs_t& needs)
300+{
301+    // gather some information about the components we need to process...
302+ const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
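+    // the needs field stores only the low bits of the logic op; OR-ing in
+    // GGL_CLEAR (presumably 0x1500, matching GL_CLEAR, with a zero low
+    // nibble) rebuilds the full GGLlogicop enum value.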
303+ switch(opcode) {
304+ case GGL_COPY:
305+ mLogicOp = 0;
306+ break;
307+ case GGL_CLEAR:
308+ case GGL_SET:
309+ mLogicOp = LOGIC_OP;
310+ break;
311+ case GGL_AND:
312+ case GGL_AND_REVERSE:
313+ case GGL_AND_INVERTED:
314+ case GGL_XOR:
315+ case GGL_OR:
316+ case GGL_NOR:
317+ case GGL_EQUIV:
318+ case GGL_OR_REVERSE:
319+ case GGL_OR_INVERTED:
320+ case GGL_NAND:
321+ mLogicOp = LOGIC_OP|LOGIC_OP_SRC|LOGIC_OP_DST;
322+ break;
323+ case GGL_NOOP:
324+ case GGL_INVERT:
325+ mLogicOp = LOGIC_OP|LOGIC_OP_DST;
326+ break;
327+ case GGL_COPY_INVERTED:
328+ mLogicOp = LOGIC_OP|LOGIC_OP_SRC;
329+ break;
330+ };
331+}
332+
333+void GGLX86Assembler::decodeTMUNeeds(const needs_t& needs, context_t const* c)
334+{
335+ uint8_t replaced=0;
336+ mTextureMachine.mask = 0;
337+ mTextureMachine.activeUnits = 0;
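+    // walk the TMUs from last to first: a later unit in GGL_REPLACE mode
+    // overwrites the components produced by earlier units, so once every
+    // component is replaced the remaining (earlier) units can be skipped.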
338+ for (int i=GGL_TEXTURE_UNIT_COUNT-1 ; i>=0 ; i--) {
339+ texture_unit_t& tmu = mTextureMachine.tmu[i];
340+ if (replaced == 0xF) {
341+ // all components are replaced, skip this TMU.
342+ tmu.format_idx = 0;
343+ tmu.mask = 0;
344+ tmu.replaced = replaced;
345+ continue;
346+ }
347+ tmu.format_idx = GGL_READ_NEEDS(T_FORMAT, needs.t[i]);
348+ tmu.format = c->formats[tmu.format_idx];
349+ tmu.bits = tmu.format.size*8;
350+ tmu.swrap = GGL_READ_NEEDS(T_S_WRAP, needs.t[i]);
351+ tmu.twrap = GGL_READ_NEEDS(T_T_WRAP, needs.t[i]);
352+ tmu.env = ggl_needs_to_env(GGL_READ_NEEDS(T_ENV, needs.t[i]));
353+ tmu.pot = GGL_READ_NEEDS(T_POT, needs.t[i]);
354+ tmu.linear = GGL_READ_NEEDS(T_LINEAR, needs.t[i])
355+ && tmu.format.size!=3; // XXX: only 8, 16 and 32 modes for now
356+
357+ // 5551 linear filtering is not supported
358+ if (tmu.format_idx == GGL_PIXEL_FORMAT_RGBA_5551)
359+ tmu.linear = 0;
360+
361+ tmu.mask = 0;
362+ tmu.replaced = replaced;
363+
364+ if (tmu.format_idx) {
365+ mTextureMachine.activeUnits++;
366+ if (tmu.format.c[0].h) tmu.mask |= 0x1;
367+ if (tmu.format.c[1].h) tmu.mask |= 0x2;
368+ if (tmu.format.c[2].h) tmu.mask |= 0x4;
369+ if (tmu.format.c[3].h) tmu.mask |= 0x8;
370+ if (tmu.env == GGL_REPLACE) {
371+ replaced |= tmu.mask;
372+ } else if (tmu.env == GGL_DECAL) {
373+ if (!tmu.format.c[GGLFormat::ALPHA].h) {
374+ // if we don't have alpha, decal does nothing
375+ tmu.mask = 0;
376+ } else {
377+ // decal always ignores At
378+ tmu.mask &= ~(1<<GGLFormat::ALPHA);
379+ }
380+ }
381+ }
382+ mTextureMachine.mask |= tmu.mask;
383+ ////printf("%d: mask=%08lx, replaced=%08lx\n",
384+ // i, int(tmu.mask), int(tmu.replaced));
385+ }
386+ mTextureMachine.replaced = replaced;
387+ mTextureMachine.directTexture = 0;
388+ ////printf("replaced=%08lx\n", mTextureMachine.replaced);
389+}
390+
391+
392+void GGLX86Assembler::init_textures(
393+ tex_coord_t* coords,
394+ const reg_t& x, const reg_t& y)
395+{
396+ context_t const* c = mBuilderContext.c;
397+ const needs_t& needs = mBuilderContext.needs;
398+ reg_t temp_reg_t;
399+ int Rx = x.reg;
400+ int Ry = y.reg;
401+
402+ if (mTextureMachine.mask) {
403+ comment("compute texture coordinates");
404+ }
405+
406+ // init texture coordinates for each tmu
407+ const int cb_format_idx = GGL_READ_NEEDS(CB_FORMAT, needs.n);
408+ const bool multiTexture = mTextureMachine.activeUnits > 1;
409+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
410+ const texture_unit_t& tmu = mTextureMachine.tmu[i];
411+ if (tmu.format_idx == 0)
412+ continue;
413+ if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
414+ (tmu.twrap == GGL_NEEDS_WRAP_11))
415+ {
416+ Scratch scratches(registerFile());
417+ // 1:1 texture
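+            // GGL_NEEDS_WRAP_11 means s/t advance exactly one texel per
+            // pixel, so instead of iterating s and t we fold base, offset
+            // and stride into a single pointer that walks the texture.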
418+ pointer_t& txPtr = coords[i].ptr;
419+ txPtr.setTo(obtainReg(), tmu.bits);
420+ CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydsdy);
421+ SAR(16, txPtr.reg);
422+ ADD_REG_TO_REG(txPtr.reg, Rx);
423+ CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydtdy);
424+ SAR(16, txPtr.reg);
425+ ADD_REG_TO_REG(txPtr.reg, Ry);
426+ // Rx and Ry are changed
427+ // Rx = Rx + ti.iterators.ydsdy>>16
428+ // Ry = Ry + ti.iterators.ydtdy>>16
429+            // Rx = Ry * ti.stride + Rx
430+
431+ // merge base & offset
432+ CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride);
433+ IMUL(Ry, txPtr.reg);
434+ ADD_REG_TO_REG(txPtr.reg, Rx);
435+
436+ CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);
437+ temp_reg_t.setTo(Rx);
438+ base_offset(txPtr, txPtr, temp_reg_t);
439+ //PUSH(txPtr.reg);
440+ mCurSp = mCurSp - 4;
441+ txPtr.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg, parts.cbPtr.reg, parts.z.reg
442+ MOV_REG_TO_MEM(txPtr.reg, txPtr.offset_ebp, EBP);
443+ recycleReg(txPtr.reg);
444+ txPtr.reg=-1;
445+ } else {
446+ Scratch scratches(registerFile());
447+ reg_t& s = coords[i].s;
448+ reg_t& t = coords[i].t;
449+ // s = (x * dsdx)>>16 + ydsdy
450+ // s = (x * dsdx)>>16 + (y*dsdy)>>16 + s0
451+ // t = (x * dtdx)>>16 + ydtdy
452+ // t = (x * dtdx)>>16 + (y*dtdy)>>16 + t0
453+ const int need_w = GGL_READ_NEEDS(W, needs.n);
454+ MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
455+ if (need_w) {
456+ s.setTo(obtainReg());
457+ t.setTo(obtainReg());
458+ CONTEXT_LOAD(s.reg, state.texture[i].iterators.ydsdy);
459+ CONTEXT_LOAD(t.reg, state.texture[i].iterators.ydtdy);
460+ CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
461+ CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
462+ recycleReg(s.reg);
463+ recycleReg(t.reg);
464+ } else {
465+ int ydsdy = scratches.obtain();
466+ int dsdx = scratches.obtain();
467+ CONTEXT_LOAD(ydsdy, state.texture[i].iterators.ydsdy);
468+ CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
469+ IMUL(Rx, dsdx);
470+ ADD_REG_TO_REG(dsdx, ydsdy);
471+ CONTEXT_STORE(ydsdy, generated_vars.texture[i].spill[0]);
472+ scratches.recycle(ydsdy);
473+ scratches.recycle(dsdx);
474+
475+ int ydtdy = scratches.obtain();
476+ int dtdx = scratches.obtain();
477+ CONTEXT_LOAD(ydtdy, state.texture[i].iterators.ydtdy);
478+ CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
479+ IMUL(Rx, dtdx);
480+ ADD_REG_TO_REG(dtdx, ydtdy);
481+ CONTEXT_STORE(ydtdy, generated_vars.texture[i].spill[1]);
482+ scratches.recycle(ydtdy);
483+ scratches.recycle(dtdx);
484+
485+ // s.reg = Rx * ti.dsdx + ydsdy
486+ // t.reg = Rx * ti.dtdx + ydtdy
487+ }
488+ }
489+
490+ // direct texture?
491+ if (!multiTexture && !mBlending && !mDithering && !mFog &&
492+ cb_format_idx == tmu.format_idx && !tmu.linear &&
493+ mTextureMachine.replaced == tmu.mask)
494+ {
495+ mTextureMachine.directTexture = i + 1;
496+ }
497+ }
498+}
499+
500+void GGLX86Assembler::build_textures( fragment_parts_t& parts,
501+ Scratch& regs)
502+{
503+ context_t const* c = mBuilderContext.c;
504+ const needs_t& needs = mBuilderContext.needs;
505+ reg_t temp_reg_t;
506+ //int Rctx = mBuilderContext.Rctx;
507+
508+
509+ const bool multiTexture = mTextureMachine.activeUnits > 1;
510+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
511+ const texture_unit_t& tmu = mTextureMachine.tmu[i];
512+ if (tmu.format_idx == 0)
513+ continue;
514+
515+ pointer_t& txPtr = parts.coords[i].ptr;
516+ pixel_t& texel = parts.texel[i];
517+
518+ // repeat...
519+ if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
520+ (tmu.twrap == GGL_NEEDS_WRAP_11))
521+ { // 1:1 textures
522+ comment("fetch texel");
523+ texel.setTo(regs.obtain(), &tmu.format);
524+ txPtr.reg = regs.obtain();
525+ MOV_MEM_TO_REG(txPtr.offset_ebp, EBP, txPtr.reg);
526+ mCurSp = mCurSp - 4;
527+ texel.offset_ebp = mCurSp;
528+ load(txPtr, texel, WRITE_BACK);
529+ MOV_REG_TO_MEM(texel.reg, texel.offset_ebp, EBP);
530+ regs.recycle(texel.reg);
531+ regs.recycle(txPtr.reg);
532+ } else {
533+ Scratch scratches(registerFile());
534+ reg_t& s = parts.coords[i].s;
535+ reg_t& t = parts.coords[i].t;
536+ comment("reload s/t (multitexture or linear filtering)");
537+ s.reg = scratches.obtain();
538+ t.reg = scratches.obtain();
539+ mBuilderContext.Rctx = scratches.obtain();
540+ MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
541+ CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]);
542+ CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]);
543+
544+ comment("compute repeat/clamp");
545+ int width = scratches.obtain();
546+ int height = scratches.obtain();
547+ int U = 0;
548+ int V = 0;
549+            // U and V will be spilled onto the stack since registers are scarce
550+ reg_t reg_U, reg_V;
551+
552+ CONTEXT_LOAD(width, generated_vars.texture[i].width);
553+ CONTEXT_LOAD(height, generated_vars.texture[i].height);
554+ scratches.recycle(mBuilderContext.Rctx);
555+
556+ int FRAC_BITS = 0;
557+ if (tmu.linear) {
558+ // linear interpolation
559+ if (tmu.format.size == 1) {
560+ // for 8-bits textures, we can afford
561+ // 7 bits of fractional precision at no
562+ // additional cost (we can't do 8 bits
563+ // because filter8 uses signed 16 bits muls)
564+ FRAC_BITS = 7;
565+ } else if (tmu.format.size == 2) {
566+ // filter16() is internally limited to 4 bits, so:
567+ // FRAC_BITS=2 generates less instructions,
568+ // FRAC_BITS=3,4,5 creates unpleasant artifacts,
569+ // FRAC_BITS=6+ looks good
570+ FRAC_BITS = 6;
571+ } else if (tmu.format.size == 4) {
572+ // filter32() is internally limited to 8 bits, so:
573+ // FRAC_BITS=4 looks good
574+                // FRAC_BITS=5+ looks better, but generates 3 extra instructions per pixel
575+ FRAC_BITS = 6;
576+ } else {
577+ // for all other cases we use 4 bits.
578+ FRAC_BITS = 4;
579+ }
580+ }
581+ int u = scratches.obtain();
582+ // s.reg and t.reg are recycled in wrapping
583+ wrapping(u, s.reg, width, tmu.swrap, FRAC_BITS, scratches);
584+ int v = scratches.obtain();
585+ wrapping(v, t.reg, height, tmu.twrap, FRAC_BITS, scratches);
586+
587+
588+ if (tmu.linear) {
589+
590+ //mBuilderContext.Rctx = scratches.obtain();
591+ //MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
592+ //CONTEXT_LOAD(width, generated_vars.texture[i].width);
593+ //CONTEXT_LOAD(height, generated_vars.texture[i].height);
594+ //scratches.recycle(mBuilderContext.Rctx);
595+
596+ comment("compute linear filtering offsets");
597+ // pixel size scale
598+ const int shift = 31 - gglClz(tmu.format.size);
599+ U = scratches.obtain();
600+ V = scratches.obtain();
601+
602+
603+ // sample the texel center
604+ SUB_IMM_TO_REG(1<<(FRAC_BITS-1), u);
605+ SUB_IMM_TO_REG(1<<(FRAC_BITS-1), v);
606+
607+ // get the fractionnal part of U,V
608+ MOV_REG_TO_REG(u, U);
609+ AND_IMM_TO_REG((1<<FRAC_BITS)-1, U);
610+ MOV_REG_TO_REG(v, V);
611+ AND_IMM_TO_REG((1<<FRAC_BITS)-1, V);
612+
613+ // below we will pop U and V in the filter function
614+ mCurSp = mCurSp - 4;
615+ MOV_REG_TO_MEM(U, mCurSp, EBP);
616+ reg_U.offset_ebp = mCurSp;
617+ mCurSp = mCurSp - 4;
618+ MOV_REG_TO_MEM(V, mCurSp, EBP);
619+ reg_V.offset_ebp = mCurSp;
620+
621+ scratches.recycle(U);
622+ scratches.recycle(V);
623+
624+ // compute width-1 and height-1
625+ SUB_IMM_TO_REG(1, width);
626+ SUB_IMM_TO_REG(1, height);
627+
628+ // the registers are used up
629+ int temp1 = scratches.obtain();
630+ int temp2 = scratches.obtain();
631+ // get the integer part of U,V and clamp/wrap
632+ // and compute offset to the next texel
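+                    // in both branches the value left in 'width' is the byte
+                    // offset from the current texel to its right-hand
+                    // neighbour ('width' holds width-1 at this point); it is
+                    // stored to generated_vars.rt for the filter code below.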
633+ if (tmu.swrap == GGL_NEEDS_WRAP_REPEAT) {
634+ // u has already been REPEATed
635+ SAR(FRAC_BITS, u);
636+ CMOV_REG_TO_REG(Mnemonic_CMOVS, width, u);
637+ MOV_IMM_TO_REG(1<<shift, temp1);
638+ MOV_REG_TO_REG(width, temp2);
639+ // SHL may pollute the CF flag
640+ SHL(shift, temp2);
641+ mCurSp = mCurSp - 4;
642+ int width_offset_ebp = mCurSp;
643+ // width will be changed after the first comparison
644+ MOV_REG_TO_MEM(width, width_offset_ebp, EBP);
645+ CMP_REG_TO_REG(width, u);
646+ CMOV_REG_TO_REG(Mnemonic_CMOVL, temp1, width);
647+ if (shift) {
648+ CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp2, width);
649+ }
650+ MOV_REG_TO_REG(width, temp1);
651+ NEG(temp1);
652+                        // the width register was clobbered above, so compare u against the saved copy
653+ CMP_MEM_TO_REG(EBP, width_offset_ebp, u);
654+ CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp1, width);
655+ } else {
656+ // u has not been CLAMPed yet
657+ // algorithm:
658+ // if ((u>>4) >= width)
659+ // u = width<<4
660+ // width = 0
661+ // else
662+ // width = 1<<shift
663+ // u = u>>4; // get integer part
664+ // if (u<0)
665+ // u = 0
666+ // width = 0
667+ // generated_vars.rt = width
668+
669+ MOV_REG_TO_REG(width, temp2);
670+ SHL(FRAC_BITS, temp2);
671+ MOV_REG_TO_REG(u, temp1);
672+ SAR(FRAC_BITS, temp1);
673+ CMP_REG_TO_REG(temp1, width);
674+ CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, u);
675+ // mov doesn't affect the flags
676+ MOV_IMM_TO_REG(0, temp2);
677+ CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, width);
678+ MOV_IMM_TO_REG(1 << shift, temp2);
679+ CMOV_REG_TO_REG(Mnemonic_CMOVG, temp2, width);
680+
681+ MOV_IMM_TO_REG(0, temp2);
682+ SAR(FRAC_BITS, u);
683+ CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, u);
684+ CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, width);
685+ }
686+ scratches.recycle(temp1);
687+ scratches.recycle(temp2);
688+ mBuilderContext.Rctx = scratches.obtain();
689+ MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
690+ CONTEXT_STORE(width, generated_vars.rt);
691+
692+ const int stride = width;
693+ CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
694+ scratches.recycle(mBuilderContext.Rctx);
695+
696+ temp1 = scratches.obtain();
697+ temp2 = scratches.obtain();
698+
699+ int height_offset_ebp;
700+ if (tmu.twrap == GGL_NEEDS_WRAP_REPEAT) {
701+ // v has already been REPEATed
702+ SAR(FRAC_BITS, v);
703+ CMOV_REG_TO_REG(Mnemonic_CMOVS, height, v);
704+ MOV_IMM_TO_REG(1<<shift, temp1);
705+ MOV_REG_TO_REG(height, temp2);
706+ SHL(shift, temp2);
707+ mCurSp = mCurSp - 4;
708+ height_offset_ebp = mCurSp;
709+ // height will be changed after the first comparison
710+ MOV_REG_TO_MEM(height, height_offset_ebp, EBP);
711+ CMP_REG_TO_REG(height, v);
712+ CMOV_REG_TO_REG(Mnemonic_CMOVL, temp1, height);
713+ if (shift) {
714+ CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp2, height);
715+ }
716+ MOV_REG_TO_REG(height, temp1);
717+ NEG(temp1);
718+                        // the height register was clobbered above, so compare v against the saved copy
719+ CMP_MEM_TO_REG(EBP, height_offset_ebp, v);
720+ CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp1, height);
721+ IMUL(stride, height);
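+                        // 'height' is now the signed byte offset to the texel
+                        // one row down, wrapping back to row 0 at the bottom
+                        // edge (assuming stride is in pixels); it is stored
+                        // to generated_vars.lb below.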
722+ } else {
723+                        // v has not been CLAMPed yet
724+ MOV_REG_TO_REG(height, temp2);
725+ SHL(FRAC_BITS, temp2);
726+ MOV_REG_TO_REG(v, temp1);
727+ SAR(FRAC_BITS, temp1);
728+
729+ mCurSp = mCurSp - 4;
730+ height_offset_ebp = mCurSp;
731+ // height may be changed after the first comparison
732+ MOV_REG_TO_MEM(height, height_offset_ebp, EBP);
733+
734+ CMP_REG_TO_REG(temp1, height);
735+ CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, v);
736+ MOV_IMM_TO_REG(0, temp2);
737+ CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, height);
738+
739+ if (shift) {
740+                            // stride aliases width's register; width's old value is no longer needed
741+ // shift may pollute the flags
742+ SHL(shift, stride);
743+ // height may be changed to 0
744+ CMP_REG_TO_MEM(temp1, height_offset_ebp, EBP);
745+ CMOV_REG_TO_REG(Mnemonic_CMOVG, stride, height);
746+ } else {
747+ CMOV_REG_TO_REG(Mnemonic_CMOVG, stride, height);
748+ }
749+ MOV_IMM_TO_REG(0, temp2);
750+ SAR(FRAC_BITS, v);
751+ CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, v);
752+ CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, height);
753+ }
754+ scratches.recycle(temp1);
755+ scratches.recycle(temp2);
756+ mBuilderContext.Rctx = scratches.obtain();
757+ MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
758+ CONTEXT_STORE(height, generated_vars.lb);
759+ scratches.recycle(mBuilderContext.Rctx);
760+ }
761+
762+ scratches.recycle(width);
763+ scratches.recycle(height);
764+
765+ // iterate texture coordinates...
766+ comment("iterate s,t");
767+ int dsdx = scratches.obtain();
768+ s.reg = scratches.obtain();
769+ mBuilderContext.Rctx = scratches.obtain();
770+ MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
771+ CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
772+ CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]);
773+ ADD_REG_TO_REG(dsdx, s.reg);
774+ CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
775+ scratches.recycle(s.reg);
776+ scratches.recycle(dsdx);
777+ int dtdx = scratches.obtain();
778+ t.reg = scratches.obtain();
779+ CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
780+ CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]);
781+ ADD_REG_TO_REG(dtdx, t.reg);
782+ CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
783+ scratches.recycle(dtdx);
784+ scratches.recycle(t.reg);
785+
786+ // merge base & offset...
787+ comment("merge base & offset");
788+ texel.setTo(scratches.obtain(), &tmu.format);
789+ //txPtr.setTo(texel.reg, tmu.bits);
790+ txPtr.setTo(scratches.obtain(), tmu.bits);
791+ int stride = scratches.obtain();
792+ CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
793+ CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);
794+ scratches.recycle(mBuilderContext.Rctx);
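+            // sign-extend v and stride from 16 bits so the IMUL behaves as a
+            // 16x16->32 multiply, presumably mirroring the SMULBB used on the
+            // ARM path; this assumes texture strides fit in 16 bits.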
795+ MOVSX_REG_TO_REG(OpndSize_16, v, v);
796+ MOVSX_REG_TO_REG(OpndSize_16, stride, stride);
797+ IMUL(v, stride);
798+ ADD_REG_TO_REG(stride, u);// u+v*stride
799+ temp_reg_t.setTo(u);
800+ base_offset(txPtr, txPtr, temp_reg_t);
801+
802+ // recycle registers we don't need anymore
803+ scratches.recycle(u);
804+ scratches.recycle(v);
805+ scratches.recycle(stride);
806+
807+ mCurSp = mCurSp - 4;
808+ texel.offset_ebp = mCurSp;
809+ // load texel
810+ if (!tmu.linear) {
811+ comment("fetch texel in building texture");
812+ load(txPtr, texel, 0);
813+ MOV_REG_TO_MEM(texel.reg, texel.offset_ebp, EBP);
814+ scratches.recycle(texel.reg);
815+ scratches.recycle(txPtr.reg);
816+ } else {
817+ comment("fetch texel, bilinear");
818+                // there are not enough registers, so we spill texel and the previous U and V
819+                // texel.reg is recycled inside the following functions since there is more than one code path
820+ switch (tmu.format.size) {
821+ case 1:
822+ filter8(parts, texel, tmu, reg_U, reg_V, txPtr, FRAC_BITS, scratches);
823+ break;
824+ case 2:
825+ filter16(parts, texel, tmu, reg_U, reg_V, txPtr, FRAC_BITS, scratches);
826+ break;
827+ case 3:
828+ filter24(parts, texel, tmu, U, V, txPtr, FRAC_BITS);
829+ break;
830+ case 4:
831+ filter32(parts, texel, tmu, reg_U, reg_V, txPtr, FRAC_BITS, scratches);
832+ break;
833+ }
834+ }
835+ }
836+ }
837+}
838+
839+void GGLX86Assembler::build_iterate_texture_coordinates(
840+ const fragment_parts_t& parts)
841+{
842+ const bool multiTexture = mTextureMachine.activeUnits > 1;
843+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
844+ const texture_unit_t& tmu = mTextureMachine.tmu[i];
845+ if (tmu.format_idx == 0)
846+ continue;
847+
848+ if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
849+ (tmu.twrap == GGL_NEEDS_WRAP_11))
850+ { // 1:1 textures
851+ const pointer_t& txPtr = parts.coords[i].ptr;
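+            // advance the spilled 1:1 texture pointer by one texel directly
+            // in its stack slot (txPtr.size is in bits, hence the >>3)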
852+ ADD_IMM_TO_MEM(txPtr.size>>3, txPtr.offset_ebp, EBP);
853+ } else {
854+ Scratch scratches(registerFile());
855+ int s = parts.coords[i].s.reg;
856+ int t = parts.coords[i].t.reg;
857+ mBuilderContext.Rctx = scratches.obtain();
858+ MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
859+ s = scratches.obtain();
860+ int dsdx = scratches.obtain();
861+ CONTEXT_LOAD(s, generated_vars.texture[i].spill[0]);
862+ CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
863+ ADD_REG_TO_REG(dsdx, s);
864+ CONTEXT_STORE(s, generated_vars.texture[i].spill[0]);
865+ scratches.recycle(s);
866+ scratches.recycle(dsdx);
867+ int dtdx = scratches.obtain();
868+ t = scratches.obtain();
869+ CONTEXT_LOAD(t, generated_vars.texture[i].spill[1]);
870+ CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
871+ ADD_REG_TO_REG(dtdx, t);
872+ CONTEXT_STORE(t, generated_vars.texture[i].spill[1]);
873+ scratches.recycle(t);
874+ scratches.recycle(dtdx);
875+ }
876+ }
877+}
878+
879+void GGLX86Assembler::filter8(
880+ const fragment_parts_t& parts,
881+ pixel_t& texel, const texture_unit_t& tmu,
882+ reg_t reg_U, reg_t reg_V, pointer_t& txPtr,
883+ int FRAC_BITS, Scratch& scratches)
884+{
885+ if (tmu.format.components != GGL_ALPHA &&
886+ tmu.format.components != GGL_LUMINANCE)
887+ {
888+ // this is a packed format, and we don't support
889+ // linear filtering (it's probably RGB 332)
890+ // Should not happen with OpenGL|ES
891+ MOVZX_MEM_TO_REG(OpndSize_8, txPtr.reg, 0, texel.reg);
892+ MOV_REG_TO_MEM(texel.reg, texel.offset_ebp, EBP);
893+ scratches.recycle(texel.reg);
894+ scratches.recycle(txPtr.reg);
895+ return;
896+ }
897+
898+ // ------------------------
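+    // bilinear weighting: 'd' accumulates
+    //   RB*(U*V) + LB*((1-U)*V) + LT*((1-U)*(1-V)) + RT*k
+    // where k = 2^(2*FRAC_BITS) - U*V - (1-U)*V - (1-U)*(1-V) = U*(1-V);
+    // deriving the last weight as the residual keeps the four weights
+    // summing exactly to one in fixed point.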
899+
900+ //int d = scratches.obtain();
901+ //int u = scratches.obtain();
902+ //int k = scratches.obtain();
903+
904+ scratches.recycle(texel.reg);
905+ int rt = scratches.obtain();
906+ int lb = scratches.obtain();
907+
908+ // RB -> U * V
909+
910+ mBuilderContext.Rctx = scratches.obtain();
911+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
912+ CONTEXT_LOAD(rt, generated_vars.rt);
913+ CONTEXT_LOAD(lb, generated_vars.lb);
914+ scratches.recycle(mBuilderContext.Rctx);
915+ int pixel= scratches.obtain();
916+
917+ int offset = pixel;
918+
919+ MOV_REG_TO_REG(rt, offset);
920+ ADD_REG_TO_REG(lb, offset);
921+
922+ int temp_reg1 = scratches.obtain();
923+ int temp_reg2 = scratches.obtain();
924+    // the addressing mode with both base and scale registers does not seem to
925+    // be encoded correctly, so fold the offset into the base register instead
925+ //MOV_MEM_SCALE_TO_REG(txPtr.reg, offset, 1, temp_reg1, OpndSize_8);
926+ ADD_REG_TO_REG(txPtr.reg, offset);
927+ MOVZX_MEM_TO_REG(OpndSize_8, offset, 0, temp_reg1);
928+ // pixel is only 8-bits
929+ MOV_REG_TO_REG(temp_reg1, pixel);
930+ MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_U.offset_ebp, temp_reg1);
931+ MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_V.offset_ebp, temp_reg2);
932+ IMUL(temp_reg2, temp_reg1);
933+ MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel);
934+ MOVSX_REG_TO_REG(OpndSize_16, temp_reg1, temp_reg2);
935+ IMUL(temp_reg2, pixel);
936+ NEG(temp_reg1);
937+ ADD_IMM_TO_REG(1<<(FRAC_BITS*2), temp_reg1);
938+ mCurSp = mCurSp - 4;
939+ int d_offset_ebp = mCurSp;
940+ MOV_REG_TO_MEM(pixel, d_offset_ebp, EBP);
941+ mCurSp = mCurSp - 4;
942+ int k_offset_ebp = mCurSp;
943+ MOV_REG_TO_MEM(temp_reg1, k_offset_ebp, EBP);
944+
945+
946+ // LB -> (1-U) * V
947+ MOV_MEM_TO_REG(reg_U.offset_ebp, EBP, temp_reg2);
948+ NEG(temp_reg2);
949+ ADD_IMM_TO_REG(1<<FRAC_BITS, temp_reg2);
950+ MOV_REG_TO_MEM(temp_reg2, reg_U.offset_ebp, EBP);
951+
952+ //MOV_MEM_SCALE_TO_REG(txPtr.reg, lb, 1, pixel, OpndSize_8);
953+ ADD_REG_TO_REG(txPtr.reg, lb);
954+ MOVZX_MEM_TO_REG(OpndSize_8, lb, 0, pixel);
955+
956+ MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg2);
957+ MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_V.offset_ebp, temp_reg1);
958+ IMUL(temp_reg1, temp_reg2);
959+ MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel);
960+ MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg1);
961+ IMUL(pixel, temp_reg1);
962+ ADD_REG_TO_MEM(temp_reg1, EBP, d_offset_ebp);
963+ SUB_REG_TO_MEM(temp_reg2, EBP, k_offset_ebp);
964+
965+
966+ // LT -> (1-U)*(1-V)
967+ MOV_MEM_TO_REG(reg_V.offset_ebp, EBP, temp_reg2);
968+ NEG(temp_reg2);
969+ ADD_IMM_TO_REG(1<<FRAC_BITS, temp_reg2);
970+ MOV_REG_TO_MEM(temp_reg2, reg_V.offset_ebp, EBP);
971+
972+ MOVZX_MEM_TO_REG(OpndSize_8, txPtr.reg, 0, pixel);
973+
974+ MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_U.offset_ebp, temp_reg1);
975+ MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg2);
976+ IMUL(temp_reg1, temp_reg2);
977+ MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg1);
978+ MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel);
979+ IMUL(pixel, temp_reg1);
980+ ADD_REG_TO_MEM(temp_reg1, EBP, d_offset_ebp);
981+
982+ // RT -> U*(1-V)
983+ //MOV_MEM_SCALE_TO_REG(txPtr.reg, rt, 1, pixel, OpndSize_8);
984+ ADD_REG_TO_REG(txPtr.reg, rt);
985+ MOVZX_MEM_TO_REG(OpndSize_8, rt, 0, pixel);
986+
987+ int k = rt;
988+ MOV_MEM_TO_REG(k_offset_ebp, EBP, k);
989+ SUB_REG_TO_REG(temp_reg2, k);
990+ MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel);
991+ MOVSX_REG_TO_REG(OpndSize_16, k, k);
992+ IMUL(pixel, k);
993+ ADD_MEM_TO_REG(EBP, d_offset_ebp, k);
994+ MOV_REG_TO_MEM(k, texel.offset_ebp, EBP);
995+ scratches.recycle(rt);
996+ scratches.recycle(lb);
997+ scratches.recycle(pixel);
998+ scratches.recycle(txPtr.reg);
999+ scratches.recycle(temp_reg1);
1000+ scratches.recycle(temp_reg2);
1001+ for (int i=0 ; i<4 ; i++) {
1002+ if (!texel.format.c[i].h) continue;
1003+ texel.format.c[i].h = FRAC_BITS*2+8;
1004+

Part of diff was cut off due to size limit. Use your local client to view the full diff.