
system/core


Commit MetaInfo

Revision: 42361542c8aa1edc8c1892e1d194ac8d0894ca5a (tree)
Time: 2016-12-09 16:05:38
Author: Chih-Wei Huang <cwhuang@linu...>
Committer: Chih-Wei Huang

Log Message

Software-accelerated Pixel Flinger

Typical graphics workloads on VirtualBox are improved 3x.

Supports both x86 and x86_64.

Similar change by HazouPH <jgrharbers@gmail.com>:
http://review.cyanogenmod.org/#/c/70896/

And by Quanganh pham <quanganh2627@gmail.com>:
http://review.cyanogenmod.org/#/c/97125/
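
Note on architecture selection in the hunks below: $(filter x86%,$(TARGET_ARCH)) matches both x86 and x86_64; for example, $(filter x86%, x86_64) expands to "x86_64" while $(filter x86%, arm64) expands to nothing. The ifeq branch (an empty filter result) therefore keeps the existing ARM/MIPS codeflinger sources for non-x86 targets, while x86 and x86_64 builds get the new codeflinger/x86 sources and the libenc encoder instead.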

Change Summary

Diff

--- a/libpixelflinger/Android.mk
+++ b/libpixelflinger/Android.mk
@@ -7,9 +7,16 @@ include $(CLEAR_VARS)
77
88 include $(CLEAR_VARS)
99 PIXELFLINGER_SRC_FILES:= \
10+ codeflinger/CodeCache.cpp \
11+ format.cpp \
12+ clear.cpp \
13+ raster.cpp \
14+ buffer.cpp
15+
16+ifeq ($(filter x86%,$(TARGET_ARCH)),)
17+PIXELFLINGER_SRC_FILES += \
1018 codeflinger/ARMAssemblerInterface.cpp \
1119 codeflinger/ARMAssemblerProxy.cpp \
12- codeflinger/CodeCache.cpp \
1320 codeflinger/GGLAssembler.cpp \
1421 codeflinger/load_store.cpp \
1522 codeflinger/blending.cpp \
@@ -19,10 +26,8 @@ PIXELFLINGER_SRC_FILES:= \
1926 pixelflinger.cpp.arm \
2027 trap.cpp.arm \
2128 scanline.cpp.arm \
22- format.cpp \
23- clear.cpp \
24- raster.cpp \
25- buffer.cpp
29+
30+endif
2631
2732 PIXELFLINGER_CFLAGS := -fstrict-aliasing -fomit-frame-pointer
2833
@@ -43,6 +48,18 @@ PIXELFLINGER_SRC_FILES_arm64 := \
4348 arch-arm64/col32cb16blend.S \
4449 arch-arm64/t32cb16blend.S \
4550
51+PIXELFLINGER_SRC_FILES_x86 := \
52+ codeflinger/x86/X86Assembler.cpp \
53+ codeflinger/x86/GGLX86Assembler.cpp \
54+ codeflinger/x86/load_store.cpp \
55+ codeflinger/x86/blending.cpp \
56+ codeflinger/x86/texturing.cpp \
57+ fixed.cpp \
58+ picker.cpp \
59+ pixelflinger.cpp \
60+ trap.cpp \
61+ scanline.cpp
62+
4663 ifndef ARCH_MIPS_REV6
4764 PIXELFLINGER_SRC_FILES_mips := \
4865 codeflinger/MIPSAssembler.cpp \
@@ -66,6 +83,8 @@ LOCAL_MODULE:= libpixelflinger
6683 LOCAL_SRC_FILES := $(PIXELFLINGER_SRC_FILES)
6784 LOCAL_SRC_FILES_arm := $(PIXELFLINGER_SRC_FILES_arm)
6885 LOCAL_SRC_FILES_arm64 := $(PIXELFLINGER_SRC_FILES_arm64)
86+LOCAL_SRC_FILES_x86 := $(PIXELFLINGER_SRC_FILES_x86)
87+LOCAL_SRC_FILES_x86_64 := $(PIXELFLINGER_SRC_FILES_x86)
6988 LOCAL_SRC_FILES_mips := $(PIXELFLINGER_SRC_FILES_mips)
7089 LOCAL_SRC_FILES_mips64 := $(PIXELFLINGER_SRC_FILES_mips64)
7190 LOCAL_CFLAGS := $(PIXELFLINGER_CFLAGS)
@@ -73,6 +92,8 @@ LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)/include
7392 LOCAL_C_INCLUDES += $(LOCAL_EXPORT_C_INCLUDE_DIRS) \
7493 external/safe-iop/include
7594 LOCAL_SHARED_LIBRARIES := libcutils liblog libutils
95+LOCAL_WHOLE_STATIC_LIBRARIES_x86 := libenc
96+LOCAL_WHOLE_STATIC_LIBRARIES_x86_64 := libenc
7697
7798 # Really this should go away entirely or at least not depend on
7899 # libhardware, but this at least gets us built.
--- /dev/null
+++ b/libpixelflinger/codeflinger/Android.mk
@@ -0,0 +1,3 @@
1+ifneq ($(filter x86%,$(TARGET_ARCH)),)
2+include $(call all-named-subdir-makefiles,x86/libenc)
3+endif
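
This new three-line makefile hooks the x86 instruction encoder into the build: all-named-subdir-makefiles is a stock Android build macro that includes the named subdirectory makefiles (here x86/libenc), which is where the libenc static library referenced by LOCAL_WHOLE_STATIC_LIBRARIES_x86 above is expected to be defined.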
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/GGLX86Assembler.cpp
@@ -0,0 +1,1507 @@
1+/* libs/pixelflinger/codeflinger/x86/GGLX86Assembler.cpp
2+**
3+** Copyright 2006, The Android Open Source Project
4+**
5+** Licensed under the Apache License, Version 2.0 (the "License");
6+** you may not use this file except in compliance with the License.
7+** You may obtain a copy of the License at
8+**
9+** http://www.apache.org/licenses/LICENSE-2.0
10+**
11+** Unless required by applicable law or agreed to in writing, software
12+** distributed under the License is distributed on an "AS IS" BASIS,
13+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+** See the License for the specific language governing permissions and
15+** limitations under the License.
16+*/
17+
18+#define LOG_TAG "GGLX86Assembler"
19+
20+#include <assert.h>
21+#include <stdint.h>
22+#include <stdlib.h>
23+#include <stdio.h>
24+#include <sys/types.h>
25+#include <cutils/log.h>
26+
27+#include "codeflinger/x86/GGLX86Assembler.h"
28+
29+namespace android {
30+
31+// ----------------------------------------------------------------------------
32+
33+GGLX86Assembler::GGLX86Assembler(const sp<Assembly>& assembly)
34+ : X86Assembler(assembly), X86RegisterAllocator(), mOptLevel(7)
35+{
36+}
37+
38+GGLX86Assembler::~GGLX86Assembler()
39+{
40+}
41+
42+void GGLX86Assembler::reset(int opt_level)
43+{
44+ X86Assembler::reset();
45+ X86RegisterAllocator::reset();
46+ mOptLevel = opt_level;
47+}
48+
49+// ---------------------------------------------------------------------------
50+
51+int GGLX86Assembler::scanline(const needs_t& needs, context_t const* c)
52+{
53+ int err = 0;
54+ err = scanline_core(needs, c);
55+ if (err != 0)
56+ ALOGE("scanline_core failed, probably due to running out of registers: %d\n", err);
57+
58+ // XXX: in theory, pcForLabel is not valid before generate()
59+ char* fragment_start_pc = pcForLabel("fragment_loop");
60+ char* fragment_end_pc = pcForLabel("fragment_end");
61+ const int per_fragment_ins_size = int(fragment_end_pc - fragment_start_pc);
62+
63+ // build a name for our pipeline
64+ char name[128];
65+ sprintf(name,
66+ "scanline__%08X:%08X_%08X_%08X [%3d ipp ins size]",
67+ needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ins_size);
68+
69+ if (err) {
70+ ALOGE("Error while generating ""%s""\n", name);
71+ disassemble(name);
72+ return -1;
73+ }
74+
75+ return generate(name);
76+}
77+
78+int GGLX86Assembler::scanline_core(const needs_t& needs, context_t const* c)
79+{
80+ int64_t duration = ggl_system_time();
81+
82+ mBlendFactorCached = 0;
83+ mBlending = 0;
84+ mMasking = 0;
85+ mAA = GGL_READ_NEEDS(P_AA, needs.p);
86+ mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
87+ mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
88+ mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
89+ mFog = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
90+ mSmooth = GGL_READ_NEEDS(SHADE, needs.n) != 0;
91+ mBuilderContext.needs = needs;
92+ mBuilderContext.c = c;
93+ mBuilderContext.Rctx = obtainReg(); // obtained dynamically when used, then recycled immediately if unused
94+ mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];
95+
96+ // ------------------------------------------------------------------------
97+
98+ decodeLogicOpNeeds(needs);
99+
100+ decodeTMUNeeds(needs, c);
101+
102+ mBlendSrc = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
103+ mBlendDst = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
104+ mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
105+ mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));
106+
107+ if (!mCbFormat.c[GGLFormat::ALPHA].h) {
108+ if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
109+ (mBlendSrc == GGL_DST_ALPHA)) {
110+ mBlendSrc = GGL_ONE;
111+ }
112+ if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
113+ (mBlendSrcA == GGL_DST_ALPHA)) {
114+ mBlendSrcA = GGL_ONE;
115+ }
116+ if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
117+ (mBlendDst == GGL_DST_ALPHA)) {
118+ mBlendDst = GGL_ONE;
119+ }
120+ if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
121+ (mBlendDstA == GGL_DST_ALPHA)) {
122+ mBlendDstA = GGL_ONE;
123+ }
124+ }
125+
126+ // if we need the framebuffer, read it now
127+ const int blending = blending_codes(mBlendSrc, mBlendDst) |
128+ blending_codes(mBlendSrcA, mBlendDstA);
129+
130+ // XXX: handle special cases, destination not modified...
131+ if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
132+ (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
133+ // Destination unmodified (beware of logic ops)
134+ } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
135+ (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
136+ // Destination is zero (beware of logic ops)
137+ }
138+
139+ int fbComponents = 0;
140+ const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
141+ for (int i=0 ; i<4 ; i++) {
142+ const int mask = 1<<i;
143+ component_info_t& info = mInfo[i];
144+ int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
145+ int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
146+ if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
147+ fs = GGL_ONE;
148+ info.masked = !!(masking & mask);
149+ info.inDest = !info.masked && mCbFormat.c[i].h &&
150+ ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
151+ if (mCbFormat.components >= GGL_LUMINANCE &&
152+ (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
153+ info.inDest = false;
154+ }
155+ info.needed = (i==GGLFormat::ALPHA) &&
156+ (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
157+ info.replaced = !!(mTextureMachine.replaced & mask);
158+ info.iterated = (!info.replaced && (info.inDest || info.needed));
159+ info.smooth = mSmooth && info.iterated;
160+ info.fog = mFog && info.inDest && (i != GGLFormat::ALPHA);
161+ info.blend = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));
162+
163+ mBlending |= (info.blend ? mask : 0);
164+ mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
165+ fbComponents |= mCbFormat.c[i].h ? mask : 0;
166+ }
167+
168+ mAllMasked = (mMasking == fbComponents);
169+ if (mAllMasked) {
170+ mDithering = 0;
171+ }
172+
173+ fragment_parts_t parts;
174+
175+ // ------------------------------------------------------------------------
176+ callee_work();
177+ // ------------------------------------------------------------------------
178+
179+ mCurSp = -12; // %ebx, %edi, %esi
180+ prepare_esp(0);
181+ build_scanline_preparation(parts, needs);
182+ recycleReg(mBuilderContext.Rctx);
183+
184+ if (registerFile().status())
185+ return registerFile().status();
186+
187+ // ------------------------------------------------------------------------
188+ label("fragment_loop");
189+ // ------------------------------------------------------------------------
190+ {
191+ Scratch regs(registerFile());
192+ int temp_reg = -1;
193+
194+ if (mDithering) {
195+ // update the dither index.
196+ temp_reg = regs.obtain();
197+ // Loading into a register and computing there should be faster than memory operations
198+ MOV_MEM_TO_REG(parts.count.offset_ebp, PhysicalReg_EBP, temp_reg);
199+ ROR(GGL_DITHER_ORDER_SHIFT, temp_reg);
200+ ADD_IMM_TO_REG(1 << (32 - GGL_DITHER_ORDER_SHIFT), temp_reg);
201+ ROR(32 - GGL_DITHER_ORDER_SHIFT, temp_reg);
202+ MOV_REG_TO_MEM(temp_reg, parts.count.offset_ebp, PhysicalReg_EBP);
203+ regs.recycle(temp_reg);
204+
205+ }
206+
207+ // XXX: could we do an early alpha-test here in some cases?
208+ // It would probably be used only with smooth-alpha and no texture
209+ // (or no alpha component in the texture).
210+
211+ // Early z-test
212+ if (mAlphaTest==GGL_ALWAYS) {
213+ build_depth_test(parts, Z_TEST|Z_WRITE);
214+ } else {
215+ // we cannot do the z-write here, because
216+ // it might be killed by the alpha-test later
217+ build_depth_test(parts, Z_TEST);
218+ }
219+
220+ { // texture coordinates
221+ Scratch scratches(registerFile());
222+
223+ // texel generation
224+ build_textures(parts, regs);
225+
226+ }
227+
228+ if ((blending & (FACTOR_DST|BLEND_DST)) ||
229+ (mMasking && !mAllMasked) ||
230+ (mLogicOp & LOGIC_OP_DST))
231+ {
232+ // blending / logic_op / masking need the framebuffer
233+ mDstPixel.setTo(regs.obtain(), &mCbFormat);
234+
235+ // load the framebuffer pixel
236+ comment("fetch color-buffer");
237+ parts.cbPtr.reg = regs.obtain();
238+ MOV_MEM_TO_REG(parts.cbPtr.offset_ebp, PhysicalReg_EBP, parts.cbPtr.reg);
239+ load(parts.cbPtr, mDstPixel);
240+ mCurSp = mCurSp - 4;
241+ mDstPixel.offset_ebp = mCurSp;
242+ MOV_REG_TO_MEM(mDstPixel.reg, mDstPixel.offset_ebp, EBP);
243+ regs.recycle(mDstPixel.reg);
244+ regs.recycle(parts.cbPtr.reg);
245+ mDstPixel.reg = -1;
246+ }
247+
248+ if (registerFile().status())
249+ return registerFile().status();
250+
251+ pixel_t pixel;
252+ int directTex = mTextureMachine.directTexture;
253+ if (directTex | parts.packed) {
254+ // note: we can't have both here
255+ // iterated color or direct texture
256+ if(directTex) {
257+ pixel.offset_ebp = parts.texel[directTex-1].offset_ebp;
258+ }
259+ else
260+ pixel.offset_ebp = parts.iterated.offset_ebp;
261+ pixel.reg = regs.obtain();
262+ MOV_MEM_TO_REG(pixel.offset_ebp, EBP, pixel.reg);
263+ //pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
264+ pixel.flags &= ~CORRUPTIBLE;
265+ } else {
266+ if (mDithering) {
267+ mBuilderContext.Rctx = regs.obtain();
268+ temp_reg = regs.obtain();
269+ const int ctxtReg = mBuilderContext.Rctx;
270+ MOV_MEM_TO_REG(8, EBP, ctxtReg);
271+ const int mask = GGL_DITHER_SIZE-1;
272+ parts.dither = reg_t(regs.obtain());
273+ MOV_MEM_TO_REG(parts.count.offset_ebp, EBP, parts.dither.reg);
274+ AND_IMM_TO_REG(mask, parts.dither.reg);
275+ ADD_REG_TO_REG(ctxtReg, parts.dither.reg);
276+ MOVZX_MEM_TO_REG(OpndSize_8, parts.dither.reg, GGL_OFFSETOF(ditherMatrix), temp_reg);
277+ MOV_REG_TO_REG(temp_reg, parts.dither.reg);
278+ mCurSp = mCurSp - 4;
279+ parts.dither.offset_ebp = mCurSp;
280+ MOV_REG_TO_MEM(parts.dither.reg, parts.dither.offset_ebp, EBP);
281+ regs.recycle(parts.dither.reg);
282+ regs.recycle(temp_reg);
283+ regs.recycle(mBuilderContext.Rctx);
284+
285+ }
286+
287+ // allocate a register for the resulting pixel
288+ pixel.setTo(regs.obtain(), &mCbFormat, FIRST);
289+
290+ build_component(pixel, parts, GGLFormat::ALPHA, regs);
291+
292+ if (mAlphaTest!=GGL_ALWAYS) {
293+ // only handle the z-write part here. We know z-test
294+ // was successful, as well as alpha-test.
295+ build_depth_test(parts, Z_WRITE);
296+ }
297+
298+ build_component(pixel, parts, GGLFormat::RED, regs);
299+ build_component(pixel, parts, GGLFormat::GREEN, regs);
300+ build_component(pixel, parts, GGLFormat::BLUE, regs);
301+
302+ pixel.flags |= CORRUPTIBLE;
303+ }
304+
305+ if (registerFile().status()) {
306+ return registerFile().status();
307+ }
308+
309+ if (pixel.reg == -1) {
310+ // be defensive here. if we're here it's probably
311+ // that this whole fragment is a no-op.
312+ pixel = mDstPixel;
313+ }
314+
315+ if (!mAllMasked) {
316+ // logic operation
317+ build_logic_op(pixel, regs);
318+
319+ // masking
320+ build_masking(pixel, regs);
321+
322+ comment("store");
323+ parts.cbPtr.reg = regs.obtain();
324+ MOV_MEM_TO_REG(parts.cbPtr.offset_ebp, EBP, parts.cbPtr.reg);
325+ store(parts.cbPtr, pixel, WRITE_BACK);
326+ MOV_REG_TO_MEM(parts.cbPtr.reg, parts.cbPtr.offset_ebp, EBP);
327+ regs.recycle(parts.cbPtr.reg);
328+ regs.recycle(pixel.reg);
329+ }
330+ }
331+
332+ if (registerFile().status())
333+ return registerFile().status();
334+
335+ // update the iterated color...
336+ if (parts.reload != 3) {
337+ build_smooth_shade(parts);
338+ }
339+
340+ // update iterated z
341+ build_iterate_z(parts);
342+
343+ // update iterated fog
344+ build_iterate_f(parts);
345+
346+ //SUB_IMM_TO_REG(1<<16, parts.count.reg);
347+ SUB_IMM_TO_MEM(1<<16, parts.count.offset_ebp, EBP);
348+
349+ JCC(Mnemonic_JNS, "fragment_loop");
350+ label("fragment_end");
351+ int update_esp_offset, shrink_esp_offset;
352+ update_esp_offset = shrink_esp_offset = -mCurSp - 12; // 12 is ebx, esi, edi
353+ update_esp(update_esp_offset);
354+ shrink_esp(shrink_esp_offset);
355+ return_work();
356+
357+ if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
358+ if (mDepthTest!=GGL_ALWAYS) {
359+ label("discard_before_textures");
360+ build_iterate_texture_coordinates(parts);
361+ }
362+ label("discard_after_textures");
363+ build_smooth_shade(parts);
364+ build_iterate_z(parts);
365+ build_iterate_f(parts);
366+ if (!mAllMasked) {
367+ //ADD_IMM_TO_REG(parts.cbPtr.size>>3, parts.cbPtr.reg);
368+ ADD_IMM_TO_MEM(parts.cbPtr.size>>3, parts.cbPtr.offset_ebp, EBP);
369+ }
370+ SUB_IMM_TO_MEM(1<<16, parts.count.offset_ebp, EBP);
371+ //SUB_IMM_TO_REG(1<<16, parts.count.reg);
372+ JCC(Mnemonic_JNS, "fragment_loop");
373+ update_esp_offset = shrink_esp_offset = -mCurSp - 12; // 12 is ebx, esi, edi
374+ update_esp(update_esp_offset);
375+ shrink_esp(shrink_esp_offset);
376+ return_work();
377+ }
378+
379+ return registerFile().status();
380+}
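
A note on the mCurSp bookkeeping that recurs throughout scanline_core(): x86-32 has only six allocatable general-purpose registers, so values the ARM backend keeps in registers are spilled to EBP-relative stack slots here. mCurSp starts at -12 (below the saved %ebx, %edi, %esi) and grows downward four bytes per slot; fragment_end then rewinds the stack via update_esp()/shrink_esp() with -mCurSp - 12. The recurring pattern, condensed (names taken from the code above; an illustrative sketch, not literal commit code):

    mCurSp = mCurSp - 4;                               // reserve a 4-byte slot
    value.offset_ebp = mCurSp;                         // remember where it lives
    MOV_REG_TO_MEM(value.reg, value.offset_ebp, EBP);  // spill
    regs.recycle(value.reg);                           // free the scarce register
    value.reg = -1;                                    // value now lives in memory
    // ... later, when the value is needed again:
    value.reg = regs.obtain();
    MOV_MEM_TO_REG(value.offset_ebp, EBP, value.reg);  // reload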
381+
382+// ---------------------------------------------------------------------------
383+
384+void GGLX86Assembler::build_scanline_preparation(
385+ fragment_parts_t& parts, const needs_t& needs)
386+{
387+ Scratch scratches(registerFile());
388+
389+ // compute count
390+ comment("compute ct (# of pixels to process)");
391+ int temp_reg;
392+ parts.count.setTo(obtainReg());
393+ int Rx = scratches.obtain();
394+ int Ry = scratches.obtain();
395+ // the only argument is +8 bytes relative to the current EBP
396+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
397+ CONTEXT_LOAD(Rx, iterators.xl);
398+ CONTEXT_LOAD(parts.count.reg, iterators.xr);
399+ CONTEXT_LOAD(Ry, iterators.y);
400+
401+ // parts.count = iterators.xr - Rx
402+ SUB_REG_TO_REG(Rx, parts.count.reg);
403+ SUB_IMM_TO_REG(1, parts.count.reg);
404+
405+ if (mDithering) {
406+ // parts.count.reg = 0xNNNNXXDD
407+ // NNNN = count-1
408+ // DD = dither offset
409+ // XX = 0xxxxxxx (x = garbage)
410+ Scratch scratches(registerFile());
411+ int tx = scratches.obtain();
412+ int ty = scratches.obtain();
413+
414+ MOV_REG_TO_REG(Rx,tx);
415+ AND_IMM_TO_REG(GGL_DITHER_MASK, tx);
416+ MOV_REG_TO_REG(Ry,ty);
417+ AND_IMM_TO_REG(GGL_DITHER_MASK, ty);
418+ SHL(GGL_DITHER_ORDER_SHIFT, ty);
419+ ADD_REG_TO_REG(ty, tx);
420+ SHL(16, parts.count.reg);
421+ OR_REG_TO_REG(tx, parts.count.reg);
422+ scratches.recycle(tx);
423+ scratches.recycle(ty);
424+ } else {
425+ // parts.count.reg = 0xNNNN0000
426+ // NNNN = count-1
427+ SHL(16, parts.count.reg);
428+ }
429+ mCurSp = mCurSp - 4;
430+ parts.count.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg
431+ MOV_REG_TO_MEM(parts.count.reg, parts.count.offset_ebp, EBP);
432+ //PUSH(parts.count.reg);
433+ recycleReg(parts.count.reg);
434+ parts.count.reg=-1;
435+ if (!mAllMasked) {
436+ // compute dst ptr
437+ comment("compute color-buffer pointer");
438+ const int cb_bits = mCbFormat.size*8;
439+ int Rs = scratches.obtain();
440+ temp_reg = scratches.obtain();
441+ CONTEXT_LOAD(Rs, state.buffers.color.stride);
442+ MOVSX_REG_TO_REG(OpndSize_16, Ry, temp_reg);
443+ MOVSX_REG_TO_REG(OpndSize_16, Rs, Rs);
444+ IMUL(temp_reg, Rs);
445+ scratches.recycle(temp_reg);
446+ ADD_REG_TO_REG(Rx, Rs);
447+
448+ parts.cbPtr.setTo(obtainReg(), cb_bits);
449+ CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data);
450+ reg_t temp_reg_t;
451+ temp_reg_t.setTo(Rs);
452+ base_offset(parts.cbPtr, parts.cbPtr, temp_reg_t);
453+
454+ mCurSp = mCurSp - 4;
455+ parts.cbPtr.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg, parts.cbPtr.reg
456+ MOV_REG_TO_MEM(parts.cbPtr.reg, parts.cbPtr.offset_ebp, EBP);
457+ //PUSH(parts.cbPtr.reg);
458+ recycleReg(parts.cbPtr.reg);
459+ parts.cbPtr.reg=-1;
460+ scratches.recycle(Rs);
461+ }
462+
463+ // init fog
464+ const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
465+ if (need_fog) {
466+ comment("compute initial fog coordinate");
467+ Scratch scratches(registerFile());
468+ int ydfdy = scratches.obtain();
469+ int dfdx = scratches.obtain();
470+ CONTEXT_LOAD(dfdx, generated_vars.dfdx);
471+ IMUL(Rx, dfdx);
472+ CONTEXT_LOAD(ydfdy, iterators.ydfdy);
473+ ADD_REG_TO_REG(ydfdy, dfdx); // Rx * dfdx + ydfdy
474+ CONTEXT_STORE(dfdx, generated_vars.f);
475+ scratches.recycle(dfdx);
476+ scratches.recycle(ydfdy);
477+ }
478+
479+ // init Z coordinate
480+ if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
481+ parts.z = reg_t(obtainReg());
482+ comment("compute initial Z coordinate");
483+ Scratch scratches(registerFile());
484+ int dzdx = scratches.obtain();
485+ int ydzdy = parts.z.reg;
486+ CONTEXT_LOAD(dzdx, generated_vars.dzdx); // 1.31 fixed-point
487+ IMUL(Rx, dzdx);
488+ CONTEXT_LOAD(ydzdy, iterators.ydzdy); // 1.31 fixed-point
489+ ADD_REG_TO_REG(dzdx, ydzdy); // parts.z.reg = Rx * dzdx + ydzdy
490+
491+ mCurSp = mCurSp - 4;
492+ parts.z.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg, parts.cbPtr.reg, parts.z.reg
493+ MOV_REG_TO_MEM(ydzdy, parts.z.offset_ebp, EBP);
494+ //PUSH(ydzdy);
495+ recycleReg(ydzdy);
496+ parts.z.reg=-1;
497+
498+ // we're going to index zbase of parts.count
499+ // the ARM version computes zbase = base + (xl-count + stride*y)*2
500+ // !!! Actually, zbase = base + (xl + stride*y)*2
501+ int Rs = dzdx;
502+ int zbase = scratches.obtain();
503+ temp_reg = zbase;
504+ CONTEXT_LOAD(Rs, state.buffers.depth.stride);
505+ MOVSX_REG_TO_REG(OpndSize_16, Rs, Rs);
506+ MOV_REG_TO_REG(Ry, temp_reg);
507+ MOVSX_REG_TO_REG(OpndSize_16, temp_reg, temp_reg);
508+ IMUL(temp_reg, Rs);
509+ ADD_REG_TO_REG(Rx, Rs);
510+ // load parts.count.reg
511+ MOV_MEM_TO_REG(parts.count.offset_ebp, EBP, temp_reg);
512+ SHR(16, temp_reg);
513+ ADD_REG_TO_REG(temp_reg, Rs);
514+ SHL(1, Rs);
515+ CONTEXT_LOAD(zbase, state.buffers.depth.data);
516+ ADD_REG_TO_REG(Rs, zbase);
517+ CONTEXT_STORE(zbase, generated_vars.zbase);
518+ scratches.recycle(zbase);
519+ scratches.recycle(dzdx);
520+ }
521+ // the registers are all used up
522+
523+ // init texture coordinates
524+ init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
525+ scratches.recycle(Ry);
526+
527+ // iterated color
528+ init_iterated_color(parts, reg_t(Rx));
529+
530+ // init coverage factor application (anti-aliasing)
531+ if (mAA) {
532+ parts.covPtr.setTo(obtainReg(), 16);
533+ CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage);
534+ SHL(1, Rx);
535+ ADD_REG_TO_REG(Rx, parts.covPtr.reg);
536+
537+ mCurSp = mCurSp - 4;
538+ parts.covPtr.offset_ebp = mCurSp;
539+ MOV_REG_TO_MEM(parts.covPtr.reg, parts.covPtr.offset_ebp, EBP);
540+ //PUSH(parts.covPtr.reg);
541+ recycleReg(parts.covPtr.reg);
542+ parts.covPtr.reg=-1;
543+ }
544+ scratches.recycle(Rx);
545+}
546+
547+// ---------------------------------------------------------------------------
548+
549+void GGLX86Assembler::build_component( pixel_t& pixel,
550+ fragment_parts_t& parts,
551+ int component,
552+ Scratch& regs)
553+{
554+ static char const * comments[] = {"alpha", "red", "green", "blue"};
555+ comment(comments[component]);
556+
557+ // local register file
558+ Scratch scratches(registerFile());
559+ const int dst_component_size = pixel.component_size(component);
560+
561+ component_t temp(-1);
562+ build_incoming_component( temp, dst_component_size,
563+ parts, component, scratches, regs);
564+
565+ if (mInfo[component].inDest) {
566+ // blending...
567+ build_blending( temp, mDstPixel, component, scratches );
568+
569+ // downshift component and rebuild pixel...
570+ downshift(pixel, component, temp, parts.dither);
571+ }
572+}
573+
574+void GGLX86Assembler::build_incoming_component(
575+ component_t& temp,
576+ int dst_size,
577+ fragment_parts_t& parts,
578+ int component,
579+ Scratch& scratches,
580+ Scratch& global_regs)
581+{
582+ const uint32_t component_mask = 1<<component;
583+
584+ // Figure out what we need for the blending stage...
585+ int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
586+ int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
587+ if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
588+ fs = GGL_ONE;
589+ }
590+
591+ // Figure out what we need to extract and for what reason
592+ const int blending = blending_codes(fs, fd);
593+
594+ // Are we actually going to blend?
595+ const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));
596+
597+ // expand the source if the destination has more bits
598+ int need_expander = false;
599+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
600+ texture_unit_t& tmu = mTextureMachine.tmu[i];
601+ if ((tmu.format_idx) &&
602+ (parts.texel[i].component_size(component) < dst_size)) {
603+ need_expander = true;
604+ }
605+ }
606+
607+ // do we need to extract this component?
608+ const bool multiTexture = mTextureMachine.activeUnits > 1;
609+ const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
610+ (isAlphaSourceNeeded());
611+ int need_extract = mInfo[component].needed;
612+ if (mInfo[component].inDest)
613+ {
614+ need_extract |= ((need_blending ?
615+ (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
616+ need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
617+ need_extract |= mInfo[component].smooth;
618+ need_extract |= mInfo[component].fog;
619+ need_extract |= mDithering;
620+ need_extract |= multiTexture;
621+ }
622+
623+ if (need_extract) {
624+ Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
625+ component_t fragment;
626+
627+ // iterated color
628+ fragment.setTo( regs.obtain(), 0, 32, CORRUPTIBLE);
629+ build_iterated_color(fragment, parts, component, regs);
630+
631+ // texture environment (decal, modulate, replace)
632+ build_texture_environment(fragment, parts, component, regs);
633+
634+ // expand the source if the destination has more bits
635+ if (need_expander && (fragment.size() < dst_size)) {
636+ // we're here only if we fetched a texel
637+ // (so we know for sure fragment is CORRUPTIBLE)
638+ //fragment is stored on the stack
639+ expand(fragment, fragment, dst_size);
640+ }
641+
642+ mCurSp = mCurSp - 4;
643+ fragment.offset_ebp = mCurSp;
644+ MOV_REG_TO_MEM(fragment.reg, fragment.offset_ebp, EBP);
645+ regs.recycle(fragment.reg);
646+
647+ // We have a few specific things to do for the alpha-channel
648+ if ((component==GGLFormat::ALPHA) &&
649+ (mInfo[component].needed || fragment.size()<dst_size))
650+ {
651+ // convert to integer_t first and make sure
652+ // we don't corrupt a needed register
653+ if (fragment.l) {
654+ //component_t incoming(fragment);
655+ // actually fragment is not corruptible
656+ //modify(fragment, regs);
657+ //MOV_REG_TO_REG(incoming.reg, fragment.reg);
658+ SHR(fragment.l, fragment.offset_ebp, EBP);
659+ fragment.h -= fragment.l;
660+ fragment.l = 0;
661+ }
662+
663+ // I haven't found any case that triggers both coverage and the following alpha test (mAlphaTest != GGL_ALWAYS)
664+ fragment.reg = regs.obtain();
665+ MOV_MEM_TO_REG(fragment.offset_ebp, EBP, fragment.reg);
666+
667+ // coverage factor application
668+ build_coverage_application(fragment, parts, regs);
669+ // alpha-test
670+ build_alpha_test(fragment, parts);
671+
672+ MOV_REG_TO_MEM(fragment.reg, fragment.offset_ebp, EBP);
673+ regs.recycle(fragment.reg);
674+
675+ if (blend_needs_alpha_source) {
676+ // We keep only 8 bits for the blending stage
677+ const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
678+
679+ if (fragment.flags & CORRUPTIBLE) {
680+ fragment.flags &= ~CORRUPTIBLE;
681+ mAlphaSource.setTo(fragment.reg,
682+ fragment.size(), fragment.flags, fragment.offset_ebp);
683+ //mCurSp = mCurSp - 4;
684+ //mAlphaSource.offset_ebp = mCurSp;
685+ if (shift) {
686+ SHR(shift, mAlphaSource.offset_ebp, EBP);
687+ }
688+ } else {
689+ // XXX: it would better to do this in build_blend_factor()
690+ // so we can avoid the extra MOV below.
691+ mAlphaSource.setTo(regs.obtain(),
692+ fragment.size(), CORRUPTIBLE);
693+ mCurSp = mCurSp - 4;
694+ mAlphaSource.offset_ebp = mCurSp;
695+ if (shift) {
696+ MOV_MEM_TO_REG(fragment.offset_ebp, EBP, mAlphaSource.reg);
697+ SHR(shift, mAlphaSource.reg);
698+ } else {
699+ MOV_MEM_TO_REG(fragment.offset_ebp, EBP, mAlphaSource.reg);
700+ }
701+ MOV_REG_TO_MEM(mAlphaSource.reg, mAlphaSource.offset_ebp, EBP);
702+ regs.recycle(mAlphaSource.reg);
703+ }
704+ mAlphaSource.s -= shift;
705+
706+ }
707+ }
708+
709+ // fog...
710+ build_fog( fragment, component, regs );
711+
712+ temp = fragment;
713+ } else {
714+ if (mInfo[component].inDest) {
715+ // extraction not needed and replace
716+ // we just select the right component
717+ if ((mTextureMachine.replaced & component_mask) == 0) {
718+ // component wasn't replaced, so use it!
719+ temp = component_t(parts.iterated, component);
720+ }
721+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
722+ const texture_unit_t& tmu = mTextureMachine.tmu[i];
723+ if ((tmu.mask & component_mask) &&
724+ ((tmu.replaced & component_mask) == 0)) {
725+ temp = component_t(parts.texel[i], component);
726+ }
727+ }
728+ }
729+ }
730+}
731+
732+bool GGLX86Assembler::isAlphaSourceNeeded() const
733+{
734+ // XXX: also needed for alpha-test
735+ const int bs = mBlendSrc;
736+ const int bd = mBlendDst;
737+ return bs==GGL_SRC_ALPHA_SATURATE ||
738+ bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
739+ bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
740+}
741+
742+// ---------------------------------------------------------------------------
743+
744+void GGLX86Assembler::build_smooth_shade(fragment_parts_t& parts)
745+{
746+ if (mSmooth && !parts.iterated_packed) {
747+ // update the iterated color in a pipelined way...
748+ comment("update iterated color");
749+ Scratch scratches(registerFile());
750+ mBuilderContext.Rctx = scratches.obtain();
751+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
752+
753+ const int reload = parts.reload;
754+ for (int i=0 ; i<4 ; i++) {
755+ if (!mInfo[i].iterated)
756+ continue;
757+
758+ int dx = parts.argb_dx[i].reg;
759+ int c = parts.argb[i].reg;
760+ dx = scratches.obtain();
761+ c = scratches.obtain();
762+ CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
763+ CONTEXT_LOAD(c, generated_vars.argb[i].c);
764+
765+ //if (reload & 1) {
766+ // c = scratches.obtain();
767+ // CONTEXT_LOAD(c, generated_vars.argb[i].c);
768+ //}
769+ //if (reload & 2) {
770+ // dx = scratches.obtain();
771+ // CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
772+ //}
773+
774+ if (mSmooth) {
775+ ADD_REG_TO_REG(dx, c);
776+ }
777+
778+ CONTEXT_STORE(c, generated_vars.argb[i].c);
779+ scratches.recycle(c);
780+ scratches.recycle(dx);
781+ //if (reload & 1) {
782+ // CONTEXT_STORE(c, generated_vars.argb[i].c);
783+ // scratches.recycle(c);
784+ //}
785+ //if (reload & 2) {
786+ // scratches.recycle(dx);
787+ //}
788+ }
789+ scratches.recycle(mBuilderContext.Rctx);
790+ }
791+}
792+
793+// ---------------------------------------------------------------------------
794+
795+void GGLX86Assembler::build_coverage_application(component_t& fragment,
796+ fragment_parts_t& parts, Scratch& regs)
797+{
798+ // here fragment.l is guaranteed to be 0
799+ if (mAA) {
800+ // coverages are 1.15 fixed-point numbers
801+ comment("coverage application");
802+
803+ component_t incoming(fragment);
804+ modify(fragment, regs);
805+
806+ Scratch scratches(registerFile());
807+ int cf = scratches.obtain();
808+ parts.covPtr.reg = scratches.obtain();
809+ MOV_MEM_TO_REG(parts.covPtr.offset_ebp, EBP, parts.covPtr.reg);
810+ MOVZX_MEM_TO_REG(OpndSize_16, parts.covPtr.reg, 2, cf); // refer to LDRH definition
811+ scratches.recycle(parts.covPtr.reg);
812+ if (fragment.h > 31) {
813+ fragment.h--;
814+
815+ int flag_push_edx = 0;
816+ int flag_reserve_edx = 0;
817+ int temp_reg2 = -1;
818+ int edx_offset_ebp = 0;
819+ if(scratches.isUsed(EDX) == 1) {
820+ if(incoming.reg != EDX && cf != EDX) {
821+ flag_push_edx = 1;
822+ mCurSp = mCurSp - 4;
823+ edx_offset_ebp = mCurSp;
824+ MOV_REG_TO_MEM(EDX, edx_offset_ebp, EBP);
825+ }
826+ }
827+ else {
828+ flag_reserve_edx = 1;
829+ scratches.reserve(EDX);
830+ }
831+ if(scratches.isUsed(EAX)) {
832+ if( cf == EAX || incoming.reg == EAX) {
833+ MOVSX_REG_TO_REG(OpndSize_16, cf, cf);
834+ if(cf == EAX)
835+ IMUL(incoming.reg);
836+ else
837+ IMUL(cf);
838+ SHL(16, EDX);
839+ SHR(16, EAX);
840+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
841+ MOV_REG_TO_REG(EDX, incoming.reg);
842+ }
843+ else {
844+ int eax_offset_ebp = 0;
845+ if(scratches.countFreeRegs() > 0) {
846+ temp_reg2 = scratches.obtain();
847+ MOV_REG_TO_REG(EAX, temp_reg2);
848+ }
849+ else {
850+ mCurSp = mCurSp - 4;
851+ eax_offset_ebp = mCurSp;
852+ MOV_REG_TO_MEM(EAX, eax_offset_ebp, EBP);
853+ }
854+ MOV_REG_TO_REG(cf, EAX);
855+ MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX);
856+ IMUL(incoming.reg);
857+ SHL(16, EDX);
858+ SHR(16, EAX);
859+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
860+ MOV_REG_TO_REG(EDX, incoming.reg);
861+ if(temp_reg2 > -1) {
862+ MOV_REG_TO_REG(temp_reg2, EAX);
863+ scratches.recycle(temp_reg2);
864+ }
865+ else {
866+ MOV_MEM_TO_REG(eax_offset_ebp, EBP, EAX);
867+ }
868+ }
869+ }
870+ else {
871+ MOV_REG_TO_REG(cf, EAX);
872+ MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX);
873+ IMUL(incoming.reg);
874+ SHL(16, EDX);
875+ SHR(16, EAX);
876+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
877+ MOV_REG_TO_REG(EDX, incoming.reg);
878+ }
879+ if(flag_push_edx == 1) {
880+ MOV_MEM_TO_REG(edx_offset_ebp, EBP, EDX);
881+ }
882+ if(flag_reserve_edx ==1)
883+ scratches.recycle(EDX);
884+
885+ MOV_REG_TO_REG(incoming.reg, fragment.reg);
886+
887+ //IMUL(cf, incoming.reg);
888+ } else {
889+ MOV_REG_TO_REG(incoming.reg, fragment.reg);
890+ SHL(1, fragment.reg);
891+
892+ int flag_push_edx = 0;
893+ int flag_reserve_edx = 0;
894+ int temp_reg2 = -1;
895+ int edx_offset_ebp = 0;
896+ if(scratches.isUsed(EDX) == 1) {
897+ if(fragment.reg != EDX && cf != EDX) {
898+ flag_push_edx = 1;
899+ mCurSp = mCurSp - 4;
900+ edx_offset_ebp = mCurSp;
901+ MOV_REG_TO_MEM(EDX, edx_offset_ebp, EBP);
902+ }
903+ }
904+ else {
905+ flag_reserve_edx = 1;
906+ scratches.reserve(EDX);
907+ }
908+ if(scratches.isUsed(EAX)) {
909+ if( cf == EAX || fragment.reg == EAX) {
910+ MOVSX_REG_TO_REG(OpndSize_16, cf, cf);
911+ if(cf == EAX)
912+ IMUL(fragment.reg);
913+ else
914+ IMUL(cf);
915+ SHL(16, EDX);
916+ SHR(16, EAX);
917+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
918+ MOV_REG_TO_REG(EDX, fragment.reg);
919+ }
920+ else {
921+ int eax_offset_ebp = 0;
922+ if(scratches.countFreeRegs() > 0) {
923+ temp_reg2 = scratches.obtain();
924+ MOV_REG_TO_REG(EAX, temp_reg2);
925+ }
926+ else {
927+ mCurSp = mCurSp - 4;
928+ eax_offset_ebp = mCurSp;
929+ MOV_REG_TO_MEM(EAX, eax_offset_ebp, EBP);
930+ }
931+ MOV_REG_TO_REG(cf, EAX);
932+ MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX);
933+ IMUL(fragment.reg);
934+ SHL(16, EDX);
935+ SHR(16, EAX);
936+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
937+ MOV_REG_TO_REG(EDX, fragment.reg);
938+ if(temp_reg2 > -1) {
939+ MOV_REG_TO_REG(temp_reg2, EAX);
940+ scratches.recycle(temp_reg2);
941+ }
942+ else {
943+ MOV_MEM_TO_REG(eax_offset_ebp, EBP, EAX);
944+ }
945+ }
946+ }
947+ else {
948+ MOV_REG_TO_REG(cf, EAX);
949+ MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX);
950+ IMUL(fragment.reg);
951+ SHL(16, EDX);
952+ SHR(16, EAX);
953+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
954+ MOV_REG_TO_REG(EDX, fragment.reg);
955+ }
956+ if(flag_push_edx == 1) {
957+ MOV_MEM_TO_REG(edx_offset_ebp, EBP, EDX);
958+ }
959+ if(flag_reserve_edx ==1)
960+ scratches.recycle(EDX);
961+
962+ //IMUL(cf, fragment.reg);
963+ }
964+ scratches.recycle(cf);
965+ }
966+}
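
Both branches of the coverage multiply above use the same idiom: the one-operand IMUL leaves the full 64-bit signed product in EDX:EAX, and the SHL(16, EDX) / SHR(16, EAX) / 16-bit MOV triple splices bits [47:16] of that product into a single register. In scalar form (a sketch of the equivalent arithmetic, not literal code from the commit):

    int64_t product = (int64_t)cf * x;           // EDX:EAX after IMUL
    int32_t result  = (int32_t)(product >> 16);  // EDX after the shift/move triple

The SHL(1, fragment.reg) taken on the second path pre-doubles the fragment so that the >>16 nets out to scaling by the 1.15 fixed-point coverage factor.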
967+
968+// ---------------------------------------------------------------------------
969+
970+void GGLX86Assembler::build_alpha_test(component_t& fragment,
971+ const fragment_parts_t& parts)
972+{
973+ if (mAlphaTest != GGL_ALWAYS) {
974+ comment("Alpha Test");
975+ Scratch scratches(registerFile());
976+ int ref = scratches.obtain();
977+ mBuilderContext.Rctx = scratches.obtain();
978+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
979+ const int shift = GGL_COLOR_BITS-fragment.size();
980+ CONTEXT_LOAD(ref, state.alpha_test.ref);
981+ scratches.recycle(mBuilderContext.Rctx);
982+ if (shift) {
983+ SHR(shift, ref);
984+ CMP_REG_TO_REG(ref, fragment.reg);
985+ } else CMP_REG_TO_REG(ref, fragment.reg);
986+ Mnemonic cc = Mnemonic_NULL;
987+ //int cc = NV;
988+ switch (mAlphaTest) {
989+ case GGL_NEVER:
990+ JMP("discard_after_textures");
991+ return;
992+ break;
993+ case GGL_LESS:
994+ cc = Mnemonic_JNL;
995+ break;
996+ case GGL_EQUAL:
997+ cc = Mnemonic_JNE;
998+ break;
999+ case GGL_LEQUAL:
1000+ cc = Mnemonic_JB;
1001+ break;
1002+ case GGL_GREATER:
1003+ cc = Mnemonic_JLE;
1004+ break;
1005+ case GGL_NOTEQUAL:
1006+ cc = Mnemonic_JE;
1007+ break;
1008+ case GGL_GEQUAL:
1009+ cc = Mnemonic_JNC;
1010+ break;
1011+ }
1012+ JCC(cc, "discard_after_textures");
1013+ //B(cc^1, "discard_after_textures");
1014+ }
1015+}
1016+
1017+// ---------------------------------------------------------------------------
1018+
1019+void GGLX86Assembler::build_depth_test(
1020+ const fragment_parts_t& parts, uint32_t mask)
1021+{
1022+ mask &= Z_TEST|Z_WRITE;
1023+ int store_flag = 0;
1024+ const needs_t& needs = mBuilderContext.needs;
1025+ const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
1026+ Scratch scratches(registerFile());
1027+
1028+ if (mDepthTest != GGL_ALWAYS || zmask) {
1029+ Mnemonic ic = Mnemonic_NULL;
1030+ switch (mDepthTest) {
1031+ case GGL_LESS:
1032+ ic = Mnemonic_JBE;
1033+ break;
1034+ case GGL_EQUAL:
1035+ ic = Mnemonic_JNE;
1036+ break;
1037+ case GGL_LEQUAL:
1038+ ic = Mnemonic_JB;
1039+ break;
1040+ case GGL_GREATER:
1041+ ic = Mnemonic_JGE;
1042+ break;
1043+ case GGL_NOTEQUAL:
1044+ ic = Mnemonic_JE;
1045+ break;
1046+ case GGL_GEQUAL:
1047+ ic = Mnemonic_JA;
1048+ break;
1049+ case GGL_NEVER:
1050+ // this never happens, because it's taken care of when
1051+ // computing the needs, but we keep it for completeness.
1052+ comment("Depth Test (NEVER)");
1053+ JMP("discard_before_textures");
1054+ return;
1055+ case GGL_ALWAYS:
1056+ // we're here because zmask is enabled
1057+ mask &= ~Z_TEST; // test always passes.
1058+ break;
1059+ }
1060+
1061+
1062+ if ((mask & Z_WRITE) && !zmask) {
1063+ mask &= ~Z_WRITE;
1064+ }
1065+
1066+ if (!mask)
1067+ return;
1068+
1069+ comment("Depth Test");
1070+
1071+ int zbase = scratches.obtain();
1072+ mBuilderContext.Rctx = scratches.obtain();
1073+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
1074+ CONTEXT_LOAD(zbase, generated_vars.zbase); // stall
1075+ scratches.recycle(mBuilderContext.Rctx);
1076+
1077+ int temp_reg1 = scratches.obtain();
1078+ int depth = scratches.obtain();
1079+ int z = parts.z.reg;
1080+ MOV_MEM_TO_REG(parts.count.offset_ebp, PhysicalReg_EBP, temp_reg1);
1081+ SHR(15, temp_reg1);
1082+ SUB_REG_TO_REG(temp_reg1, zbase);
1083+
85+ // the SHR/SUB above compute zbase = zbase - ((count >> 16) << 1)
1085+
1086+ if (mask & Z_TEST) {
1087+ MOVZX_MEM_TO_REG(OpndSize_16, zbase, 0, depth);
1088+ MOV_MEM_TO_REG(parts.z.offset_ebp, PhysicalReg_EBP, temp_reg1);
1089+ SHR(16, temp_reg1);
1090+ CMP_REG_TO_REG(temp_reg1, depth);
1091+ JCC(ic, "discard_before_textures");
1092+
1093+ }
1094+ if (mask & Z_WRITE) {
1095+ if (mask == Z_WRITE) {
1096+ // only z-write asked, cc is meaningless
1097+ store_flag = 1;
1098+ }
1099+ // actually it must be stored since the above branch is not taken
1100+ MOV_REG_TO_MEM(temp_reg1, 0, zbase, OpndSize_16);
1101+ }
1102+ scratches.recycle(temp_reg1);
1103+ scratches.recycle(zbase);
1104+ scratches.recycle(depth);
1105+ }
1106+}
1107+
1108+void GGLX86Assembler::build_iterate_z(const fragment_parts_t& parts)
1109+{
1110+ const needs_t& needs = mBuilderContext.needs;
1111+ if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
1112+ Scratch scratches(registerFile());
1113+ int dzdx = scratches.obtain();
1114+ mBuilderContext.Rctx = scratches.obtain();
1115+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
1116+ CONTEXT_LOAD(dzdx, generated_vars.dzdx); // stall
1117+ scratches.recycle(mBuilderContext.Rctx);
1118+ ADD_REG_TO_MEM(dzdx, EBP, parts.z.offset_ebp);
1119+ scratches.recycle(dzdx);
1120+ }
1121+}
1122+
1123+void GGLX86Assembler::build_iterate_f(const fragment_parts_t& parts)
1124+{
1125+ const needs_t& needs = mBuilderContext.needs;
1126+ if (GGL_READ_NEEDS(P_FOG, needs.p)) {
1127+ Scratch scratches(registerFile());
1128+ int dfdx = scratches.obtain();
1129+ int f = scratches.obtain();
1130+ mBuilderContext.Rctx = scratches.obtain();
1131+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
1132+ CONTEXT_LOAD(f, generated_vars.f);
1133+ CONTEXT_LOAD(dfdx, generated_vars.dfdx); // stall
1134+ ADD_REG_TO_REG(dfdx, f);
1135+ CONTEXT_STORE(f, generated_vars.f);
1136+ scratches.recycle(mBuilderContext.Rctx);
1137+ scratches.recycle(dfdx);
1138+ scratches.recycle(f);
1139+ }
1140+}
1141+
1142+// ---------------------------------------------------------------------------
1143+
1144+void GGLX86Assembler::build_logic_op(pixel_t& pixel, Scratch& regs)
1145+{
1146+ const needs_t& needs = mBuilderContext.needs;
1147+ const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
1148+ if (opcode == GGL_COPY)
1149+ return;
1150+
1151+ comment("logic operation");
1152+
1153+ pixel_t s(pixel);
1154+ if (!(pixel.flags & CORRUPTIBLE)) {
1155+ pixel.reg = regs.obtain();
1156+ pixel.flags |= CORRUPTIBLE;
1157+ }
1158+
1159+ pixel_t d(mDstPixel);
1160+ d.reg = regs.obtain();
1161+ MOV_MEM_TO_REG(mDstPixel.offset_ebp, EBP, d.reg);
1162+ switch(opcode) {
1163+ case GGL_CLEAR:
1164+ MOV_IMM_TO_REG(0, pixel.reg);
1165+ break;
1166+ case GGL_AND:
1167+ MOV_REG_TO_REG(d.reg, pixel.reg);
1168+ AND_REG_TO_REG(s.reg, pixel.reg);
1169+ break;
1170+ case GGL_AND_REVERSE:
1171+ MOV_REG_TO_REG(d.reg, pixel.reg);
1172+ NOT(pixel.reg);
1173+ AND_REG_TO_REG(s.reg, pixel.reg);
1174+ break;
1175+ case GGL_COPY:
1176+ break;
1177+ case GGL_AND_INVERTED:
1178+ MOV_REG_TO_REG(s.reg, pixel.reg);
1179+ NOT(pixel.reg);
1180+ AND_REG_TO_REG(d.reg, pixel.reg);
1181+ break;
1182+ case GGL_NOOP:
1183+ MOV_REG_TO_REG(d.reg, pixel.reg);
1184+ break;
1185+ case GGL_XOR:
1186+ MOV_REG_TO_REG(d.reg, pixel.reg);
1187+ XOR(s.reg, pixel.reg);
1188+ break;
1189+ case GGL_OR:
1190+ MOV_REG_TO_REG(d.reg, pixel.reg);
1191+ OR_REG_TO_REG(s.reg, pixel.reg);
1192+ break;
1193+ case GGL_NOR:
1194+ MOV_REG_TO_REG(d.reg, pixel.reg);
1195+ OR_REG_TO_REG(s.reg, pixel.reg);
1196+ NOT(pixel.reg);
1197+ break;
1198+ case GGL_EQUIV:
1199+ MOV_REG_TO_REG(d.reg, pixel.reg);
1200+ XOR(s.reg, pixel.reg);
1201+ NOT(pixel.reg);
1202+ break;
1203+ case GGL_INVERT:
1204+ MOV_REG_TO_REG(d.reg, pixel.reg);
1205+ NOT(pixel.reg);
1206+ break;
1207+ case GGL_OR_REVERSE: // s | ~d == ~(~s & d)
1208+ MOV_REG_TO_REG(s.reg, pixel.reg);
1209+ NOT(pixel.reg);
1210+ AND_REG_TO_REG(d.reg, pixel.reg);
1211+ NOT(pixel.reg);
1212+ break;
1213+ case GGL_COPY_INVERTED:
1214+ MOV_REG_TO_REG(s.reg, pixel.reg);
1215+ NOT(pixel.reg);
1216+ break;
1217+ case GGL_OR_INVERTED: // ~s | d == ~(s & ~d)
1218+ MOV_REG_TO_REG(d.reg, pixel.reg);
1219+ NOT(pixel.reg);
1220+ AND_REG_TO_REG(s.reg, pixel.reg);
1221+ NOT(pixel.reg);
1222+ break;
1223+ case GGL_NAND:
1224+ MOV_REG_TO_REG(d.reg, pixel.reg);
1225+ AND_REG_TO_REG(s.reg, pixel.reg);
1226+ NOT(pixel.reg);
1227+ break;
1228+ case GGL_SET:
1229+ MOV_IMM_TO_REG(0, pixel.reg);
1230+ NOT(pixel.reg);
1231+ break;
1232+ };
1233+ regs.recycle(d.reg);
1234+}
1235+
1236+// ---------------------------------------------------------------------------
1237+
1238+
1239+void GGLX86Assembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
1240+{
1241+ uint32_t rot;
1242+ uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
1243+ mask &= size;
1244+
1245+ if (mask == size) {
1246+ if (d != s)
1247+ MOV_REG_TO_REG(s, d);
1248+ return;
1249+ }
1250+
1251+ MOV_REG_TO_REG(s, d);
1252+ AND_IMM_TO_REG(mask, d);
1253+}
1254+
1255+void GGLX86Assembler::build_masking(pixel_t& pixel, Scratch& regs)
1256+{
1257+ if (!mMasking || mAllMasked) {
1258+ return;
1259+ }
1260+
1261+ comment("color mask");
1262+
1263+ pixel_t fb(mDstPixel);
1264+ fb.reg = regs.obtain();
1265+ MOV_MEM_TO_REG(mDstPixel.offset_ebp, EBP, fb.reg);
1266+ pixel_t s(pixel);
1267+ if (!(pixel.flags & CORRUPTIBLE)) {
1268+ pixel.reg = regs.obtain();
1269+ pixel.flags |= CORRUPTIBLE;
1270+ }
1271+
1272+ int mask = 0;
1273+ for (int i=0 ; i<4 ; i++) {
1274+ const int component_mask = 1<<i;
1275+ const int h = fb.format.c[i].h;
1276+ const int l = fb.format.c[i].l;
1277+ if (h && (!(mMasking & component_mask))) {
1278+ mask |= ((1<<(h-l))-1) << l;
1279+ }
1280+ }
1281+
1282+ // There is no need to clear the masked components of the source
1283+ // (unless we applied a logic op), because they're already zeroed
1284+ // by construction (masked components are not computed)
1285+
1286+ if (mLogicOp) {
1287+ const needs_t& needs = mBuilderContext.needs;
1288+ const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
1289+ if (opcode != GGL_CLEAR) {
1290+ // clear masked component of source
1291+ build_and_immediate(pixel.reg, s.reg, mask, fb.size());
1292+ s = pixel;
1293+ }
1294+ }
1295+
1296+ // clear non masked components of destination
1297+ build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());
1298+
1299+ // or back the channels that were masked
1300+ if (s.reg == fb.reg) {
1301+ // this is in fact a MOV
1302+ if (s.reg == pixel.reg) {
1303+ // ugh, this is in fact a nop
1304+ } else {
1305+ MOV_REG_TO_REG(fb.reg, pixel.reg);
1306+ }
1307+ } else {
1308+ MOV_REG_TO_REG(fb.reg, pixel.reg);
1309+ OR_REG_TO_REG(s.reg, pixel.reg);
1310+ }
1311+ MOV_REG_TO_MEM(fb.reg, mDstPixel.offset_ebp, EBP);
1312+}
1313+
1314+// ---------------------------------------------------------------------------
1315+
1316+void GGLX86Assembler::base_offset(pointer_t& d, pointer_t& b, const reg_t& o)
1317+{
1318+// d and b may be the same reference
1319+ Scratch scratches(registerFile());
1320+ int temp_reg = scratches.obtain();
1321+ switch (b.size) {
1322+ case 32:
1323+ MOV_REG_TO_REG(b.reg, temp_reg);
1324+ MOV_REG_TO_REG(o.reg, d.reg);
1325+ SHL(2,d.reg);
1326+ ADD_REG_TO_REG(temp_reg, d.reg);
1327+ break;
1328+ case 24:
1329+ if (d.reg == b.reg) {
1330+ MOV_REG_TO_REG(b.reg, temp_reg);
1331+ MOV_REG_TO_REG(o.reg, d.reg);
1332+ SHL(1,d.reg);
1333+ ADD_REG_TO_REG(temp_reg, d.reg);
1334+ ADD_REG_TO_REG(o.reg, d.reg);
1335+ } else {
1336+ MOV_REG_TO_REG(o.reg, temp_reg);
1337+ SHL(1,temp_reg);
1338+ MOV_REG_TO_REG(temp_reg, d.reg);
1339+ ADD_REG_TO_REG(o.reg, d.reg);
1340+ ADD_REG_TO_REG(b.reg, d.reg);
1341+ }
1342+ break;
1343+ case 16:
1344+ MOV_REG_TO_REG(b.reg, temp_reg);
1345+ MOV_REG_TO_REG(o.reg, d.reg);
1346+ SHL(1,d.reg);
1347+ ADD_REG_TO_REG(temp_reg, d.reg);
1348+ break;
1349+ case 8:
1350+ MOV_REG_TO_REG(b.reg, temp_reg);
1351+ MOV_REG_TO_REG(o.reg, d.reg);
1352+ ADD_REG_TO_REG(temp_reg, d.reg);
1353+ break;
1354+ }
1355+ scratches.recycle(temp_reg);
1356+}
1357+
1358+// ----------------------------------------------------------------------------
1359+// cheezy register allocator...
1360+// ----------------------------------------------------------------------------
1361+
1362+void X86RegisterAllocator::reset()
1363+{
1364+ mRegs.reset();
1365+}
1366+
1367+int X86RegisterAllocator::reserveReg(int reg)
1368+{
1369+ return mRegs.reserve(reg);
1370+}
1371+
1372+int X86RegisterAllocator::obtainReg()
1373+{
1374+ return mRegs.obtain();
1375+}
1376+
1377+void X86RegisterAllocator::recycleReg(int reg)
1378+{
1379+ mRegs.recycle(reg);
1380+}
1381+
1382+X86RegisterAllocator::RegisterFile& X86RegisterAllocator::registerFile()
1383+{
1384+ return mRegs;
1385+}
1386+
1387+// ----------------------------------------------------------------------------
1388+
1389+X86RegisterAllocator::RegisterFile::RegisterFile()
1390+ : mRegs(0), mTouched(0), mStatus(0)
1391+{
1392+ //reserve(PhysicalReg_EBP);
1393+ //reserve(PhysicalReg_ESP);
1394+}
1395+
1396+X86RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs)
1397+ : mRegs(rhs.mRegs), mTouched(rhs.mTouched)
1398+{
1399+}
1400+
1401+X86RegisterAllocator::RegisterFile::~RegisterFile()
1402+{
1403+}
1404+
1405+bool X86RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
1406+{
1407+ return (mRegs == rhs.mRegs);
1408+}
1409+
1410+void X86RegisterAllocator::RegisterFile::reset()
1411+{
1412+ mRegs = mTouched = mStatus = 0;
1413+}
1414+
1415+int X86RegisterAllocator::RegisterFile::reserve(int reg)
1416+{
1417+ LOG_ALWAYS_FATAL_IF(isUsed(reg),
1418+ "reserving register %d, but already in use",
1419+ reg);
1420+ if(isUsed(reg)) return -1;
1421+ mRegs |= (1<<reg);
1422+ mTouched |= mRegs;
1423+ return reg;
1424+}
1425+
1426+void X86RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
1427+{
1428+ mRegs |= regMask;
1429+ mTouched |= regMask;
1430+}
1431+
1432+int X86RegisterAllocator::RegisterFile::isUsed(int reg) const
1433+{
1434+ LOG_ALWAYS_FATAL_IF(reg>=6, "invalid register %d", reg);
1435+ return mRegs & (1<<reg);
1436+}
1437+
1438+int X86RegisterAllocator::RegisterFile::obtain()
1439+{
1440+//multiplication result is in edx:eax
1441+//ebx, ecx, edi, esi, eax, edx
1442+ const char priorityList[6] = { PhysicalReg_EBX, PhysicalReg_ECX,PhysicalReg_EDI, PhysicalReg_ESI, PhysicalReg_EAX, PhysicalReg_EDX };
1443+
1444+ const int nbreg = sizeof(priorityList);
1445+ int i, r;
1446+ for (i=0 ; i<nbreg ; i++) {
1447+ r = priorityList[i];
1448+ if (!isUsed(r)) {
1449+ break;
1450+ }
1451+ }
1452+ // this is not an error anymore because we'll try again with
1453+ // a lower optimization level.
1454+ ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
1455+ if (i >= nbreg) {
1456+ mStatus |= OUT_OF_REGISTERS;
1457+ // we return SP so we can more easily debug things
1458+ // the code will never be run anyway.
1459+ printf("pixelflinger ran out of registers\n");
1460+ return PhysicalReg_ESP;
1461+ //return -1;
1462+ }
1463+ reserve(r);
1464+ return r;
1465+}
1466+
1467+bool X86RegisterAllocator::RegisterFile::hasFreeRegs() const
1468+{
1469+ return ((mRegs & 0x3F) == 0x3F) ? false : true;
1470+}
1471+
1472+int X86RegisterAllocator::RegisterFile::countFreeRegs() const
1473+{
1474+ int f = ~mRegs & 0x3F;
1475+ // now count number of 1
1476+ f = (f & 0x5555) + ((f>>1) & 0x5555);
1477+ f = (f & 0x3333) + ((f>>2) & 0x3333);
1478+ f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
1479+ f = (f & 0x00FF) + ((f>>8) & 0x00FF);
1480+ return f;
1481+}
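
countFreeRegs() above is a branch-free population count over the six allocatable registers: each masked addition merges adjacent bit-field sums into fields of twice the width. A standalone check of the same ladder (a hypothetical test, not part of the commit):

    #include <assert.h>
    #include <stdint.h>

    static int count_free(uint32_t used) {
        int f = ~used & 0x3F;                   // mask of free registers (6 bits)
        f = (f & 0x5555) + ((f>>1) & 0x5555);   // 1-bit fields -> 2-bit sums
        f = (f & 0x3333) + ((f>>2) & 0x3333);   // 2-bit fields -> 4-bit sums
        f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);   // 4-bit fields -> 8-bit sums
        f = (f & 0x00FF) + ((f>>8) & 0x00FF);   // 8-bit fields -> final count
        return f;
    }

    int main(void) {
        assert(count_free(0x03) == 4);   // two registers in use, four free
        assert(count_free(0x3F) == 0);   // all six in use
        assert(count_free(0x00) == 6);   // none in use
        return 0;
    }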
1482+
1483+void X86RegisterAllocator::RegisterFile::recycle(int reg)
1484+{
1485+ LOG_FATAL_IF(!isUsed(reg),
1486+ "recycling unallocated register %d",
1487+ reg);
1488+ mRegs &= ~(1<<reg);
1489+}
1490+
1491+void X86RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
1492+{
1493+ LOG_FATAL_IF((mRegs & regMask)!=regMask,
1494+ "recycling unallocated registers "
1495+ "(recycle=%08x, allocated=%08x, unallocated=%08x)",
1496+ regMask, mRegs, mRegs&regMask);
1497+ mRegs &= ~regMask;
1498+}
1499+
1500+uint32_t X86RegisterAllocator::RegisterFile::touched() const
1501+{
1502+ return mTouched;
1503+}
1504+
1505+// ----------------------------------------------------------------------------
1506+
1507+}; // namespace android
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/GGLX86Assembler.h
@@ -0,0 +1,563 @@
1+/* libs/pixelflinger/codeflinger/x86/GGLX86Assembler.h
2+**
3+** Copyright 2006, The Android Open Source Project
4+**
5+** Licensed under the Apache License, Version 2.0 (the "License");
6+** you may not use this file except in compliance with the License.
7+** You may obtain a copy of the License at
8+**
9+** http://www.apache.org/licenses/LICENSE-2.0
10+**
11+** Unless required by applicable law or agreed to in writing, software
12+** distributed under the License is distributed on an "AS IS" BASIS,
13+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+** See the License for the specific language governing permissions and
15+** limitations under the License.
16+*/
17+
18+
19+#ifndef ANDROID_GGLX86ASSEMBLER_H
20+#define ANDROID_GGLX86ASSEMBLER_H
21+
22+#include <stdint.h>
23+#include <sys/types.h>
24+
25+#include <private/pixelflinger/ggl_context.h>
26+
27+#include "codeflinger/x86/X86Assembler.h"
28+
29+
30+namespace android {
31+
32+// ----------------------------------------------------------------------------
33+
34+#define CONTEXT_LOAD(REG, FIELD) \
35+ MOV_MEM_TO_REG(GGL_OFFSETOF(FIELD), mBuilderContext.Rctx, REG)
36+
37+#define CONTEXT_STORE(REG, FIELD) \
38+ MOV_REG_TO_MEM(REG, GGL_OFFSETOF(FIELD), mBuilderContext.Rctx)
39+
40+class X86RegisterAllocator
41+{
42+public:
43+ class RegisterFile;
44+
45+ RegisterFile& registerFile();
46+ int reserveReg(int reg);
47+ int obtainReg();
48+ void recycleReg(int reg);
49+ void reset();
50+
51+ class RegisterFile
52+ {
53+ public:
54+ RegisterFile();
55+ RegisterFile(const RegisterFile& rhs);
56+ ~RegisterFile();
57+
58+ void reset();
59+
60+ bool operator == (const RegisterFile& rhs) const;
61+ bool operator != (const RegisterFile& rhs) const {
62+ return !operator == (rhs);
63+ }
64+
65+ int reserve(int reg);
66+ void reserveSeveral(uint32_t regMask);
67+
68+ void recycle(int reg);
69+ void recycleSeveral(uint32_t regMask);
70+
71+ int obtain();
72+ inline int isUsed(int reg) const;
73+
74+ bool hasFreeRegs() const;
75+ int countFreeRegs() const;
76+
77+ uint32_t touched() const;
78+ inline uint32_t status() const { return mStatus; }
79+
80+ enum {
81+ OUT_OF_REGISTERS = 0x1
82+ };
83+
84+ private:
85+ uint32_t mRegs;
86+ uint32_t mTouched;
87+ uint32_t mStatus;
88+ };
89+
90+ class Scratch
91+ {
92+ public:
93+ Scratch(RegisterFile& regFile)
94+ : mRegFile(regFile), mScratch(0) {
95+ }
96+ ~Scratch() {
97+ mRegFile.recycleSeveral(mScratch);
98+ }
99+ int obtain() {
100+ int reg = mRegFile.obtain();
101+ mScratch |= 1<<reg;
102+ return reg;
103+ }
104+ void reserve(int reg) {
105+ mRegFile.reserve(reg);
106+ mScratch |= 1<<reg;
107+ }
108+ void recycle(int reg) {
109+ mRegFile.recycle(reg);
110+ mScratch &= ~(1<<reg);
111+ }
112+ bool isUsed(int reg) {
113+ return (mScratch & (1<<reg));
114+ }
115+ int countFreeRegs() {
116+ return mRegFile.countFreeRegs();
117+ }
118+ private:
119+ RegisterFile& mRegFile;
120+ uint32_t mScratch;
121+ };
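
Scratch, defined above, is a small RAII guard over the register file: every register obtained or reserved through it is recorded in mScratch, and the destructor recycles whatever is still held. Typical usage, mirroring the pattern throughout GGLX86Assembler.cpp (a sketch only):

    {
        Scratch scratches(registerFile());
        int r = scratches.obtain();   // take a free register
        // ... emit code that uses r ...
        scratches.recycle(r);         // optional early release
    }   // leaving the scope releases anything still held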
122+
123+/*
124+// currently we don't use it
125+
126+ class Spill
127+ {
128+ public:
129+ Spill(RegisterFile& regFile, X86Assembler& gen, uint32_t reglist)
130+ : mRegFile(regFile), mGen(gen), mRegList(reglist), mCount(0)
131+ {
132+ if (reglist) {
133+ int count = 0;
134+ while (reglist) {
135+ count++;
136+ reglist &= ~(1 << (31 - __builtin_clz(reglist)));
137+ }
138+ if (count == 1) {
139+ int reg = 31 - __builtin_clz(mRegList);
140+ // move to the stack
141+ } else {
142+ // move to the stack
143+ }
144+ mRegFile.recycleSeveral(mRegList);
145+ mCount = count;
146+ }
147+ }
148+ ~Spill() {
149+ if (mRegList) {
150+ if (mCount == 1) {
151+ int reg = 31 - __builtin_clz(mRegList);
152+ // move to the stack
153+ } else {
154+ }
155+ mRegFile.reserveSeveral(mRegList);
156+ }
157+ }
158+ private:
159+ RegisterFile& mRegFile;
160+ X86Assembler& mGen;
161+ uint32_t mRegList;
162+ int mCount;
163+ };
164+*/
165+
166+private:
167+ RegisterFile mRegs;
168+};
169+
170+// ----------------------------------------------------------------------------
171+
172+class GGLX86Assembler : public X86Assembler, public X86RegisterAllocator
173+{
174+public:
175+
176+ GGLX86Assembler(const sp<Assembly>& assembly);
177+ ~GGLX86Assembler();
178+
179+ char* base() const { return 0; } // XXX
180+ char* pc() const { return 0; } // XXX
181+
182+ void reset(int opt_level);
183+
184+
185+ // generate scanline code for given needs
186+ int scanline(const needs_t& needs, context_t const* c);
187+ int scanline_core(const needs_t& needs, context_t const* c);
188+
189+ enum {
190+ CLEAR_LO = 0x0001,
191+ CLEAR_HI = 0x0002,
192+ CORRUPTIBLE = 0x0004,
193+ FIRST = 0x0008
194+ };
195+
196+ enum { //load/store flags
197+ WRITE_BACK = 0x0001
198+ };
199+
200+ struct reg_t {
201+ reg_t() : reg(-1), flags(0), offset_ebp(0) {
202+ }
203+ reg_t(int r, int f=0, int offset=0)
204+ : reg(r), flags(f), offset_ebp(offset) {
205+ }
206+ void setTo(int r, int f=0, int offset=0) {
207+ reg=r; flags=f; offset_ebp=offset;
208+ }
209+ int reg;
210+ uint16_t flags;
211+ int offset_ebp;
212+ };
213+
214+ struct integer_t : public reg_t {
215+ integer_t() : reg_t(), s(0) {
216+ }
217+ integer_t(int r, int sz=32, int f=0, int offset=0)
218+ : reg_t(r, f, offset), s(sz) {
219+ }
220+ void setTo(int r, int sz=32, int f=0, int offset=0) {
221+ reg_t::setTo(r, f, offset); s=sz;
222+ }
223+ int8_t s;
224+ inline int size() const { return s; }
225+ };
226+
227+ struct pixel_t : public reg_t {
228+ pixel_t() : reg_t() {
229+ memset(&format, 0, sizeof(GGLFormat));
230+ }
231+ pixel_t(int r, const GGLFormat* fmt, int f=0, int offset=0)
232+ : reg_t(r, f, offset), format(*fmt) {
233+ }
234+ void setTo(int r, const GGLFormat* fmt, int f=0, int offset=0) {
235+ reg_t::setTo(r, f, offset); format = *fmt;
236+ }
237+ GGLFormat format;
238+ inline int hi(int c) const { return format.c[c].h; }
239+ inline int low(int c) const { return format.c[c].l; }
240+ inline int mask(int c) const { return ((1<<size(c))-1) << low(c); }
241+ inline int size() const { return format.size*8; }
242+ inline int size(int c) const { return component_size(c); }
243+ inline int component_size(int c) const { return hi(c) - low(c); }
244+ };
245+
246+ struct component_t : public reg_t {
247+ component_t() : reg_t(), h(0), l(0) {
248+ }
249+ component_t(int r, int f=0, int offset=0)
250+ : reg_t(r, f, offset), h(0), l(0) {
251+ }
252+ component_t(int r, int lo, int hi, int f=0, int offset=0)
253+ : reg_t(r, f, offset), h(hi), l(lo) {
254+ }
255+ explicit component_t(const integer_t& rhs)
256+ : reg_t(rhs.reg, rhs.flags, rhs.offset_ebp), h(rhs.s), l(0) {
257+ }
258+ explicit component_t(const pixel_t& rhs, int component) {
259+ setTo( rhs.reg,
260+ rhs.format.c[component].l,
261+ rhs.format.c[component].h,
262+ rhs.flags|CLEAR_LO|CLEAR_HI, rhs.offset_ebp);
263+ }
264+ void setTo(int r, int lo=0, int hi=0, int f=0, int offset=0) {
265+ reg_t::setTo(r, f, offset); h=hi; l=lo;
266+ }
267+ int8_t h;
268+ int8_t l;
269+ inline int size() const { return h-l; }
270+ };
271+
272+ struct pointer_t : public reg_t {
273+ pointer_t() : reg_t(), size(0) {
274+ }
275+ pointer_t(int r, int s, int f=0, int offset=0)
276+ : reg_t(r, f, offset), size(s) {
277+ }
278+ void setTo(int r, int s, int f=0, int offset=0) {
279+ reg_t::setTo(r, f, offset); size=s;
280+ }
281+ int8_t size;
282+ };
283+
284+
285+private:
286+ struct tex_coord_t {
287+ reg_t s;
288+ reg_t t;
289+ pointer_t ptr;
290+ };
291+
292+ struct fragment_parts_t {
293+ uint32_t packed : 1;
294+ uint32_t reload : 2;
295+ uint32_t iterated_packed : 1;
296+ pixel_t iterated;
297+ pointer_t cbPtr;
298+ pointer_t covPtr;
299+ reg_t count;
300+ reg_t argb[4];
301+ reg_t argb_dx[4];
302+ reg_t z;
303+ reg_t dither;
304+ pixel_t texel[GGL_TEXTURE_UNIT_COUNT];
305+ tex_coord_t coords[GGL_TEXTURE_UNIT_COUNT];
306+ };
307+
308+ struct texture_unit_t {
309+ int format_idx;
310+ GGLFormat format;
311+ int bits;
312+ int swrap;
313+ int twrap;
314+ int env;
315+ int pot;
316+ int linear;
317+ uint8_t mask;
318+ uint8_t replaced;
319+ };
320+
321+ struct texture_machine_t {
322+ texture_unit_t tmu[GGL_TEXTURE_UNIT_COUNT];
323+ uint8_t mask;
324+ uint8_t replaced;
325+ uint8_t directTexture;
326+ uint8_t activeUnits;
327+ };
328+
329+ struct component_info_t {
330+ bool masked : 1;
331+ bool inDest : 1;
332+ bool needed : 1;
333+ bool replaced : 1;
334+ bool iterated : 1;
335+ bool smooth : 1;
336+ bool blend : 1;
337+ bool fog : 1;
338+ };
339+
340+ struct builder_context_t {
341+ context_t const* c;
342+ needs_t needs;
343+ int Rctx;
344+ };
345+
346+ template <typename T>
347+ void modify(T& r, Scratch& regs)
348+ {
349+ if (!(r.flags & CORRUPTIBLE)) {
350+ r.reg = regs.obtain();
351+ r.flags |= CORRUPTIBLE;
352+ }
353+ }
354+
355+ // helpers
356+ void base_offset(pointer_t& d, pointer_t& b, const reg_t& o);
357+
358+ // texture environment
359+ void modulate( component_t& dest,
360+ const component_t& incoming,
361+ const pixel_t& texel, int component);
362+
363+ void decal( component_t& dest,
364+ const component_t& incoming,
365+ const pixel_t& texel, int component);
366+
367+ void blend( component_t& dest,
368+ const component_t& incoming,
369+ const pixel_t& texel, int component, int tmu);
370+
371+ void add( component_t& dest,
372+ const component_t& incoming,
373+ const pixel_t& texel, int component);
374+
375+ // load/store stuff
376+ void store(const pointer_t& addr, const pixel_t& src, uint32_t flags=0);
377+ void load(pointer_t& addr, const pixel_t& dest, uint32_t flags=0);
378+
379+ void extract(integer_t& d, const pixel_t& s, int component);
380+ void extract(component_t& d, const pixel_t& s, int component);
381+ void extract(integer_t& d, int s, int h, int l, int bits=32);
382+ void expand(integer_t& d, const integer_t& s, int dbits);
383+ void expand(integer_t& d, const component_t& s, int dbits);
384+ void expand(component_t& d, const component_t& s, int dbits);
385+ void downshift(pixel_t& d, int component, component_t s, reg_t& dither);
386+
387+
388+ void mul_factor( component_t& d,
389+ const integer_t& v,
390+ const integer_t& f, Scratch& scratches);
391+
392+ void mul_factor_add( component_t& d,
393+ const integer_t& v,
394+ const integer_t& f,
395+ const component_t& a);
396+
397+ void component_add( component_t& d,
398+ const integer_t& dst,
399+ const integer_t& src);
400+
401+ void component_sat( const component_t& v, const int temp_reg);
402+
403+
404+ void build_scanline_preparation(fragment_parts_t& parts,
405+ const needs_t& needs);
406+
407+ void build_smooth_shade(fragment_parts_t& parts);
408+
409+ void build_component( pixel_t& pixel,
410+ fragment_parts_t& parts,
411+ int component,
412+ Scratch& global_scratches);
413+
414+ void build_incoming_component(
415+ component_t& temp,
416+ int dst_size,
417+ fragment_parts_t& parts,
418+ int component,
419+ Scratch& scratches,
420+ Scratch& global_scratches);
421+
422+ void init_iterated_color(fragment_parts_t& parts, const reg_t& x);
423+
424+ void build_iterated_color( component_t& fragment,
425+ fragment_parts_t& parts,
426+ int component,
427+ Scratch& regs);
428+
429+ void decodeLogicOpNeeds(const needs_t& needs);
430+
431+ void decodeTMUNeeds(const needs_t& needs, context_t const* c);
432+
433+ void init_textures( tex_coord_t* coords,
434+ const reg_t& x,
435+ const reg_t& y);
436+
437+ void build_textures( fragment_parts_t& parts,
438+ Scratch& regs);
439+
440+ void filter8( const fragment_parts_t& parts,
441+ pixel_t& texel, const texture_unit_t& tmu,
442+ reg_t reg_U, reg_t reg_V, pointer_t& txPtr,
443+ int FRAC_BITS, Scratch& scratches);
444+
445+ void filter16( const fragment_parts_t& parts,
446+ pixel_t& texel, const texture_unit_t& tmu,
447+ reg_t reg_U, reg_t reg_V, pointer_t& txPtr,
448+ int FRAC_BITS, Scratch& scratches);
449+
450+ void filter24( const fragment_parts_t& parts,
451+ pixel_t& texel, const texture_unit_t& tmu,
452+ int U, int V, pointer_t& txPtr,
453+ int FRAC_BITS);
454+
455+ void filter32( const fragment_parts_t& parts,
456+ pixel_t& texel, const texture_unit_t& tmu,
457+ reg_t reg_U, reg_t reg_V, pointer_t& txPtr,
458+ int FRAC_BITS, Scratch& scratches);
459+
460+ void build_texture_environment( component_t& fragment,
461+ fragment_parts_t& parts,
462+ int component,
463+ Scratch& regs);
464+
465+ void wrapping( int d,
466+ int coord, int size,
467+ int tx_wrap, int tx_linear, Scratch& scratches);
468+
469+ void build_fog( component_t& temp,
470+ int component,
471+ Scratch& parent_scratches);
472+
473+ void build_blending( component_t& in_out,
474+ pixel_t& pixel,
475+ int component,
476+ Scratch& parent_scratches);
477+
478+ void build_blend_factor(
479+ integer_t& factor, int f, int component,
480+ const pixel_t& dst_pixel,
481+ integer_t& fragment,
482+ integer_t& fb,
483+ Scratch& scratches);
484+
485+ void build_blendFOneMinusF( component_t& temp,
486+ const integer_t& factor,
487+ const integer_t& fragment,
488+ const integer_t& fb);
489+
490+ void build_blendOneMinusFF( component_t& temp,
491+ const integer_t& factor,
492+ const integer_t& fragment,
493+ const integer_t& fb);
494+
495+ void build_coverage_application(component_t& fragment,
496+ fragment_parts_t& parts,
497+ Scratch& regs);
498+
499+ void build_alpha_test(component_t& fragment, const fragment_parts_t& parts);
500+
501+ enum { Z_TEST=1, Z_WRITE=2 };
502+ void build_depth_test(const fragment_parts_t& parts, uint32_t mask);
503+ void build_iterate_z(const fragment_parts_t& parts);
504+ void build_iterate_f(const fragment_parts_t& parts);
505+ void build_iterate_texture_coordinates(const fragment_parts_t& parts);
506+
507+ void build_logic_op(pixel_t& pixel, Scratch& regs);
508+
509+ void build_masking(pixel_t& pixel, Scratch& regs);
510+
511+ void build_and_immediate(int d, int s, uint32_t mask, int bits);
512+
513+ bool isAlphaSourceNeeded() const;
514+
515+ enum {
516+ FACTOR_SRC=1, FACTOR_DST=2, BLEND_SRC=4, BLEND_DST=8
517+ };
518+
519+ enum {
520+ LOGIC_OP=1, LOGIC_OP_SRC=2, LOGIC_OP_DST=4
521+ };
522+
523+ static int blending_codes(int fs, int fd);
524+
525+ builder_context_t mBuilderContext;
526+ texture_machine_t mTextureMachine;
527+ component_info_t mInfo[4];
528+ int mBlending;
529+ int mMasking;
530+ int mAllMasked;
531+ int mLogicOp;
532+ int mAlphaTest;
533+ int mAA;
534+ int mDithering;
535+ int mDepthTest;
536+
537+ int mSmooth;
538+ int mFog;
539+ pixel_t mDstPixel;
540+
541+ GGLFormat mCbFormat;
542+
543+ int mBlendFactorCached;
544+ integer_t mAlphaSource;
545+
546+ int mBaseRegister;
547+
548+ int mBlendSrc;
549+ int mBlendDst;
550+ int mBlendSrcA;
551+ int mBlendDstA;
552+
553+ int mOptLevel;
554+
555+ // stack-frame bookkeeping: tracks how far esp has been stretched so it can be shrunk back
556+ int mCurSp;
557+};
558+
559+// ----------------------------------------------------------------------------
560+
561+}; // namespace android
562+
563+#endif // ANDROID_GGLX86ASSEMBLER_H
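Throughout the class above, reg_t and its subclasses carry not just a register index but also a home slot relative to EBP (offset_ebp): x86 has far fewer general-purpose registers than the ARM code generator assumes, so values are routinely spilled and reloaded. The h/l fields of component_t and the GGLFormat inside pixel_t describe which bits of the register are meaningful. A stand-alone illustration of that bit-range arithmetic, using the green component of RGB565 (l = 5, h = 11) as an assumed example:

    #include <cstdio>

    int main() {
        const int l = 5, h = 11;                  // green bit range in RGB565
        const int size = h - l;                   // component_size() == 6
        const int mask = ((1 << size) - 1) << l;  // pixel_t::mask()
        printf("green: size=%d mask=0x%04x\n", size, mask); // size=6 mask=0x07e0
        return 0;
    }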
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/X86Assembler.cpp
@@ -0,0 +1,618 @@
1+/* libs/pixelflinger/codeflinger/x86/X86Assembler.cpp
2+**
3+** Copyright 2006, The Android Open Source Project
4+**
5+** Licensed under the Apache License, Version 2.0 (the "License");
6+** you may not use this file except in compliance with the License.
7+** You may obtain a copy of the License at
8+**
9+** http://www.apache.org/licenses/LICENSE-2.0
10+**
11+** Unless required by applicable law or agreed to in writing, software
12+** distributed under the License is distributed on an "AS IS" BASIS,
13+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+** See the License for the specific language governing permissions and
15+** limitations under the License.
16+*/
17+
18+#define LOG_TAG "X86Assembler"
19+
20+#include <stdio.h>
21+#include <stdlib.h>
22+#include <cutils/log.h>
23+#include <cutils/properties.h>
24+#include <string.h>
25+
26+#if defined(WITH_LIB_HARDWARE)
27+#include <hardware_legacy/qemu_tracing.h>
28+#endif
29+
30+#include <private/pixelflinger/ggl_context.h>
31+
32+#include "codeflinger/CodeCache.h"
33+#include "codeflinger/x86/X86Assembler.h"
34+
35+// ----------------------------------------------------------------------------
36+
37+namespace android {
38+
39+// ----------------------------------------------------------------------------
40+
41+X86Assembler::X86Assembler(const sp<Assembly>& assembly)
42+ : mAssembly(assembly)
43+{
44+ mBase = mStream = (char *)assembly->base();
45+ mDuration = ggl_system_time();
46+#if defined(WITH_LIB_HARDWARE)
47+ mQemuTracing = true;
48+#endif
49+}
50+
51+X86Assembler::~X86Assembler()
52+{
53+}
54+
55+char* X86Assembler::pc() const
56+{
57+ return mStream;
58+}
59+
60+char* X86Assembler::base() const
61+{
62+ return mBase;
63+}
64+
65+void X86Assembler::reset()
66+{
67+ mBase = mStream = (char *)mAssembly->base();
68+ mBranchTargets.clear();
69+ mLabels.clear();
70+ mLabelsInverseMapping.clear();
71+ mComments.clear();
72+}
73+
74+// ----------------------------------------------------------------------------
75+
76+void X86Assembler::disassemble(const char* name)
77+{
78+ if (name) {
79+ printf("%s:\n", name);
80+ }
81+ size_t count = pc()-base();
82+ unsigned insLength;
83+ unsigned insSize;
84+ char* curStream = (char*)base();
85+ while (count>0) {
86+ ssize_t label = mLabelsInverseMapping.indexOfKey(curStream);
87+ if (label >= 0) {
88+ printf("%s:\n", mLabelsInverseMapping.valueAt(label));
89+ }
90+ ssize_t comment = mComments.indexOfKey(curStream);
91+ if (comment >= 0) {
92+ printf("; %s\n", mComments.valueAt(comment));
93+ }
94+ insLength = decodeThenPrint(curStream);
95+ curStream = curStream + insLength;
96+ count = count - insLength;
97+ }
98+}
99+
100+void X86Assembler::comment(const char* string)
101+{
102+ mComments.add(mStream, string);
103+}
104+
105+void X86Assembler::label(const char* theLabel)
106+{
107+ mLabels.add(theLabel, mStream);
108+ mLabelsInverseMapping.add(mStream, theLabel);
109+}
110+
111+// emit a conditional jump to a label; the displacement is patched in generate()
112+void X86Assembler::JCC(Mnemonic cc, const char* label) {
113+ switch (cc) {
114+ case Mnemonic_JO:
115+ encoder_imm(Mnemonic_JO, OpndSize_32, 0/*imm*/, mStream);
116+ break;
117+ case Mnemonic_JNO:
118+ encoder_imm(Mnemonic_JNO, OpndSize_32, 0/*imm*/, mStream);
119+ break;
120+ case Mnemonic_JB:
121+ encoder_imm(Mnemonic_JB, OpndSize_32, 0/*imm*/, mStream);
122+ break;
123+ case Mnemonic_JNB:
124+ encoder_imm(Mnemonic_JNB, OpndSize_32, 0/*imm*/, mStream);
125+ break;
126+ case Mnemonic_JZ:
127+ encoder_imm(Mnemonic_JZ, OpndSize_32, 0/*imm*/, mStream);
128+ break;
129+ case Mnemonic_JNZ:
130+ encoder_imm(Mnemonic_JNZ, OpndSize_32, 0/*imm*/, mStream);
131+ break;
132+ case Mnemonic_JBE:
133+ encoder_imm(Mnemonic_JBE, OpndSize_32, 0/*imm*/, mStream);
134+ break;
135+ case Mnemonic_JNBE:
136+ encoder_imm(Mnemonic_JNBE, OpndSize_32, 0/*imm*/, mStream);
137+ break;
138+ case Mnemonic_JS:
139+ encoder_imm(Mnemonic_JS, OpndSize_32, 0/*imm*/, mStream);
140+ break;
141+ case Mnemonic_JNS:
142+ encoder_imm(Mnemonic_JNS, OpndSize_32, 0/*imm*/, mStream);
143+ break;
144+ case Mnemonic_JP:
145+ encoder_imm(Mnemonic_JP, OpndSize_32, 0/*imm*/, mStream);
146+ break;
147+ case Mnemonic_JNP:
148+ encoder_imm(Mnemonic_JNP, OpndSize_32, 0/*imm*/, mStream);
149+ break;
150+ case Mnemonic_JL:
151+ encoder_imm(Mnemonic_JL, OpndSize_32, 0/*imm*/, mStream);
152+ break;
153+ case Mnemonic_JNL:
154+ encoder_imm(Mnemonic_JNL, OpndSize_32, 0/*imm*/, mStream);
155+ break;
156+ case Mnemonic_JLE:
157+ encoder_imm(Mnemonic_JLE, OpndSize_32, 0/*imm*/, mStream);
158+ break;
159+ case Mnemonic_JNLE:
160+ encoder_imm(Mnemonic_JNLE, OpndSize_32, 0/*imm*/, mStream);
161+ break;
162+ default :
163+ printf("the condition is not supported.\n");
164+ return;
165+ }
166+ mStreamNext = mStream + encoder_get_inst_size(mStream);
167+ // the encoded offset is relative to the instruction following the jump
168+ mBranchTargets.add(branch_target_t(label, mStream, mStreamNext));
169+ mStream = mStreamNext;
170+}
171+
172+void X86Assembler::JMP(const char* label) {
173+ encoder_imm(Mnemonic_JMP, OpndSize_32, 0/*imm*/, mStream);
174+ mStreamNext = mStream + encoder_get_inst_size(mStream);
175+ mBranchTargets.add(branch_target_t(label, mStream, mStreamNext));
176+ mStream = mStreamNext;
177+}
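JCC() and JMP() emit the jump with a zero 32-bit displacement, record the label together with the jump's PC and the following instruction's PC, and leave the real displacement to be patched by generate(). A minimal stand-alone model of that fixup, assuming the 5-byte jmp rel32 encoding (the real code asks encoder_get_inst_size() instead of hard-coding sizes):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
        uint8_t code[16] = {0};
        code[0] = 0xE9;                           // jmp rel32, displacement 0 for now
        uint8_t* next_pc = &code[0] + 5;          // first byte after the jump
        uint8_t* target  = &code[12];             // where label() later lands
        int32_t  off     = int32_t(target - next_pc);
        std::memcpy(&code[1], &off, sizeof off);  // the encoder_update_imm() step
        printf("patched rel32 = %d\n", off);      // prints 7
        return 0;
    }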
178+
179+void X86Assembler::prepare_esp(int old_offset)
180+{
181+ mStreamUpdate = mStream;
182+ SUB_IMM_TO_REG(old_offset, ESP);
183+}
184+
185+void X86Assembler::update_esp(int new_offset)
186+{
187+ encoder_update_imm_rm(new_offset, mStreamUpdate);
188+}
189+
190+void X86Assembler::shrink_esp(int shrink_offset)
191+{
192+ ADD_IMM_TO_REG(shrink_offset, ESP);
193+}
194+
195+void X86Assembler::callee_work()
196+{
197+ // set up the frame and save the callee-saved registers (EBX, ESI, EDI):
198+ /*
199+ push %ebp
200+ mov %esp,%ebp
201+ push %ebx
202+ push %esi
203+ push %edi
204+ */
205+ PUSH(EBP);
206+ MOV_REG_TO_REG(ESP, EBP);
207+ PUSH(EBX);
208+ PUSH(ESI);
209+ PUSH(EDI);
210+}
211+
212+void X86Assembler::return_work()
213+{
214+// pop %edi
215+// pop %esi
216+// pop %ebx
217+// pop %ebp
218+// ret
219+// (esp must already have been restored via shrink_esp; no movl %ebp,%esp is emitted)
220+// ret is equivalent to:
221+// pop %eax // the return address
222+// jmp *%eax
223+ POP(EDI);
224+ POP(ESI);
225+ POP(EBX);
226+ POP(EBP);
227+ encoder_return(mStream);
228+ mStream = mStream + encoder_get_inst_size(mStream);
229+}
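callee_work() and return_work() bracket the generated scanline with a conventional cdecl prologue/epilogue; since return_work() pops straight into EBP without a movl %ebp,%esp, the frame must already have been released with shrink_esp(). prepare_esp()/update_esp() exist because the final frame size is only known after the body has been emitted. A usage sketch of the expected sequence (an assumption; the real call sites are in the scanline generator, and 64 is a made-up frame size):

    void emit_frame(X86Assembler& as) {
        as.callee_work();   // push %ebp; mov %esp,%ebp; push %ebx/%esi/%edi
        as.prepare_esp(0);  // sub $0,%esp -- placeholder, remembered in mStreamUpdate
        // ... emit the body; spill slots grow the frame ...
        as.update_esp(64);  // patch the sub's immediate to the real frame size
        as.shrink_esp(64);  // add $64,%esp: release the frame
        as.return_work();   // pop %edi/%esi/%ebx/%ebp; ret
    }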
230+
231+int X86Assembler::generate(const char* name)
232+{
233+ // fixup all the branches
234+ size_t count = mBranchTargets.size();
235+ while (count--) {
236+ const branch_target_t& bt = mBranchTargets[count];
237+ char* target_pc = mLabels.valueFor(bt.label);
238+ LOG_ALWAYS_FATAL_IF(!target_pc,
239+ "error resolving branch targets, target_pc is null");
240+ // the encoded offset is relative to the instruction following the jump
241+ int32_t offset = int32_t(target_pc - bt.next_pc);
242+ encoder_update_imm(offset, bt.pc);
243+ }
244+
245+ mAssembly->resize((int)(pc()-base()));
246+
247+ // the instruction cache is flushed by CodeCache
248+ const int64_t duration = ggl_system_time() - mDuration;
249+ const char * const format = "generated %s (%d bytes) at [%p:%p] in %lld ns\n";
250+ ALOGI(format, name, int(pc()-base()), base(), pc(), duration);
251+
252+#if defined(WITH_LIB_HARDWARE)
253+ if (__builtin_expect(mQemuTracing, 0)) {
254+ int err = qemu_add_mapping(uintptr_t(base()), name);
255+ mQemuTracing = (err >= 0);
256+ }
257+#endif
258+
259+ char value[PROPERTY_VALUE_MAX];
260+ property_get("debug.pf.disasm", value, "0");
261+ if (atoi(value) != 0) {
262+ printf(format, name, int(pc()-base()), base(), pc(), duration);
263+ disassemble(name);
264+ }
265+
266+ return NO_ERROR;
267+}
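Because generate() reads the debug.pf.disasm system property on every call, the disassembly dump can be toggled at run time without rebuilding, e.g. with adb shell setprop debug.pf.disasm 1.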
268+
269+char* X86Assembler::pcForLabel(const char* label)
270+{
271+ return mLabels.valueFor(label);
272+}
273+
274+// ----------------------------------------------------------------------------
275+
276+void X86Assembler::PUSH(int reg) {
277+ encoder_reg(Mnemonic_PUSH, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
278+ mStream = mStream + encoder_get_inst_size(mStream);
279+}
280+
281+void X86Assembler::POP(int reg) {
282+ encoder_reg(Mnemonic_POP, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
283+ mStream = mStream + encoder_get_inst_size(mStream);
284+}
285+
286+//arithmetic
287+void X86Assembler::ADD_REG_TO_REG(int src, int dst) {
288+ encoder_reg_reg(Mnemonic_ADD, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
289+ mStream = mStream + encoder_get_inst_size(mStream);
290+}
291+
292+void X86Assembler::ADD_IMM_TO_REG(int imm, int dst) {
293+ encoder_imm_reg(Mnemonic_ADD, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
294+ mStream = mStream + encoder_get_inst_size(mStream);
295+}
296+
297+void X86Assembler::ADD_IMM_TO_MEM(int imm, int disp, int dst) {
298+ encoder_imm_mem(Mnemonic_ADD, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream);
299+ mStream = mStream + encoder_get_inst_size(mStream);
300+}
301+
302+void X86Assembler::ADD_MEM_TO_REG(int base_reg, int disp, int dst) {
303+ encoder_mem_reg(Mnemonic_ADD, OpndSize_32, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/,LowOpndRegType_gp, mStream);
304+ mStream = mStream + encoder_get_inst_size(mStream);
305+}
306+
307+void X86Assembler::ADD_REG_TO_MEM(int src, int base_reg, int disp) {
308+ encoder_reg_mem(Mnemonic_ADD, OpndSize_32, src, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream);
309+ mStream = mStream + encoder_get_inst_size(mStream);
310+}
311+
312+void X86Assembler::SUB_REG_TO_REG(int src, int dst) {
313+ encoder_reg_reg(Mnemonic_SUB, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
314+ mStream = mStream + encoder_get_inst_size(mStream);
315+}
316+
317+void X86Assembler::SUB_IMM_TO_REG(int imm, int dst) {
318+ encoder_imm_reg(Mnemonic_SUB, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
319+ mStream = mStream + encoder_get_inst_size(mStream);
320+}
321+
322+void X86Assembler::SUB_IMM_TO_MEM(int imm, int disp, int dst) {
323+ encoder_imm_mem(Mnemonic_SUB, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream);
324+ mStream = mStream + encoder_get_inst_size(mStream);
325+}
326+
327+void X86Assembler::SUB_REG_TO_MEM(int src, int base_reg, int disp) {
328+ encoder_reg_mem(Mnemonic_SUB, OpndSize_32, src, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream);
329+ mStream = mStream + encoder_get_inst_size(mStream);
330+}
331+
332+//test
333+void X86Assembler::TEST_REG_TO_REG(int src, int dst, OpndSize size) {
334+ encoder_reg_reg(Mnemonic_TEST, size, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
335+ mStream = mStream + encoder_get_inst_size(mStream);
336+}
337+
338+//compare
339+void X86Assembler::CMP_REG_TO_REG(int src, int dst, OpndSize size) {
340+ encoder_reg_reg(Mnemonic_CMP, size, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
341+ mStream = mStream + encoder_get_inst_size(mStream);
342+}
343+
344+void X86Assembler::CMP_IMM_TO_REG(int imm, int dst) {
345+ encoder_imm_reg(Mnemonic_CMP, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
346+ mStream = mStream + encoder_get_inst_size(mStream);
347+}
348+
349+void X86Assembler::CMP_MEM_TO_REG(int base_reg, int disp, int dst, OpndSize size) {
350+ encoder_mem_reg(Mnemonic_CMP, size, disp, base_reg, 0/*isBasePhysical*/,
351+ dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
352+ mStream = mStream + encoder_get_inst_size(mStream);
353+}
354+
355+void X86Assembler::CMP_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size)
356+{
357+ encoder_reg_mem(Mnemonic_CMP, size, reg, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream);
358+ mStream = mStream + encoder_get_inst_size(mStream);
359+}
360+
361+//logical
362+void X86Assembler::AND_REG_TO_REG(int src, int dst) {
363+ encoder_reg_reg(Mnemonic_AND, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
364+ mStream = mStream + encoder_get_inst_size(mStream);
365+}
366+
367+void X86Assembler::AND_IMM_TO_REG(int imm, int dst) {
368+ encoder_imm_reg(Mnemonic_AND, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
369+ mStream = mStream + encoder_get_inst_size(mStream);
370+}
371+
372+void X86Assembler::OR_REG_TO_REG(int src, int dst) {
373+ encoder_reg_reg(Mnemonic_OR, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
374+ mStream = mStream + encoder_get_inst_size(mStream);
375+}
376+
377+void X86Assembler::XOR(int src, int dst) {
378+ encoder_reg_reg(Mnemonic_XOR, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
379+ mStream = mStream + encoder_get_inst_size(mStream);
380+}
381+
382+void X86Assembler::OR_IMM_TO_REG(int imm, int dst) {
383+ encoder_imm_reg(Mnemonic_OR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
384+ mStream = mStream + encoder_get_inst_size(mStream);
385+}
386+
387+void X86Assembler::NOT(int dst) {
388+ encoder_reg(Mnemonic_NOT, OpndSize_32, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
389+ mStream = mStream + encoder_get_inst_size(mStream);
390+}
391+
392+void X86Assembler::NEG(int dst) {
393+ encoder_reg(Mnemonic_NEG, OpndSize_32, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
394+ mStream = mStream + encoder_get_inst_size(mStream);
395+}
396+//shift
397+void X86Assembler::SHL(int imm, int dst) {
398+ encoder_imm_reg(Mnemonic_SHL, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
399+ mStream = mStream + encoder_get_inst_size(mStream);
400+}
401+
402+void X86Assembler::SHL(int imm, int disp, int dst) {
403+ encoder_imm_mem(Mnemonic_SHL, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream);
404+ mStream = mStream + encoder_get_inst_size(mStream);
405+}
406+
407+void X86Assembler::SHR(int imm, int dst) {
408+ encoder_imm_reg(Mnemonic_SHR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
409+ mStream = mStream + encoder_get_inst_size(mStream);
410+}
411+
412+void X86Assembler::SHR(int imm, int disp, int dst) {
413+ encoder_imm_mem(Mnemonic_SHR, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream);
414+ mStream = mStream + encoder_get_inst_size(mStream);
415+}
416+
417+void X86Assembler::SAR(int imm, int dst) {
418+ encoder_imm_reg(Mnemonic_SAR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
419+ mStream = mStream + encoder_get_inst_size(mStream);
420+}
421+
422+void X86Assembler::ROR(const int imm, int dst) {
423+ encoder_imm_reg(Mnemonic_ROR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
424+ mStream = mStream + encoder_get_inst_size(mStream);
425+}
426+
427+void X86Assembler::ROR(int imm, int disp, int dst) {
428+ encoder_imm_mem(Mnemonic_ROR, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream);
429+ mStream = mStream + encoder_get_inst_size(mStream);
430+}
431+// sign extension
432+void X86Assembler::MOVSX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst) {
433+ encoder_moves_mem_to_reg(size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, mStream);
434+ mStream = mStream + encoder_get_inst_size(mStream);
435+}
436+
437+void X86Assembler::MOVSX_REG_TO_REG(OpndSize size, int src, int dst) {
438+ encoder_moves_reg_to_reg(size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
439+ mStream = mStream + encoder_get_inst_size(mStream);
440+}
441+// zero extension
442+void X86Assembler::MOVZX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst) {
443+ encoder_movez_mem_to_reg(size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, mStream);
444+ mStream = mStream + encoder_get_inst_size(mStream);
445+}
446+
447+void X86Assembler::MOVZX_REG_TO_REG(OpndSize size, int src, int dst) {
448+ encoder_movez_reg_to_reg(size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
449+ mStream = mStream + encoder_get_inst_size(mStream);
450+}
451+
452+// multiply...
453+// one-operand forms use EAX as the implicit source; the result goes to EDX:EAX
454+void X86Assembler::IMUL(int reg) {
455+ encoder_reg(Mnemonic_IMUL, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
456+ mStream = mStream + encoder_get_inst_size(mStream);
457+}
458+
459+void X86Assembler::IMUL(int src, int dst) {
460+ encoder_reg_reg(Mnemonic_IMUL, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream);
461+ mStream = mStream + encoder_get_inst_size(mStream);
462+}
463+
464+void X86Assembler::MUL(int reg) {
465+ encoder_reg(Mnemonic_MUL, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
466+ mStream = mStream + encoder_get_inst_size(mStream);
467+}
468+
469+
470+// data transfer...
471+void X86Assembler::MOV_IMM_TO_REG(int32_t imm, int dst) {
472+ encoder_imm_reg(Mnemonic_MOV, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
473+ mStream = mStream + encoder_get_inst_size(mStream);
474+}
475+
476+void X86Assembler::MOV_REG_TO_REG(int src, int dst, OpndSize size)
477+{
478+ if(src == dst) return;
479+ encoder_reg_reg(Mnemonic_MOV, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
480+ mStream = mStream + encoder_get_inst_size(mStream);
481+}
482+
483+void X86Assembler::MOV_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size)
484+{
485+ encoder_reg_mem(Mnemonic_MOV, size, reg, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream);
486+ mStream = mStream + encoder_get_inst_size(mStream);
487+}
488+
489+void X86Assembler::MOV_MEM_TO_REG(int disp, int base_reg, int reg, OpndSize size)
490+{
491+ encoder_mem_reg(Mnemonic_MOV, size, disp, base_reg, 0/*isBasePhysical*/,
492+ reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
493+ mStream = mStream + encoder_get_inst_size(mStream);
494+}
495+
496+void X86Assembler::MOV_MEM_SCALE_TO_REG(int base_reg, int index_reg, int scale, int reg, OpndSize size)
497+{
498+ encoder_mem_scale_reg(Mnemonic_MOV, size, base_reg, 0/*isBasePhysical*/, index_reg, 0/*isIndexPhysical*/, scale, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
499+ mStream = mStream + encoder_get_inst_size(mStream);
500+}
501+// the conditional move
502+void X86Assembler::CMOV_REG_TO_REG(Mnemonic cc, int src, int dst, OpndSize size)
503+{
504+ switch (cc) {
505+ case Mnemonic_CMOVO:
506+ encoder_reg_reg(Mnemonic_CMOVO, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
507+ break;
508+ case Mnemonic_CMOVNO:
509+ encoder_reg_reg(Mnemonic_CMOVNO, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
510+ break;
511+ case Mnemonic_CMOVB:
512+ encoder_reg_reg(Mnemonic_CMOVB, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
513+ break;
514+ case Mnemonic_CMOVNB:
515+ encoder_reg_reg(Mnemonic_CMOVNB, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
516+ break;
517+ case Mnemonic_CMOVZ:
518+ encoder_reg_reg(Mnemonic_CMOVZ, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
519+ break;
520+ case Mnemonic_CMOVNZ:
521+ encoder_reg_reg(Mnemonic_CMOVNZ, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
522+ break;
523+ case Mnemonic_CMOVBE:
524+ encoder_reg_reg(Mnemonic_CMOVBE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
525+ break;
526+ case Mnemonic_CMOVNBE:
527+ encoder_reg_reg(Mnemonic_CMOVNBE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
528+ break;
529+ case Mnemonic_CMOVS:
530+ encoder_reg_reg(Mnemonic_CMOVS, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
531+ break;
532+ case Mnemonic_CMOVNS:
533+ encoder_reg_reg(Mnemonic_CMOVNS, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
534+ break;
535+ case Mnemonic_CMOVP:
536+ encoder_reg_reg(Mnemonic_CMOVP, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
537+ break;
538+ case Mnemonic_CMOVNP:
539+ encoder_reg_reg(Mnemonic_CMOVNP, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
540+ break;
541+ case Mnemonic_CMOVL:
542+ encoder_reg_reg(Mnemonic_CMOVL, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
543+ break;
544+ case Mnemonic_CMOVNL:
545+ encoder_reg_reg(Mnemonic_CMOVNL, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
546+ break;
547+ case Mnemonic_CMOVLE:
548+ encoder_reg_reg(Mnemonic_CMOVLE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
549+ break;
550+ case Mnemonic_CMOVNLE:
551+ encoder_reg_reg(Mnemonic_CMOVNLE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream);
552+ break;
553+ default :
554+ printf("the condition is not supported.\n");
555+ return;
556+ }
557+ mStream = mStream + encoder_get_inst_size(mStream);
558+}
559+
560+void X86Assembler::CMOV_MEM_TO_REG(Mnemonic cc, int disp, int base_reg, int dst, OpndSize size)
561+{
562+ switch (cc) {
563+ case Mnemonic_CMOVO:
564+ encoder_mem_reg(Mnemonic_CMOVO, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
565+ break;
566+ case Mnemonic_CMOVNO:
567+ encoder_mem_reg(Mnemonic_CMOVNO, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
568+ break;
569+ case Mnemonic_CMOVB:
570+ encoder_mem_reg(Mnemonic_CMOVB, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
571+ break;
572+ case Mnemonic_CMOVNB:
573+ encoder_mem_reg(Mnemonic_CMOVNB, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
574+ break;
575+ case Mnemonic_CMOVZ:
576+ encoder_mem_reg(Mnemonic_CMOVZ, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
577+ break;
578+ case Mnemonic_CMOVNZ:
579+ encoder_mem_reg(Mnemonic_CMOVNZ, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
580+ break;
581+ case Mnemonic_CMOVBE:
582+ encoder_mem_reg(Mnemonic_CMOVBE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
583+ break;
584+ case Mnemonic_CMOVNBE:
585+ encoder_mem_reg(Mnemonic_CMOVNBE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
586+ break;
587+ case Mnemonic_CMOVS:
588+ encoder_mem_reg(Mnemonic_CMOVS, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
589+ break;
590+ case Mnemonic_CMOVNS:
591+ encoder_mem_reg(Mnemonic_CMOVNS, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
592+ break;
593+ case Mnemonic_CMOVP:
594+ encoder_mem_reg(Mnemonic_CMOVP, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
595+ break;
596+ case Mnemonic_CMOVNP:
597+ encoder_mem_reg(Mnemonic_CMOVNP, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
598+ break;
599+ case Mnemonic_CMOVL:
600+ encoder_mem_reg(Mnemonic_CMOVL, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
601+ break;
602+ case Mnemonic_CMOVNL:
603+ encoder_mem_reg(Mnemonic_CMOVNL, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
604+ break;
605+ case Mnemonic_CMOVLE:
606+ encoder_mem_reg(Mnemonic_CMOVLE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
607+ break;
608+ case Mnemonic_CMOVNLE:
609+ encoder_mem_reg(Mnemonic_CMOVNLE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream);
610+ break;
611+ default :
612+ printf("the condition is not supported.\n");
613+ return;
614+ }
615+ mStream = mStream + encoder_get_inst_size(mStream);
616+}
617+
618+}; // namespace android
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/X86Assembler.h
@@ -0,0 +1,163 @@
1+/* libs/pixelflinger/codeflinger/x86/X86Assembler.h
2+**
3+** Copyright 2006, The Android Open Source Project
4+**
5+** Licensed under the Apache License, Version 2.0 (the "License");
6+** you may not use this file except in compliance with the License.
7+** You may obtain a copy of the License at
8+**
9+** http://www.apache.org/licenses/LICENSE-2.0
10+**
11+** Unless required by applicable law or agreed to in writing, software
12+** distributed under the License is distributed on an "AS IS" BASIS,
13+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+** See the License for the specific language governing permissions and
15+** limitations under the License.
16+*/
17+
18+#ifndef ANDROID_X86ASSEMBLER_H
19+#define ANDROID_X86ASSEMBLER_H
20+
21+#include <stdint.h>
22+#include <sys/types.h>
23+
24+#include <utils/Vector.h>
25+#include <utils/KeyedVector.h>
26+
27+#include "codeflinger/tinyutils/smartpointer.h"
28+#include "codeflinger/CodeCache.h"
29+#include "enc_wrapper.h"
30+
31+namespace android {
32+
33+// ----------------------------------------------------------------------------
34+
35+class X86Assembler
36+{
37+public:
38+
39+ enum {
40+ EAX = PhysicalReg_EAX, EBX = PhysicalReg_EBX, ECX = PhysicalReg_ECX,
41+ EDX = PhysicalReg_EDX, EDI = PhysicalReg_EDI, ESI = PhysicalReg_ESI,
42+ ESP = PhysicalReg_ESP, EBP = PhysicalReg_EBP
43+ };
44+
45+ X86Assembler(const sp<Assembly>& assembly);
46+ ~X86Assembler();
47+
48+ char* base() const;
49+ char* pc() const;
50+
51+
52+ void disassemble(const char* name);
53+
54+ // ------------------------------------------------------------------------
55+ // X86AssemblerInterface...
56+ // ------------------------------------------------------------------------
57+
58+ void reset();
59+
60+ int generate(const char* name);
61+
62+ void comment(const char* string);
63+
64+ void label(const char* theLabel);
65+
66+ void JCC(Mnemonic cc, const char* label);
67+
68+ void JMP(const char* label);
69+
70+ void prepare_esp(int old_offset);
71+
72+ void update_esp(int new_offset);
73+
74+ void shrink_esp(int shrink_offset);
75+
76+ void callee_work();
77+
78+ void return_work();
79+
80+ char* pcForLabel(const char* label);
81+
82+ void PUSH(int reg);
83+
84+ void POP(int reg);
85+
86+ void ADD_REG_TO_REG(int src, int dst);
87+ void ADD_IMM_TO_REG(int imm, int dst);
88+ void ADD_IMM_TO_MEM(int imm, int disp, int dst);
89+ void ADD_MEM_TO_REG(int base_reg, int disp, int dst);
90+ void ADD_REG_TO_MEM(int src, int base_reg, int disp);
91+ void SUB_REG_TO_REG(int src, int dst);
92+ void SUB_IMM_TO_REG(int imm, int dst);
93+ void SUB_IMM_TO_MEM(int imm, int disp, int dst);
94+ void SUB_REG_TO_MEM(int src, int base_reg, int disp);
95+
96+ void TEST_REG_TO_REG(int src, int dst, OpndSize size=OpndSize_32);
97+ void CMP_REG_TO_REG(int src, int dst, OpndSize size=OpndSize_32);
98+ void CMP_MEM_TO_REG(int base_reg, int disp, int dst, OpndSize size=OpndSize_32);
99+ void CMP_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size=OpndSize_32);
100+ void CMP_IMM_TO_REG(int imm, int dst);
101+
102+ void AND_REG_TO_REG(int src, int dst);
103+ void AND_IMM_TO_REG(int imm, int dst);
104+ void OR_REG_TO_REG(int src, int dst);
105+ void XOR(int src, int dst);
106+ void OR_IMM_TO_REG(int imm, int dst);
107+ void NOT(int dst);
108+ void NEG(int dst);
109+ void SHL(int imm, int dst);
110+ void SHL(int imm, int disp, int dst);
111+ void SHR(int imm, int dst);
112+ void SHR(int imm, int disp, int dst);
113+ void SAR(int imm, int dst);
114+ void ROR(const int imm, int dst);
115+ void ROR(int imm, int disp, int dst);
116+ void IMUL(int reg);
117+ void IMUL(int src, int dst);
118+ void MUL(int reg);
119+
120+ void MOVSX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst);
121+ void MOVSX_REG_TO_REG(OpndSize size, int src, int dst);
122+ void MOVZX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst);
123+ void MOVZX_REG_TO_REG(OpndSize size, int src, int dst);
124+ void MOV_IMM_TO_REG(int32_t imm, int dst);
125+ void MOV_REG_TO_REG(int src, int dst, OpndSize size=OpndSize_32);
126+ void MOV_MEM_TO_REG(int disp, int base_reg, int reg, OpndSize size=OpndSize_32);
127+ void MOV_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size=OpndSize_32);
128+ void MOV_MEM_SCALE_TO_REG(int base_reg, int index_reg, int scale, int reg, OpndSize size=OpndSize_32);
129+ void CMOV_REG_TO_REG(Mnemonic cc, int src, int dst, OpndSize size=OpndSize_32);
130+ void CMOV_MEM_TO_REG(Mnemonic cc, int disp, int base_reg, int dst, OpndSize size=OpndSize_32);
131+
132+
133+ sp<Assembly> mAssembly;
134+ char* mBase;
135+ char* mStream;
136+ //branch target offset is relative to the next instruction
137+ char* mStreamNext;
139+ // position of the esp-adjusting instruction, patched by update_esp()
139+ char* mStreamUpdate;
140+
141+ int64_t mDuration;
142+#if defined(WITH_LIB_HARDWARE)
143+ bool mQemuTracing;
144+#endif
145+
146+ struct branch_target_t {
147+ inline branch_target_t() : label(0), pc(0), next_pc(0) { }
148+ inline branch_target_t(const char* l, char* p, char* next_p)
149+ : label(l), pc(p), next_pc(next_p) { }
150+ const char* label;
151+ char* pc;
152+ char* next_pc;
153+ };
154+
155+ Vector<branch_target_t> mBranchTargets;
156+ KeyedVector< const char*, char* > mLabels;
157+ KeyedVector< char*, const char* > mLabelsInverseMapping;
158+ KeyedVector< char*, const char* > mComments;
159+};
160+
161+}; // namespace android
162+
163+#endif //ANDROID_X86ASSEMBLER_H
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/blending.cpp
@@ -0,0 +1,974 @@
1+/* libs/pixelflinger/codeflinger/x86/blending.cpp
2+**
3+** Copyright 2006, The Android Open Source Project
4+**
5+** Licensed under the Apache License, Version 2.0 (the "License");
6+** you may not use this file except in compliance with the License.
7+** You may obtain a copy of the License at
8+**
9+** http://www.apache.org/licenses/LICENSE-2.0
10+**
11+** Unless required by applicable law or agreed to in writing, software
12+** distributed under the License is distributed on an "AS IS" BASIS,
13+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+** See the License for the specific language governing permissions and
15+** limitations under the License.
16+*/
17+
18+#include <assert.h>
19+#include <stdint.h>
20+#include <stdlib.h>
21+#include <stdio.h>
22+#include <sys/types.h>
23+
24+#include <cutils/log.h>
25+
26+#include "codeflinger/x86/GGLX86Assembler.h"
27+
28+
29+namespace android {
30+
31+void GGLX86Assembler::build_fog(
32+ component_t& temp, // incoming fragment / output
33+ int component,
34+ Scratch& regs)
35+{
36+ if (mInfo[component].fog) {
37+ Scratch scratches(registerFile());
38+ comment("fog");
39+
40+ temp.reg = scratches.obtain();
41+ MOV_MEM_TO_REG(temp.offset_ebp, EBP, temp.reg);
42+ integer_t fragment(temp.reg, temp.h, temp.flags, temp.offset_ebp);
43+ if (!(temp.flags & CORRUPTIBLE)) {
44+ temp.reg = regs.obtain();
45+ temp.flags |= CORRUPTIBLE;
46+ }
47+
48+ integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
49+ mBuilderContext.Rctx = scratches.obtain();
50+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
51+ MOVZX_MEM_TO_REG(OpndSize_8, mBuilderContext.Rctx, GGL_OFFSETOF(state.fog.color[component]), fogColor.reg);
52+
53+ integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
54+ CONTEXT_LOAD(factor.reg, generated_vars.f);
55+ scratches.recycle(mBuilderContext.Rctx);
56+
57+ // clamp fog factor (TODO: see if there is a way to guarantee
58+ // we won't overflow when setting the iterators)
59+ int temp_reg = scratches.obtain();
60+ MOV_REG_TO_REG(factor.reg, temp_reg);
61+ SAR(31, temp_reg);
62+ NOT(temp_reg);
63+ AND_REG_TO_REG(temp_reg, factor.reg);
64+ MOV_IMM_TO_REG(0x10000, temp_reg);
65+ CMP_IMM_TO_REG(0x10000, factor.reg);
66+ CMOV_REG_TO_REG(Mnemonic_CMOVAE, temp_reg, factor.reg);
67+ scratches.recycle(temp_reg);
68+
69+ // we will reuse factor.reg
70+ build_blendFOneMinusF(temp, factor, fragment, fogColor);
71+ MOV_REG_TO_MEM(temp.reg, temp.offset_ebp, EBP);
72+ scratches.recycle(temp.reg);
73+ }
74+}
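The clamp emitted above is branch-free: SAR/NOT/AND forces negative factors to zero, then CMP/CMOVAE caps the result at 1.0 in 16.16 fixed point. An equivalent stand-alone C sketch (assuming arithmetic right shift of signed ints, which the generated SAR guarantees):

    #include <cstdint>
    #include <cstdio>

    static int32_t clamp_fog(int32_t f) {
        f &= ~(f >> 31);             // SAR 31; NOT; AND: negative factors become 0
        if ((uint32_t)f >= 0x10000)  // CMP 0x10000; CMOVAE
            f = 0x10000;             // 0x10000 is 1.0 in 16.16 fixed point
        return f;
    }

    int main() {
        printf("%x %x %x\n", clamp_fog(-5), clamp_fog(0x8000), clamp_fog(0x20000));
        // prints: 0 8000 10000
        return 0;
    }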
75+
76+void GGLX86Assembler::build_blending(
77+ component_t& temp, // incoming fragment / output
78+ pixel_t& pixel, // framebuffer
79+ int component,
80+ Scratch& regs)
81+{
82+ if (!mInfo[component].blend)
83+ return;
84+
85+ int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
86+ int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
87+ if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
88+ fs = GGL_ONE;
89+ const int blending = blending_codes(fs, fd);
90+ if (!temp.size()) {
91+ // here, blending will produce something which doesn't depend on
92+ // that component (e.g. GL_ZERO:GL_*), so the register has not been
93+ // allocated yet. It will never be used as a source.
94+ //temp = component_t(regs.obtain(), CORRUPTIBLE, temp_offset_ebp);
95+ temp.reg = regs.obtain();
96+ temp.flags = CORRUPTIBLE;
97+ temp.h = temp.l = 0;
98+ } else {
99+ temp.reg = regs.obtain();
100+ }
101+ MOV_MEM_TO_REG(temp.offset_ebp, EBP, temp.reg);
102+ // we are doing real blending...
103+ // fb: extracted dst
104+ // fragment: extracted src
105+ // temp: component_t(fragment) and result
106+
107+ // scoped register allocator
108+ Scratch scratches(registerFile());
109+ comment("blending");
110+
111+ // we can optimize these cases a bit...
112+ // (1) saturation is not needed
113+ // (2) we can use only one multiply instead of 2
114+ // (3) we can reduce the register pressure
115+ // R = S*f + D*(1-f) = (S-D)*f + D
116+ // R = S*(1-f) + D*f = (D-S)*f + S
117+
118+ const bool same_factor_opt1 =
119+ (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
120+ (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
121+ (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
122+ (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);
123+
124+ const bool same_factor_opt2 =
125+ (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
126+ (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
127+ (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
128+ (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);
129+
130+
131+ // XXX: we could also optimize these cases:
132+ // R = S*f + D*f = (S+D)*f
133+ // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
134+ // R = S*D + D*S = 2*S*D
135+
136+
137+ pixel.reg = scratches.obtain();
138+ MOV_MEM_TO_REG(pixel.offset_ebp, EBP, pixel.reg);
139+ // see if we need to extract 'component' from the destination (fb)
140+ integer_t fb;
141+ if (blending & (BLEND_DST|FACTOR_DST)) {
142+ fb.setTo(scratches.obtain(), 32);
143+ extract(fb, pixel, component);
144+ if (mDithering) {
145+ // XXX: maybe what we should do instead is simply
146+ // expand fb -or- fragment to the larger of the two
147+ if (fb.size() < temp.size()) {
148+ // for now we expand 'fb' to min(fragment, 8)
149+ int new_size = temp.size() < 8 ? temp.size() : 8;
150+ expand(fb, fb, new_size);
151+ }
152+ }
153+ }
154+
155+ // convert input fragment to integer_t
156+ if (temp.l && (temp.flags & CORRUPTIBLE)) {
157+ SHR(temp.l, temp.reg);
158+ temp.h -= temp.l;
159+ temp.l = 0;
160+ }
161+ integer_t fragment(temp.reg, temp.size(), temp.flags, temp.offset_ebp);
162+
163+ // if not done yet, convert input fragment to integer_t
164+ if (temp.l) {
165+ // here we know temp is not CORRUPTIBLE
166+ fragment.reg = scratches.obtain();
167+ MOV_REG_TO_REG(temp.reg, fragment.reg);
168+ SHR(temp.l, fragment.reg);
169+ fragment.flags |= CORRUPTIBLE;
170+ }
171+
172+ if (!(temp.flags & CORRUPTIBLE)) {
173+ // temp is not corruptible, but since it's the destination it
174+ // will be modified, so we need to allocate a new register.
175+ temp.reg = regs.obtain();
176+ temp.flags &= ~CORRUPTIBLE;
177+ fragment.flags &= ~CORRUPTIBLE;
178+ }
179+
180+ if ((blending & BLEND_SRC) && !same_factor_opt1) {
181+ // source (fragment) is needed for the blending stage
182+ // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
183+ fragment.flags &= ~CORRUPTIBLE;
184+ }
185+
186+
187+ if (same_factor_opt1) {
188+ // R = S*f + D*(1-f) = (S-D)*f + D
189+ integer_t factor;
190+ build_blend_factor(factor, fs,
191+ component, pixel, fragment, fb, scratches);
192+ // fb is always corruptible from this point
193+ fb.flags |= CORRUPTIBLE;
194+ // factor is reused by mul_factor_add in build_blendFOneMinusF, unless build_blend_factor aliased factor.reg to fragment.reg (== temp.reg) or to fb.reg
195+ if(factor.reg == fragment.reg || factor.reg == fb.reg)
196+ MOV_REG_TO_REG(factor.reg, pixel.reg);
197+ else
198+ scratches.recycle(pixel.reg);
199+ build_blendFOneMinusF(temp, factor, fragment, fb);
200+ if(factor.reg == fragment.reg || factor.reg == fb.reg) {
201+ MOV_REG_TO_REG(pixel.reg, factor.reg);
202+ scratches.recycle(pixel.reg);
203+ }
204+ scratches.recycle(fb.reg);
205+ //scratches.recycle(factor.reg);
206+ } else if (same_factor_opt2) {
207+ // R = S*(1-f) + D*f = (D-S)*f + S
208+ integer_t factor;
209+ // fb is always corruptible here
210+ fb.flags |= CORRUPTIBLE;
211+ build_blend_factor(factor, fd,
212+ component, pixel, fragment, fb, scratches);
213+ //we will reuse factor in mul_factor_add of build_blendFOneMinusFF, unless factor.reg == fragment.reg == temp.reg or factor.reg == fb.reg in build_blend_factor
214+ if(factor.reg == fragment.reg || factor.reg == fb.reg)
215+ MOV_REG_TO_REG(factor.reg, pixel.reg);
216+ else
217+ scratches.recycle(pixel.reg);
218+ build_blendOneMinusFF(temp, factor, fragment, fb);
219+ if(factor.reg == fragment.reg || factor.reg == fb.reg) {
220+ MOV_REG_TO_REG(pixel.reg, factor.reg);
221+ scratches.recycle(pixel.reg);
222+ }
223+ scratches.recycle(fb.reg);
224+ } else {
225+ integer_t src_factor;
226+ integer_t dst_factor;
227+
228+ // if destination (fb) is not needed for the blending stage,
229+ // then it can be marked as CORRUPTIBLE
230+ if (!(blending & BLEND_DST)) {
231+ fb.flags |= CORRUPTIBLE;
232+ }
233+
234+ // XXX: try to mark some registers as CORRUPTIBLE
235+ // in most case we could make those corruptible
236+ // when we're processing the last component
237+ // but not always, for instance
238+ // when fragment is constant and not reloaded
239+ // when fb is needed for logic-ops or masking
240+ // when a register is aliased (for instance with mAlphaSource)
241+
242+ // blend away...
243+ if (fs==GGL_ZERO) {
244+ if (fd==GGL_ZERO) { // R = 0
245+ // already taken care of
246+ } else if (fd==GGL_ONE) { // R = D
247+ // already taken care of
248+ } else { // R = D*fd
249+ // compute fd
250+ build_blend_factor(dst_factor, fd,
251+ component, pixel, fragment, fb, scratches);
252+ scratches.recycle(pixel.reg);
253+ mul_factor(temp, fb, dst_factor, regs);
254+ scratches.recycle(fb.reg);
255+ }
256+ } else if (fs==GGL_ONE) {
257+ int temp_reg;
258+ if (fd==GGL_ZERO) { // R = S
259+ // NOP, taken care of
260+ } else if (fd==GGL_ONE) { // R = S + D
261+ component_add(temp, fb, fragment); // args order matters
262+ temp_reg = scratches.obtain();
263+ component_sat(temp, temp_reg);
264+ scratches.recycle(temp_reg);
265+ } else { // R = S + D*fd
266+ // compute fd
267+ build_blend_factor(dst_factor, fd,
268+ component, pixel, fragment, fb, scratches);
269+ // dst_factor may be clobbered by mul_factor_add, unless build_blend_factor aliased it to fragment.reg (== temp.reg) or to fb.reg
270+ if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg)
271+ MOV_REG_TO_REG(dst_factor.reg, pixel.reg);
272+ else
273+ scratches.recycle(pixel.reg);
274+ mul_factor_add(temp, fb, dst_factor, component_t(fragment));
275+ if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg) {
276+ MOV_REG_TO_REG(pixel.reg, dst_factor.reg);
277+ scratches.recycle(pixel.reg);
278+ }
279+ temp_reg = fb.reg;
280+ component_sat(temp, temp_reg);
281+ scratches.recycle(fb.reg);
282+ }
283+ } else {
284+ // compute fs
285+ int temp_reg;
286+ build_blend_factor(src_factor, fs,
287+ component, pixel, fragment, fb, scratches);
288+ if (fd==GGL_ZERO) { // R = S*fs
289+ mul_factor(temp, fragment, src_factor, regs);
290+ if (scratches.isUsed(src_factor.reg))
291+ scratches.recycle(src_factor.reg);
292+ } else if (fd==GGL_ONE) { // R = S*fs + D
293+ // src_factor may be clobbered by mul_factor_add, unless build_blend_factor aliased it to fragment.reg (== temp.reg) or to fb.reg
294+ if(src_factor.reg == fragment.reg || src_factor.reg == fb.reg)
295+ MOV_REG_TO_REG(src_factor.reg, pixel.reg);
296+ else
297+ scratches.recycle(pixel.reg);
298+ mul_factor_add(temp, fragment, src_factor, component_t(fb));
299+ if(src_factor.reg == fragment.reg || src_factor.reg == fb.reg) {
300+ MOV_REG_TO_REG(pixel.reg, src_factor.reg);
301+ scratches.recycle(pixel.reg);
302+ }
303+ temp_reg = fb.reg;
304+ component_sat(temp, temp_reg);
305+ scratches.recycle(fb.reg);
306+ } else { // R = S*fs + D*fd
307+ mul_factor(temp, fragment, src_factor, regs);
308+ if (scratches.isUsed(src_factor.reg))
309+ scratches.recycle(src_factor.reg);
310+ // compute fd
311+ build_blend_factor(dst_factor, fd,
312+ component, pixel, fragment, fb, scratches);
313+ // dst_factor may be clobbered by mul_factor_add, unless build_blend_factor aliased it to fragment.reg (== temp.reg) or to fb.reg
314+ if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg)
315+ MOV_REG_TO_REG(dst_factor.reg, pixel.reg);
316+ else
317+ scratches.recycle(pixel.reg);
318+ mul_factor_add(temp, fb, dst_factor, temp);
319+ if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg) {
320+ MOV_REG_TO_REG(pixel.reg, dst_factor.reg);
321+ scratches.recycle(pixel.reg);
322+ }
323+ if (!same_factor_opt1 && !same_factor_opt2) {
324+ temp_reg = fb.reg;
325+ component_sat(temp, temp_reg);
326+ }
327+ scratches.recycle(fb.reg);
328+ }
329+ if(scratches.isUsed(pixel.reg))
330+ scratches.recycle(pixel.reg);
331+ }
332+ }
333+ // temp is modified, but it will be used immediately in downshift
334+ //printf("temp.offset_ebp: %d \n", temp.offset_ebp);
335+ // the case below is triggered on CDK when running surfaceflinger
336+ if(temp.offset_ebp == mAlphaSource.offset_ebp) {
337+ mCurSp = mCurSp - 4;
338+ temp.offset_ebp = mCurSp;
339+ }
340+ // the r, g, b values must be stored back; otherwise the colors rendered by globaltime are incorrect.
341+ MOV_REG_TO_MEM(temp.reg, temp.offset_ebp, EBP);
342+ regs.recycle(temp.reg);
343+
344+ // now we can be corrupted (it's the dest)
345+ temp.flags |= CORRUPTIBLE;
346+}
347+
348+void GGLX86Assembler::build_blend_factor(
349+ integer_t& factor, int f, int component,
350+ const pixel_t& dst_pixel,
351+ integer_t& fragment,
352+ integer_t& fb,
353+ Scratch& scratches)
354+{
355+ integer_t src_alpha(fragment);
356+
357+ // src_factor/dst_factor won't be used after blending,
358+ // so it's fine to mark them as CORRUPTIBLE (if not aliased)
359+ factor.flags |= CORRUPTIBLE;
360+ int temp_reg;
361+ switch(f) {
362+ case GGL_ONE_MINUS_SRC_ALPHA:
363+ case GGL_SRC_ALPHA:
364+ if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
365+ // we're processing alpha, so we already have
366+ // src-alpha in fragment, and we need src-alpha just this time.
367+ } else {
368+ // alpha-src will be needed for other components
369+ factor = mAlphaSource;
370+ factor.flags &= ~CORRUPTIBLE;
371+ factor.reg = scratches.obtain();
372+ //printf("mAlphaSource.offset_ebp: %d \n", mAlphaSource.offset_ebp);
373+ //printf("fragment.offset_ebp: %d \n", fragment.offset_ebp);
374+ //printf("factor.offset_ebp: %d \n", factor.offset_ebp);
375+ MOV_MEM_TO_REG(mAlphaSource.offset_ebp, EBP, factor.reg);
376+ if (!mBlendFactorCached || mBlendFactorCached==f) {
377+ src_alpha = mAlphaSource;
378+ // we already computed the blend factor before, nothing to do.
379+ if (mBlendFactorCached)
380+ return;
381+ // this is the first time, make sure to compute the blend
382+ // factor properly.
383+ mBlendFactorCached = f;
384+ break;
385+ } else {
386+ // we have a cached alpha blend factor, but we want another one;
387+ // this should really never happen because, by construction,
388+ // the source and destination blend factors cannot use
389+ // both ALPHA *and* ONE_MINUS_ALPHA (the blending stage
390+ // relies on the f/(1-f) optimization).
391+
392+ // for completeness, we handle this case anyway. Since there
393+ // are only 2 choices, this means we want "the other one":
394+ // (1-factor)
395+ //factor = mAlphaSource;
396+ //factor.flags &= ~CORRUPTIBLE;
397+ NEG(factor.reg);
398+ ADD_IMM_TO_REG((1<<factor.s), factor.reg);
399+ MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP);
400+ mBlendFactorCached = f;
401+ return;
402+ }
403+ }
404+ // fall-through...
405+ case GGL_ONE_MINUS_DST_COLOR:
406+ case GGL_DST_COLOR:
407+ case GGL_ONE_MINUS_SRC_COLOR:
408+ case GGL_SRC_COLOR:
409+ case GGL_ONE_MINUS_DST_ALPHA:
410+ case GGL_DST_ALPHA:
411+ case GGL_SRC_ALPHA_SATURATE:
412+ // help us find out what register we can use for the blend-factor
413+ // CORRUPTIBLE registers are chosen first, or a new one is allocated.
414+ if (fragment.flags & CORRUPTIBLE) {
415+ factor.setTo(fragment.reg, 32, CORRUPTIBLE, fragment.offset_ebp);
416+ fragment.flags &= ~CORRUPTIBLE;
417+ } else if (fb.flags & CORRUPTIBLE) {
418+ factor.setTo(fb.reg, 32, CORRUPTIBLE, fb.offset_ebp);
419+ fb.flags &= ~CORRUPTIBLE;
420+ } else {
421+ factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
422+ mCurSp = mCurSp - 4;
423+ factor.offset_ebp = mCurSp;
424+ }
425+ break;
426+ }
427+
428+ // XXX: doesn't work if size==1
429+
430+ switch(f) {
431+ case GGL_ONE_MINUS_DST_COLOR:
432+ case GGL_DST_COLOR:
433+ factor.s = fb.s;
434+ MOV_REG_TO_REG(fb.reg, factor.reg);
435+ SHR(fb.s-1, factor.reg);
436+ ADD_REG_TO_REG(fb.reg, factor.reg);
437+ break;
438+ case GGL_ONE_MINUS_SRC_COLOR:
439+ case GGL_SRC_COLOR:
440+ factor.s = fragment.s;
441+ temp_reg = scratches.obtain();
442+ MOV_REG_TO_REG(fragment.reg, temp_reg);
443+ SHR(fragment.s-1, fragment.reg);
444+ ADD_REG_TO_REG(temp_reg, fragment.reg);
445+ scratches.recycle(temp_reg);
446+ break;
447+ case GGL_ONE_MINUS_SRC_ALPHA:
448+ case GGL_SRC_ALPHA:
449+ factor.s = src_alpha.s;
450+ if (mBlendFactorCached == f) {
451+ // src_alpha == factor == mAlphaSource, so we need a temp reg
452+ if(scratches.countFreeRegs()) {
453+ temp_reg = scratches.obtain();
454+ MOV_REG_TO_REG(factor.reg, temp_reg);
455+ SHR(src_alpha.s-1, factor.reg);
456+ ADD_REG_TO_REG(temp_reg, factor.reg);
457+ scratches.recycle(temp_reg);
458+ }
459+ else {
460+ SHR(src_alpha.s-1, factor.offset_ebp, EBP);
461+ ADD_MEM_TO_REG(EBP, factor.offset_ebp, factor.reg);
462+ }
463+ }
464+ else
465+ {
466+ MOV_REG_TO_REG(src_alpha.reg, factor.reg);
467+ SHR(src_alpha.s-1, factor.reg);
468+ ADD_REG_TO_REG(src_alpha.reg, factor.reg);
469+ }
470+ // we will store factor in the next switch for GGL_ONE_MINUS_SRC_ALPHA
471+ if(f == GGL_SRC_ALPHA)
472+ MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP);
473+ break;
474+ case GGL_ONE_MINUS_DST_ALPHA:
475+ case GGL_DST_ALPHA:
476+ // XXX: should be precomputed
477+ extract(factor, dst_pixel, GGLFormat::ALPHA);
478+ temp_reg = scratches.obtain();
479+ MOV_REG_TO_REG(factor.reg, temp_reg);
480+ SHR(factor.s-1, factor.reg);
481+ ADD_REG_TO_REG(temp_reg, factor.reg);
482+ scratches.recycle(temp_reg);
483+ break;
484+ case GGL_SRC_ALPHA_SATURATE:
485+ // XXX: should be precomputed
486+ // XXX: f = min(As, 1-Ad)
487+ // btw, we're guaranteed that Ad's size is <= 8, because
488+ // it's extracted from the framebuffer
489+ break;
490+ }
491+
492+ switch(f) {
493+ case GGL_ONE_MINUS_DST_COLOR:
494+ case GGL_ONE_MINUS_SRC_COLOR:
495+ case GGL_ONE_MINUS_DST_ALPHA:
496+ case GGL_ONE_MINUS_SRC_ALPHA:
497+ NEG(factor.reg);
498+ ADD_IMM_TO_REG(1<<factor.s, factor.reg);
499+ MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP);
500+ }
501+
502+ // don't need more than 8-bits for the blend factor
503+ // and this will prevent overflows in the multiplies later
504+ if (factor.s > 8) {
505+ SHR(factor.s-8, factor.reg);
506+ factor.s = 8;
507+ if(f == GGL_ONE_MINUS_SRC_ALPHA || f == GGL_SRC_ALPHA)
508+ MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP);
509+ }
510+ // the case below is triggered on CDK when running surfaceflinger
511+ if(fragment.offset_ebp == mAlphaSource.offset_ebp)
512+ MOV_REG_TO_REG(factor.reg, fragment.reg);
513+}
514+
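// Worked example: for classic GL transparency, fs = GGL_SRC_ALPHA and
// fd = GGL_ONE_MINUS_SRC_ALPHA. The first switch yields FACTOR_SRC|BLEND_SRC,
// the second adds BLEND_DST, so the caller knows both the source and
// destination terms take part in the blend and that the factor is derived
// from the source (its alpha).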
515+int GGLX86Assembler::blending_codes(int fs, int fd)
516+{
517+ int blending = 0;
518+ switch(fs) {
519+ case GGL_ONE:
520+ blending |= BLEND_SRC;
521+ break;
522+
523+ case GGL_ONE_MINUS_DST_COLOR:
524+ case GGL_DST_COLOR:
525+ blending |= FACTOR_DST|BLEND_SRC;
526+ break;
527+ case GGL_ONE_MINUS_DST_ALPHA:
528+ case GGL_DST_ALPHA:
529+ // no need to extract 'component' from the destination
530+ // for the blend factor, because we need ALPHA only.
531+ blending |= BLEND_SRC;
532+ break;
533+
534+ case GGL_ONE_MINUS_SRC_COLOR:
535+ case GGL_SRC_COLOR:
536+ blending |= FACTOR_SRC|BLEND_SRC;
537+ break;
538+ case GGL_ONE_MINUS_SRC_ALPHA:
539+ case GGL_SRC_ALPHA:
540+ case GGL_SRC_ALPHA_SATURATE:
541+ blending |= FACTOR_SRC|BLEND_SRC;
542+ break;
543+ }
544+ switch(fd) {
545+ case GGL_ONE:
546+ blending |= BLEND_DST;
547+ break;
548+
549+ case GGL_ONE_MINUS_DST_COLOR:
550+ case GGL_DST_COLOR:
551+ blending |= FACTOR_DST|BLEND_DST;
552+ break;
553+ case GGL_ONE_MINUS_DST_ALPHA:
554+ case GGL_DST_ALPHA:
555+ blending |= FACTOR_DST|BLEND_DST;
556+ break;
557+
558+ case GGL_ONE_MINUS_SRC_COLOR:
559+ case GGL_SRC_COLOR:
560+ blending |= FACTOR_SRC|BLEND_DST;
561+ break;
562+ case GGL_ONE_MINUS_SRC_ALPHA:
563+ case GGL_SRC_ALPHA:
564+ // no need to extract 'component' from the source
565+ // for the blend factor, because we need ALPHA only.
566+ blending |= BLEND_DST;
567+ break;
568+ }
569+ return blending;
570+}
571+
572+// ---------------------------------------------------------------------------
573+
574+void GGLX86Assembler::build_blendFOneMinusF(
575+ component_t& temp,
576+ const integer_t& factor,
577+ const integer_t& fragment,
578+ const integer_t& fb)
579+{
580+ // R = S*f + D*(1-f) = (S-D)*f + D
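    // (Expanding: (S-D)*f + D = S*f - D*f + D = S*f + D*(1-f); the rewritten
    // form needs one multiply-accumulate instead of two multiplies. A
    // hypothetical scalar equivalent, assuming an 8-bit factor f:
    //     r = d + (((s - d) * f) >> 8)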
581+ // compute S-D
582+ Scratch scratches(registerFile());
583+ integer_t diff(fragment.flags & CORRUPTIBLE ?
584+ fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
585+ const int shift = fragment.size() - fb.size();
586+ if (shift>0) {
587+ MOV_REG_TO_REG(fragment.reg, diff.reg);
588+ SHR(shift, diff.reg);
589+ SUB_REG_TO_REG(fb.reg, diff.reg);
590+ } else if (shift<0) {
591+ MOV_REG_TO_REG(fragment.reg, diff.reg);
592+ SHL(-shift, diff.reg);
593+ SUB_REG_TO_REG(fb.reg, diff.reg);
594+ } else {
595+ MOV_REG_TO_REG(fragment.reg, diff.reg);
596+ SUB_REG_TO_REG(fb.reg, diff.reg);
597+ }
598+ mul_factor_add(temp, diff, factor, component_t(fb));
599+ if(!(fragment.flags & CORRUPTIBLE))
600+ scratches.recycle(diff.reg);
601+}
602+
603+void GGLX86Assembler::build_blendOneMinusFF(
604+ component_t& temp,
605+ const integer_t& factor,
606+ const integer_t& fragment,
607+ const integer_t& fb)
608+{
609+ // R = S*f + D*(1-f) = (S-D)*f + D
610+ Scratch scratches(registerFile());
611+ // compute D-S
612+ integer_t diff(fb.flags & CORRUPTIBLE ?
613+ fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
614+ const int shift = fragment.size() - fb.size();
615+ if (shift>0) {
616+ SHR(shift, fragment.reg);
617+ MOV_REG_TO_REG(fb.reg, diff.reg);
618+ SUB_REG_TO_REG(fragment.reg, diff.reg);
619+ }
620+ else if (shift<0) {
621+ SHR(-shift, fragment.reg);
622+ MOV_REG_TO_REG(fb.reg, diff.reg);
623+ SUB_REG_TO_REG(fragment.reg, diff.reg);
624+ }
625+ else {
626+ MOV_REG_TO_REG(fb.reg, diff.reg);
627+ SUB_REG_TO_REG(fragment.reg, diff.reg);
628+ }
629+
630+ mul_factor_add(temp, diff, factor, component_t(fragment));
631+    if(!(fb.flags & CORRUPTIBLE))
632+ scratches.recycle(diff.reg);
633+}
634+
635+// ---------------------------------------------------------------------------
636+
637+void GGLX86Assembler::mul_factor( component_t& d,
638+ const integer_t& v,
639+ const integer_t& f, Scratch& scratches)
640+{
641+ // f can be changed
642+ //
643+ int vs = v.size();
644+ int fs = f.size();
645+ int ms = vs+fs;
646+
647+ // XXX: we could have special cases for 1 bit mul
648+
649+    // All the code below selects the best multiply instruction
650+    // w.r.t. the operand sizes. We take advantage of the fact
651+    // that 16-bit multiplies allow a free 16-bit shift.
652+    // The trick is to make sure we keep at least 8 bits
653+    // per component (which is enough for an 8-bit display).
654+
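    // The xy* selectors below appear to mirror the ARM SMUL<x><y> halfword
    // multiplies (B = bottom 16 bits, T = top 16 bits), and smulw mirrors
    // SMULWB (32x16 multiply keeping bits [47:16] of the result); on x86
    // they are emulated with MOVSX + IMUL and, for smulw, by reassembling
    // EDX:EAX shifted right by 16.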
655+ int xy = -1;
656+ int vshift = 0;
657+ int fshift = 0;
658+ int smulw = 0;
659+
660+ int xyBB = 0;
661+ int xyTB = 1;
662+ int xyTT = 2;
663+ int xyBT = 3;
664+ if (vs<16) {
665+ if (fs<16) {
666+ xy = xyBB;
667+ } else if (GGL_BETWEEN(fs, 24, 31)) {
668+ ms -= 16;
669+ xy = xyTB;
670+ } else {
671+ // eg: 15 * 18 -> 15 * 15
672+ fshift = fs - 15;
673+ ms -= fshift;
674+ xy = xyBB;
675+ }
676+ } else if (GGL_BETWEEN(vs, 24, 31)) {
677+ if (fs<16) {
678+ ms -= 16;
679+ xy = xyTB;
680+ } else if (GGL_BETWEEN(fs, 24, 31)) {
681+ ms -= 32;
682+ xy = xyTT;
683+ } else {
684+ // eg: 24 * 18 -> 8 * 18
685+ fshift = fs - 15;
686+ ms -= 16 + fshift;
687+ xy = xyTB;
688+ }
689+ } else {
690+ if (fs<16) {
691+ // eg: 18 * 15 -> 15 * 15
692+ vshift = vs - 15;
693+ ms -= vshift;
694+ xy = xyBB;
695+ } else if (GGL_BETWEEN(fs, 24, 31)) {
696+ // eg: 18 * 24 -> 15 * 8
697+ vshift = vs - 15;
698+ ms -= 16 + vshift;
699+ xy = xyBT;
700+ } else {
701+ // eg: 18 * 18 -> (15 * 18)>>16
702+ fshift = fs - 15;
703+ ms -= 16 + fshift;
704+ //xy = yB; //XXX SMULWB
705+ smulw = 1;
706+ }
707+ }
708+
709+ ALOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
710+
711+ int vreg = v.reg;
712+ int freg = f.reg;
713+ if (vshift) {
714+ MOV_REG_TO_REG(vreg, d.reg);
715+ SHR(vshift, d.reg);
716+ vreg = d.reg;
717+ }
718+    if (fshift) {
719+        // f is allowed to change (see above), so shift the factor in place;
720+        // routing it through d.reg would be clobbered by the MOV just below.
721+        SHR(fshift, freg);
722+    }
723+ MOV_REG_TO_REG(vreg, d.reg);
724+ if (smulw) {
725+ int flag_push_edx = 0;
726+ int flag_reserve_edx = 0;
727+ int temp_reg2 = -1;
728+ int edx_offset_ebp = 0;
729+ if(scratches.isUsed(EDX) == 1) {
730+ if(d.reg != EDX) {
731+ flag_push_edx = 1;
732+ mCurSp = mCurSp - 4;
733+ edx_offset_ebp = mCurSp;
734+ MOV_REG_TO_MEM(EDX, edx_offset_ebp, EBP);
735+ //PUSH(EDX);
736+ }
737+ }
738+ else {
739+ flag_reserve_edx = 1;
740+ scratches.reserve(EDX);
741+ }
742+ if(scratches.isUsed(EAX)) {
743+ if( freg == EAX || d.reg == EAX) {
744+ MOVSX_REG_TO_REG(OpndSize_16, freg, freg);
745+ if(freg == EAX)
746+ IMUL(d.reg);
747+ else
748+ IMUL(freg);
749+ SHL(16, EDX);
750+ SHR(16, EAX);
751+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
752+ MOV_REG_TO_REG(EDX, d.reg);
753+ }
754+ else {
755+ int eax_offset_ebp = 0;
756+ if(scratches.countFreeRegs() > 0) {
757+ temp_reg2 = scratches.obtain();
758+ MOV_REG_TO_REG(EAX, temp_reg2);
759+ }
760+ else {
761+ mCurSp = mCurSp - 4;
762+ eax_offset_ebp = mCurSp;
763+ MOV_REG_TO_MEM(EAX, eax_offset_ebp, EBP);
764+ //PUSH(EAX);
765+ }
766+ MOV_REG_TO_REG(freg, EAX);
767+ MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX);
768+ IMUL(d.reg);
769+ SHL(16, EDX);
770+ SHR(16, EAX);
771+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
772+ MOV_REG_TO_REG(EDX, d.reg);
773+ if(temp_reg2 > -1) {
774+ MOV_REG_TO_REG(temp_reg2, EAX);
775+ scratches.recycle(temp_reg2);
776+ }
777+ else {
778+ MOV_MEM_TO_REG(eax_offset_ebp, EBP, EAX);
779+ //POP(EAX);
780+ }
781+ }
782+ }
783+ else {
784+ MOV_REG_TO_REG(freg, EAX);
785+ MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX);
786+ IMUL(d.reg);
787+ SHL(16, EDX);
788+ SHR(16, EAX);
789+ MOV_REG_TO_REG(EAX, EDX, OpndSize_16);
790+ MOV_REG_TO_REG(EDX, d.reg);
791+ }
792+ if(flag_push_edx == 1) {
793+ MOV_MEM_TO_REG(edx_offset_ebp, EBP, EDX);
794+ //POP(EDX);
795+ }
796+ if(flag_reserve_edx ==1)
797+ scratches.recycle(EDX);
798+ }
799+ else {
800+ if(xy == xyBB) {
801+ MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg);
802+ MOVSX_REG_TO_REG(OpndSize_16, freg, freg);
803+ IMUL(freg, d.reg);
804+ }
805+ else if(xy == xyTB) {
806+ SHR(16, d.reg);
807+ MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg);
808+ MOVSX_REG_TO_REG(OpndSize_16, freg, freg);
809+ IMUL(freg, d.reg);
810+ }
811+ else if(xy == xyBT) {
812+ MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg);
813+ SHR(16, freg);
814+ MOVSX_REG_TO_REG(OpndSize_16, freg, freg);
815+ IMUL(freg, d.reg);
816+ }
817+ else if(xy == xyTT) {
818+ SHR(16, d.reg);
819+ MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg);
820+ SHR(16, freg);
821+ MOVSX_REG_TO_REG(OpndSize_16, freg, freg);
822+ IMUL(freg, d.reg);
823+ }
824+ }
825+
826+
827+ d.h = ms;
828+ if (mDithering) {
829+ d.l = 0;
830+ } else {
831+ d.l = fs;
832+ d.flags |= CLEAR_LO;
833+ }
834+}
835+
836+void GGLX86Assembler::mul_factor_add( component_t& d,
837+ const integer_t& v,
838+ const integer_t& f,
839+ const component_t& a)
840+{
841+ // XXX: we could have special cases for 1 bit mul
842+ Scratch scratches(registerFile());
843+
844+ int vs = v.size();
845+ int fs = f.size();
846+ int as = a.h;
847+ int ms = vs+fs;
848+
849+ ALOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
850+
851+ integer_t add(a.reg, a.h, a.flags, a.offset_ebp);
852+
853+
854+ // 'a' is a component_t but it is guaranteed to have
855+ // its high bits set to 0. However in the dithering case,
856+ // we can't get away with truncating the potentially bad bits
857+ // so extraction is needed.
858+
859+ if ((mDithering) && (a.size() < ms)) {
860+ // we need to expand a
861+ if (!(a.flags & CORRUPTIBLE)) {
862+ // ... but it's not corruptible, so we need to pick a
863+ // temporary register.
864+            // Try to use the destination register first (it's likely
865+ // to be usable, unless it aliases an input).
866+ if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
867+ add.reg = d.reg;
868+ } else {
869+ add.reg = scratches.obtain();
870+ }
871+ }
872+ expand(add, a, ms); // extracts and expands
873+ as = ms;
874+ }
875+
876+ if (ms == as) {
877+ MOV_REG_TO_REG(v.reg, d.reg);
878+ if (vs<16 && fs<16) {
879+ MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg);
880+ MOVSX_REG_TO_REG(OpndSize_16, f.reg, f.reg);
881+ IMUL(f.reg, d.reg);
882+ }
883+ else
884+ IMUL(f.reg, d.reg);
885+ ADD_REG_TO_REG(add.reg, d.reg);
886+ } else {
887+ //int temp = d.reg;
888+ //if (temp == add.reg) {
889+ // // the mul will modify add.reg, we need an intermediary reg
890+ // if (v.flags & CORRUPTIBLE) temp = v.reg;
891+ // else if (f.flags & CORRUPTIBLE) temp = f.reg;
892+ // else temp = scratches.obtain();
893+ //}
894+
895+        // d.reg below may clobber the "temp" result, so we use a new register
896+ int temp_reg;
897+ int v_offset_ebp = 0;
898+ if(scratches.countFreeRegs() == 0) {
899+ temp_reg = v.reg;
900+ mCurSp = mCurSp - 4;
901+ v_offset_ebp = mCurSp;
902+ MOV_REG_TO_MEM(v.reg, v_offset_ebp, EBP);
903+ }
904+ else {
905+ temp_reg = scratches.obtain();
906+ MOV_REG_TO_REG(v.reg, temp_reg);
907+ }
908+ if (vs<16 && fs<16) {
909+ MOVSX_REG_TO_REG(OpndSize_16, temp_reg, temp_reg);
910+ MOVSX_REG_TO_REG(OpndSize_16, f.reg, f.reg);
911+ IMUL(f.reg, temp_reg);
912+ }
913+ else
914+ IMUL(f.reg, temp_reg);
915+
916+ if (ms>as) {
917+ MOV_REG_TO_REG(add.reg, d.reg);
918+ SHL(ms-as, d.reg);
919+ ADD_REG_TO_REG(temp_reg, d.reg);
920+ } else if (ms<as) {
921+ // not sure if we should expand the mul instead?
922+ MOV_REG_TO_REG(add.reg, d.reg);
923+ SHL(as-ms, d.reg);
924+ ADD_REG_TO_REG(temp_reg, d.reg);
925+ }
926+ if(temp_reg == v.reg)
927+ MOV_MEM_TO_REG(v_offset_ebp, EBP, v.reg);
928+ else
929+ scratches.recycle(temp_reg);
930+ }
931+
932+ d.h = ms;
933+ if (mDithering) {
934+ d.l = a.l;
935+ } else {
936+ d.l = fs>a.l ? fs : a.l;
937+ d.flags |= CLEAR_LO;
938+ }
939+}
940+
941+void GGLX86Assembler::component_add(component_t& d,
942+ const integer_t& dst, const integer_t& src)
943+{
944+    // here we're guaranteed that src.size() >= dst.size() (fragment >= fb)
945+ const int shift = src.size() - dst.size();
946+ if (!shift) {
947+ MOV_REG_TO_REG(src.reg, d.reg);
948+ ADD_REG_TO_REG(dst.reg, d.reg);
949+ } else {
950+ MOV_REG_TO_REG(dst.reg, d.reg);
951+ SHL(shift, d.reg);
952+ ADD_REG_TO_REG(src.reg, d.reg);
953+ }
954+
955+ d.h = src.size();
956+ if (mDithering) {
957+ d.l = 0;
958+ } else {
959+ d.l = shift;
960+ d.flags |= CLEAR_LO;
961+ }
962+}
963+
964+void GGLX86Assembler::component_sat(const component_t& v, const int temp_reg)
965+{
966+ const int32_t one = ((1<<v.size())-1)<<v.l;
967+ MOV_IMM_TO_REG(one, temp_reg);
968+ CMP_IMM_TO_REG(1<<v.h, v.reg);
969+ CMOV_REG_TO_REG(Mnemonic_CMOVAE, temp_reg, v.reg);
970+}
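// e.g. for a component with h = 16, l = 8 (size 8): one = 0xFF00 and the
// CMP/CMOVAE pair clamps any value >= 0x10000 to 0xFF00, i.e. it saturates
// the component at its maximum instead of letting it wrap.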
971+
972+// ----------------------------------------------------------------------------
973+
974+}; // namespace android
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/Android.mk
@@ -0,0 +1,30 @@
1+#
2+# Copyright (C) 2015 The Android-x86 Open Source Project
3+#
4+# Licensed under the Apache License, Version 2.0 (the "License");
5+# you may not use this file except in compliance with the License.
6+# You may obtain a copy of the License at
7+#
8+# http://www.apache.org/licenses/LICENSE-2.0
9+#
10+# Unless required by applicable law or agreed to in writing, software
11+# distributed under the License is distributed on an "AS IS" BASIS,
12+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+# See the License for the specific language governing permissions and
14+# limitations under the License.
15+#
16+
17+LOCAL_PATH := $(call my-dir)
18+
19+enc_src_files := \
20+ dec_base.cpp \
21+ enc_base.cpp \
22+ enc_tabl.cpp \
23+ enc_wrapper.cpp
24+
25+include $(CLEAR_VARS)
26+LOCAL_SRC_FILES := $(enc_src_files)
27+LOCAL_MODULE := libenc
28+LOCAL_MODULE_TAGS := optional
29+LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)
30+include $(BUILD_STATIC_LIBRARY)
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/README.txt
@@ -0,0 +1,21 @@
1+Original source from Apache Harmony 5.0M15 (r991518 from 2010-09-01) at
2+http://harmony.apache.org/.
3+
4+The following files are from drlvm/vm/port/src/encoder/ia32_em64t.
5+
6+ dec_base.cpp
7+ dec_base.h
8+ enc_base.cpp
9+ enc_base.h
10+ enc_defs.h
11+ enc_prvt.h
12+ enc_tabl.cpp
13+ encoder.cpp
14+ encoder.h
15+ encoder.inl
16+
17+The following files are derived partially from the original Apache
18+Harmony files.
19+
20+ enc_defs_ext.h -- derived from enc_defs.h
21+ enc_wrapper.h -- derived from encoder.h
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/dec_base.cpp
@@ -0,0 +1,541 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+
21+/**
22+ * @file
23+ * @brief Main decoding (disassembling) routines implementation.
24+ */
25+
26+#include "dec_base.h"
27+#include "enc_prvt.h"
28+#include <stdio.h>
29+//#include "open/common.h"
30+
31+bool DecoderBase::is_prefix(const unsigned char * bytes)
32+{
33+ unsigned char b0 = *bytes;
34+ unsigned char b1 = *(bytes+1);
35+ if (b0 == 0xF0) { // LOCK
36+ return true;
37+ }
38+ if (b0==0xF2 || b0==0xF3) { // REPNZ/REPZ prefixes
39+ if (b1 == 0x0F) { // .... but may be a part of SIMD opcode
40+ return false;
41+ }
42+ return true;
43+ }
44+    if (b0 == 0x2E || b0 == 0x36 || b0==0x3E || b0==0x26 || b0==0x64 || b0==0x65) {
45+ // branch hints, segment prefixes
46+ return true;
47+ }
48+ if (b0==0x66) { // operand-size prefix
49+ if (b1 == 0x0F) { // .... but may be a part of SIMD opcode
50+ return false;
51+ }
52+        return false; // XXX: 0x66 is currently treated as part of the opcode, not as a prefix
53+ }
54+ if (b0==0x67) { // address size prefix
55+ return true;
56+ }
57+ return false;
58+}
59+
60+// Returns prefix count from 0 to 4, or ((unsigned int)-1) on error
61+unsigned int DecoderBase::fill_prefs(const unsigned char * bytes, Inst * pinst)
62+{
63+ const unsigned char * my_bytes = bytes;
64+
65+ while( 1 )
66+ {
67+ unsigned char by1 = *my_bytes;
68+ unsigned char by2 = *(my_bytes + 1);
69+ Inst::PrefGroups where;
70+
71+ switch( by1 )
72+ {
73+ case InstPrefix_REPNE:
74+ case InstPrefix_REP:
75+ {
76+ if( 0x0F == by2)
77+ {
78+ return pinst->prefc;
79+ }
80+ }
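            // fall through: REP/REPNE are Group1 prefixes, like LOCK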
81+ case InstPrefix_LOCK:
82+ {
83+ where = Inst::Group1;
84+ break;
85+ }
86+ case InstPrefix_CS:
87+ case InstPrefix_SS:
88+ case InstPrefix_DS:
89+ case InstPrefix_ES:
90+ case InstPrefix_FS:
91+ case InstPrefix_GS:
92+// case InstPrefix_HintTaken: the same as CS override
93+// case InstPrefix_HintNotTaken: the same as DS override
94+ {
95+ where = Inst::Group2;
96+ break;
97+ }
98+ case InstPrefix_OpndSize:
99+ {
100+//NOTE: this prefix does not work for JMP Sz16: the opcode is 0x66 0xe9,
101+// but 0x66 is treated here as a prefix, so try_mn would match the code
102+// starting at 0xe9 and find JMP Sz32 ...
103+//HACK: assume 0x66 is the last prefix and return unconditionally
104+// (this also covers the 0x0F/SIMD case, so Group3 is never recorded)
105+            return pinst->prefc;
111+ }
112+ case InstPrefix_AddrSize:
113+ {
114+ where = Inst::Group4;
115+ break;
116+ }
117+ default:
118+ {
119+ return pinst->prefc;
120+ }
121+ }
122+ // Assertions are not allowed here.
123+        // Error situations should result in returning an error status.
124+ if (InstPrefix_Null != pinst->pref[where]) //only one prefix in each group
125+ return (unsigned int)-1;
126+
127+ pinst->pref[where] = (InstPrefix)by1;
128+
129+ if (pinst->prefc >= 4) //no more than 4 prefixes
130+ return (unsigned int)-1;
131+
132+ pinst->prefc++;
133+ ++my_bytes;
134+ }
135+}
136+
137+
138+
139+unsigned DecoderBase::decode(const void * addr, Inst * pinst)
140+{
141+ Inst tmp;
142+
143+ //assert( *(unsigned char*)addr != 0x66);
144+
145+ const unsigned char * bytes = (unsigned char*)addr;
146+
147+ // Load up to 4 prefixes
148+ // for each Mnemonic
149+ unsigned int pref_count = fill_prefs(bytes, &tmp);
150+
151+ if (pref_count == (unsigned int)-1) // Wrong prefix sequence, or >4 prefixes
152+ return 0; // Error
153+
154+ bytes += pref_count;
155+
156+ // for each opcodedesc
157+ // if (raw_len == 0) memcmp(, raw_len)
158+ // else check the mixed state which is one of the following:
159+ // /digit /i /rw /rd /rb
160+
161+ bool found = false;
162+ const unsigned char * saveBytes = bytes;
163+ for (unsigned mn=1; mn<Mnemonic_Count; mn++) {
164+ bytes = saveBytes;
165+ found=try_mn((Mnemonic)mn, &bytes, &tmp);
166+ if (found) {
167+ tmp.mn = (Mnemonic)mn;
168+ break;
169+ }
170+ }
171+ if (!found) {
172+ // Unknown opcode
173+ return 0;
174+ }
175+ tmp.size = (unsigned)(bytes-(const unsigned char*)addr);
176+ if (pinst) {
177+ *pinst = tmp;
178+ }
179+ return tmp.size;
180+}
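// Minimal usage sketch (code_ptr stands for any caller-supplied byte pointer):
//     Inst inst;
//     unsigned len = DecoderBase::decode(code_ptr, &inst);
//     // len == 0 means an unknown opcode or a bad prefix sequence;
//     // otherwise inst.mn, inst.size, inst.argc and inst.operands[] are valid.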
181+
182+#ifdef _EM64T_
183+#define EXTEND_REG(reg, flag) \
184+ ((NULL == rex || 0 == rex->flag) ? reg : (reg + 8))
185+#else
186+#define EXTEND_REG(reg, flag) (reg)
187+#endif
188+
189+// rex appears to be unused unless _EM64T_ is enabled
190+bool DecoderBase::decode_aux(const EncoderBase::OpcodeDesc& odesc, unsigned aux,
191+ const unsigned char ** pbuf, Inst * pinst
192+#ifdef _EM64T_
193+ , const Rex UNREF *rex
194+#endif
195+ )
196+{
197+ OpcodeByteKind kind = (OpcodeByteKind)(aux & OpcodeByteKind_KindMask);
198+ unsigned byte = (aux & OpcodeByteKind_OpcodeMask);
199+ unsigned data_byte = **pbuf;
200+ EncoderBase::Operand& opnd = pinst->operands[pinst->argc];
201+ const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc];
202+
203+ switch (kind) {
204+ case OpcodeByteKind_SlashR:
205+ {
206+ RegName reg;
207+ OpndKind okind;
208+ const ModRM& modrm = *(ModRM*)*pbuf;
209+ if (opndDesc.kind & OpndKind_Mem) { // 1st operand is memory
210+#ifdef _EM64T_
211+ decodeModRM(odesc, pbuf, pinst, rex);
212+#else
213+ decodeModRM(odesc, pbuf, pinst);
214+#endif
215+ ++pinst->argc;
216+ const EncoderBase::OpndDesc& opndDesc2 = odesc.opnds[pinst->argc];
217+ okind = ((opndDesc2.kind & OpndKind_XMMReg) || opndDesc2.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
218+ EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc];
219+ reg = getRegName(okind, opndDesc2.size, EXTEND_REG(modrm.reg, r));
220+ regOpnd = EncoderBase::Operand(reg);
221+ } else { // 2nd operand is memory
222+ okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
223+ EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc];
224+ reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.reg, r));
225+ regOpnd = EncoderBase::Operand(reg);
226+ ++pinst->argc;
227+#ifdef _EM64T_
228+ decodeModRM(odesc, pbuf, pinst, rex);
229+#else
230+ decodeModRM(odesc, pbuf, pinst);
231+#endif
232+ }
233+ ++pinst->argc;
234+ }
235+ return true;
236+ case OpcodeByteKind_rb:
237+ case OpcodeByteKind_rw:
238+ case OpcodeByteKind_rd:
239+ {
240+ // Gregory -
241+        // We don't fully parse the register here because, for current
242+        // needs, the disassembler doesn't have to parse all operands.
243+ unsigned regid = data_byte - byte;
244+ if (regid>7) {
245+ return false;
246+ }
247+ OpndSize opnd_size;
248+ switch(kind)
249+ {
250+ case OpcodeByteKind_rb:
251+ {
252+ opnd_size = OpndSize_8;
253+ break;
254+ }
255+ case OpcodeByteKind_rw:
256+ {
257+ opnd_size = OpndSize_16;
258+ break;
259+ }
260+ case OpcodeByteKind_rd:
261+ {
262+ opnd_size = OpndSize_32;
263+ break;
264+ }
265+ default:
266+ opnd_size = OpndSize_32; // so there is no compiler warning
267+ assert( false );
268+ }
269+ opnd = EncoderBase::Operand( getRegName(OpndKind_GPReg, opnd_size, regid) );
270+
271+ ++pinst->argc;
272+ ++*pbuf;
273+ return true;
274+ }
275+ case OpcodeByteKind_cb:
276+ {
277+ char offset = *(char*)*pbuf;
278+ *pbuf += 1;
279+ opnd = EncoderBase::Operand(offset);
280+ ++pinst->argc;
281+ //pinst->direct_addr = (void*)(pinst->offset + *pbuf);
282+ }
283+ return true;
284+ case OpcodeByteKind_cw:
285+ // not an error, but not expected in current env
286+ // Android x86
287+ {
288+ short offset = *(short*)*pbuf;
289+ *pbuf += 2;
290+ opnd = EncoderBase::Operand(offset);
291+ ++pinst->argc;
292+ }
293+ return true;
294+ //return false;
295+ case OpcodeByteKind_cd:
296+ {
297+ int offset = *(int*)*pbuf;
298+ *pbuf += 4;
299+ opnd = EncoderBase::Operand(offset);
300+ ++pinst->argc;
301+ }
302+ return true;
303+ case OpcodeByteKind_SlashNum:
304+ {
305+ const ModRM& modrm = *(ModRM*)*pbuf;
306+ if (modrm.reg != byte) {
307+ return false;
308+ }
309+ decodeModRM(odesc, pbuf, pinst
310+#ifdef _EM64T_
311+ , rex
312+#endif
313+ );
314+ ++pinst->argc;
315+ }
316+ return true;
317+ case OpcodeByteKind_ib:
318+ {
319+ char ival = *(char*)*pbuf;
320+ opnd = EncoderBase::Operand(ival);
321+ ++pinst->argc;
322+ *pbuf += 1;
323+ }
324+ return true;
325+ case OpcodeByteKind_iw:
326+ {
327+ short ival = *(short*)*pbuf;
328+ opnd = EncoderBase::Operand(ival);
329+ ++pinst->argc;
330+ *pbuf += 2;
331+ }
332+ return true;
333+ case OpcodeByteKind_id:
334+ {
335+ int ival = *(int*)*pbuf;
336+ opnd = EncoderBase::Operand(ival);
337+ ++pinst->argc;
338+ *pbuf += 4;
339+ }
340+ return true;
341+#ifdef _EM64T_
342+ case OpcodeByteKind_io:
343+ {
344+ long long int ival = *(long long int*)*pbuf;
345+ opnd = EncoderBase::Operand(OpndSize_64, ival);
346+ ++pinst->argc;
347+ *pbuf += 8;
348+ }
349+ return true;
350+#endif
351+ case OpcodeByteKind_plus_i:
352+ {
353+ unsigned regid = data_byte - byte;
354+ if (regid>7) {
355+ return false;
356+ }
357+ ++*pbuf;
358+ return true;
359+ }
360+    case OpcodeByteKind_ZeroOpcodeByte: // can't be here
361+ return false;
362+ default:
363+        // unknown kind? how come?
364+ break;
365+ }
366+ return false;
367+}
368+
369+bool DecoderBase::try_mn(Mnemonic mn, const unsigned char ** pbuf, Inst * pinst) {
370+ const unsigned char * save_pbuf = *pbuf;
371+ EncoderBase::OpcodeDesc * opcodes = EncoderBase::opcodes[mn];
372+
373+ for (unsigned i=0; !opcodes[i].last; i++) {
374+ const EncoderBase::OpcodeDesc& odesc = opcodes[i];
375+ char *opcode_ptr = const_cast<char *>(odesc.opcode);
376+ int opcode_len = odesc.opcode_len;
377+#ifdef _EM64T_
378+ Rex *prex = NULL;
379+ Rex rex;
380+#endif
381+
382+ *pbuf = save_pbuf;
383+#ifdef _EM64T_
384+ // Match REX prefixes
385+ unsigned char rex_byte = (*pbuf)[0];
386+ if ((rex_byte & 0xf0) == 0x40)
387+ {
388+ if ((rex_byte & 0x08) != 0)
389+ {
390+ // Have REX.W
391+ if (opcode_len > 0 && opcode_ptr[0] == 0x48)
392+ {
393+ // Have REX.W in opcode. All mnemonics that allow
394+ // REX.W have to have specified it in opcode,
395+ // otherwise it is not allowed
396+ rex = *(Rex *)*pbuf;
397+ prex = &rex;
398+ (*pbuf)++;
399+ opcode_ptr++;
400+ opcode_len--;
401+ }
402+ }
403+ else
404+ {
405+ // No REX.W, so it doesn't have to be in opcode. We
406+ // have REX.B, REX.X, REX.R or their combination, but
407+ // not in opcode, they may extend any part of the
408+ // instruction
409+ rex = *(Rex *)*pbuf;
410+ prex = &rex;
411+ (*pbuf)++;
412+ }
413+ }
414+#endif
415+ if (opcode_len != 0) {
416+ if (memcmp(*pbuf, opcode_ptr, opcode_len)) {
417+ continue;
418+ }
419+ *pbuf += opcode_len;
420+ }
421+ if (odesc.aux0 != 0) {
422+
423+ if (!decode_aux(odesc, odesc.aux0, pbuf, pinst
424+#ifdef _EM64T_
425+ , prex
426+#endif
427+ )) {
428+ continue;
429+ }
430+ if (odesc.aux1 != 0) {
431+ if (!decode_aux(odesc, odesc.aux1, pbuf, pinst
432+#ifdef _EM64T_
433+ , prex
434+#endif
435+ )) {
436+ continue;
437+ }
438+ }
439+ pinst->odesc = &opcodes[i];
440+ return true;
441+ }
442+ else {
443+ // Can't have empty opcode
444+ assert(opcode_len != 0);
445+ pinst->odesc = &opcodes[i];
446+ return true;
447+ }
448+ }
449+ return false;
450+}
451+
452+bool DecoderBase::decodeModRM(const EncoderBase::OpcodeDesc& odesc,
453+ const unsigned char ** pbuf, Inst * pinst
454+#ifdef _EM64T_
455+ , const Rex *rex
456+#endif
457+ )
458+{
459+ EncoderBase::Operand& opnd = pinst->operands[pinst->argc];
460+ const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc];
461+
462+ //XXX debug ///assert(0x66 != *(*pbuf-2));
463+ const ModRM& modrm = *(ModRM*)*pbuf;
464+ *pbuf += 1;
465+
466+ RegName base = RegName_Null;
467+ RegName index = RegName_Null;
468+ int disp = 0;
469+ unsigned scale = 0;
470+
471+ // On x86_64 all mnemonics that allow REX.W have REX.W in opcode.
472+ // Therefore REX.W is simply ignored, and opndDesc.size is used
473+
474+ if (modrm.mod == 3) {
475+ // we have only modrm. no sib, no disp.
476+ // Android x86: Use XMMReg for 64b operand.
477+ OpndKind okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size == OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
478+ RegName reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.rm, b));
479+ opnd = EncoderBase::Operand(reg);
480+ return true;
481+ }
482+    //Android x86: m16, m32, m64 denote a word/doubleword/quadword operand in memory;
483+    //base and index registers must be 32 bits!
484+ const SIB& sib = *(SIB*)*pbuf;
485+ // check whether we have a sib
486+ if (modrm.rm == 4) {
487+ // yes, we have SIB
488+ *pbuf += 1;
489+ if (sib.index != 4) {
490+ index = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.index, x)); //Android x86: OpndDesc.size
491+ } else {
492+ // (sib.index == 4) => no index
493+ //%esp can't be sib.index
494+ }
495+
496+ // scale = sib.scale == 0 ? 0 : (1<<sib.scale);
497+ // scale = (1<<sib.scale);
498+ scale = (index == RegName_Null) ? 0 : (1<<sib.scale);
499+
500+ if (sib.base != 5 || modrm.mod != 0) {
501+ base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.base, b)); //Android x86: OpndDesc.size
502+ } else {
503+ // (sib.base == 5 && modrm.mod == 0) => no base
504+ }
505+ }
506+ else {
507+ if (modrm.mod != 0 || modrm.rm != 5) {
508+ base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(modrm.rm, b)); //Android x86: OpndDesc.size
509+ }
510+ else {
511+ // mod=0 && rm == 5 => only disp32
512+ }
513+ }
514+
515+ //update disp and pbuf
516+ if (modrm.mod == 2) {
517+ // have disp32
518+ disp = *(int*)*pbuf;
519+ *pbuf += 4;
520+ }
521+ else if (modrm.mod == 1) {
522+ // have disp8
523+ disp = *(char*)*pbuf;
524+ *pbuf += 1;
525+ }
526+ else {
527+ assert(modrm.mod == 0);
528+ if (modrm.rm == 5) {
529+ // have disp32 w/o sib
530+ disp = *(int*)*pbuf;
531+ *pbuf += 4;
532+ }
533+ else if (modrm.rm == 4 && sib.base == 5) {
534+            // have disp32 with SIB (sib.base == 5)
535+ disp = *(int*)*pbuf;
536+ *pbuf += 4;
537+ }
538+ }
539+ opnd = EncoderBase::Operand(opndDesc.size, base, index, scale, disp);
540+ return true;
541+}
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/dec_base.h
@@ -0,0 +1,135 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+
21+/**
22+ * @file
23+ * @brief Main decoding (disassembling) routines and structures.
24+ *
25+ * @note Quick and rough implementation, subject for a change.
26+ * @note Quick and rough implementation, subject to change.
27+
28+#ifndef __DEC_BASE_H_INCLUDED__
29+#define __DEC_BASE_H_INCLUDED__
30+
31+
32+#include "enc_base.h"
33+#include "enc_prvt.h"
34+
35+#ifdef ENCODER_ISOLATE
36+using namespace enc_ia32;
37+#endif
38+
39+#define IF_CONDITIONAL (0x00000000)
40+#define IF_SYMMETRIC (0x00000000)
41+#define IF_BRANCH (0x00000000)
42+
43+struct Inst {
44+ Inst() {
45+ mn = Mnemonic_Null;
46+ prefc = 0;
47+ size = 0;
48+ flags = 0;
49+ //offset = 0;
50+ //direct_addr = NULL;
51+ argc = 0;
52+ for(int i = 0; i < 4; ++i)
53+ {
54+ pref[i] = InstPrefix_Null;
55+ }
56+ }
57+ /**
58+     * Mnemonic of the instruction.
59+ */
60+ Mnemonic mn;
61+ /**
62+     * Enumeration of indexes into the pref array.
63+ */
64+ enum PrefGroups
65+ {
66+ Group1 = 0,
67+ Group2,
68+ Group3,
69+ Group4
70+ };
71+ /**
72+ * Number of prefixes (1 byte each).
73+ */
74+ unsigned int prefc;
75+ /**
76+ * Instruction prefixes. Prefix should be placed here according to its group.
77+ */
78+ InstPrefix pref[4];
79+ /**
80+ * Size, in bytes, of the instruction.
81+ */
82+ unsigned size;
83+ /**
84+ * Flags of the instruction.
85+ * @see MF_
86+ */
87+ unsigned flags;
88+ /**
89+ * An offset of target address, in case of 'CALL offset',
90+ * 'JMP/Jcc offset'.
91+ */
92+ //int offset;
93+ /**
94+ * Direct address of the target (on Intel64/IA-32 is 'instruction IP' +
95+ * 'instruction length' + offset).
96+ */
97+ //void * direct_addr;
98+ /**
99+ * Number of arguments of the instruction.
100+ */
101+ unsigned argc;
102+ //
103+ EncoderBase::Operand operands[3];
104+ //
105+ const EncoderBase::OpcodeDesc * odesc;
106+};
107+
108+inline bool is_jcc(Mnemonic mn)
109+{
110+ return Mnemonic_JO <= mn && mn<=Mnemonic_JG;
111+}
112+
113+class DecoderBase {
114+public:
115+ static unsigned decode(const void * addr, Inst * pinst);
116+private:
117+ static bool decodeModRM(const EncoderBase::OpcodeDesc& odesc,
118+ const unsigned char ** pbuf, Inst * pinst
119+#ifdef _EM64T_
120+ , const Rex *rex
121+#endif
122+ );
123+ static bool decode_aux(const EncoderBase::OpcodeDesc& odesc,
124+ unsigned aux, const unsigned char ** pbuf,
125+ Inst * pinst
126+#ifdef _EM64T_
127+ , const Rex *rex
128+#endif
129+ );
130+ static bool try_mn(Mnemonic mn, const unsigned char ** pbuf, Inst * pinst);
131+ static unsigned int fill_prefs( const unsigned char * bytes, Inst * pinst);
132+ static bool is_prefix(const unsigned char * bytes);
133+};
134+
135+#endif // ~ __DEC_BASE_H_INCLUDED__
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_base.cpp
@@ -0,0 +1,1137 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+#include "enc_base.h"
21+//#include <climits>
22+#include <string.h>
23+#define USE_ENCODER_DEFINES
24+#include "enc_prvt.h"
25+#include <stdio.h>
26+
27+//#define JET_PROTO
28+
29+#ifdef JET_PROTO
30+#include "dec_base.h"
31+#include "jvmti_dasm.h"
32+#endif
33+
34+ENCODER_NAMESPACE_START
35+
36+/**
37+ * @file
38+ * @brief Main encoding routines and structures.
39+ */
40+
41+#ifndef _WIN32
42+ #define strcmpi strcasecmp
43+#endif
44+
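// static-initializer trick: forces buildTable() to run once at load time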
45+int EncoderBase::dummy = EncoderBase::buildTable();
46+
47+const unsigned char EncoderBase::size_hash[OpndSize_64+1] = {
48+ //
49+ 0xFF, // OpndSize_Null = 0,
50+ 3, // OpndSize_8 = 0x1,
51+ 2, // OpndSize_16 = 0x2,
52+ 0xFF, // 0x3
53+ 1, // OpndSize_32 = 0x4,
54+ 0xFF, // 0x5
55+ 0xFF, // 0x6
56+ 0xFF, // 0x7
57+ 0, // OpndSize_64 = 0x8,
58+ //
59+};
60+
61+const unsigned char EncoderBase::kind_hash[OpndKind_Mem+1] = {
62+ //
63+ //gp reg -> 000 = 0
64+ //memory -> 001 = 1
65+ //immediate -> 010 = 2
66+ //xmm reg -> 011 = 3
67+ //segment regs -> 100 = 4
68+ //fp reg -> 101 = 5
69+ //mmx reg -> 110 = 6
70+ //
71+ 0xFF, // 0 OpndKind_Null=0,
72+ 0<<2, // 1 OpndKind_GPReg =
73+ // OpndKind_MinRegKind=0x1,
74+ 4<<2, // 2 OpndKind_SReg=0x2,
75+
76+#ifdef _HAVE_MMX_
77+ 6<<2, // 3
78+#else
79+ 0xFF, // 3
80+#endif
81+
82+ 5<<2, // 4 OpndKind_FPReg=0x4,
83+ 0xFF, 0xFF, 0xFF, // 5, 6, 7
84+ 3<<2, // OpndKind_XMMReg=0x8,
85+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 9, 0xA, 0xB, 0xC, 0xD,
86+ // 0xE, 0xF
87+ 0xFF, // OpndKind_MaxRegKind =
88+ // OpndKind_StatusReg =
89+ // OpndKind_OtherReg=0x10,
90+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x11-0x18
91+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x19-0x1F
92+ 2<<2, // OpndKind_Immediate=0x20,
93+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x21-0x28
94+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x29-0x30
95+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x31-0x38
96+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x39-0x3F
97+ 1<<2, // OpndKind_Memory=0x40
98+};
99+
100+char * EncoderBase::curRelOpnd[3];
101+
102+char* EncoderBase::encode_aux(char* stream, unsigned aux,
103+ const Operands& opnds, const OpcodeDesc * odesc,
104+ unsigned * pargsCount, Rex * prex)
105+{
106+ const unsigned byte = aux;
107+ OpcodeByteKind kind = (OpcodeByteKind)(byte & OpcodeByteKind_KindMask);
108+    // The '>>' here is to force the switch to be table-based instead of
109+    // a set of CMP+Jcc.
110+ if (*pargsCount >= COUNTOF(opnds)) {
111+ assert(false);
112+ return stream;
113+ }
114+ switch(kind>>8) {
115+ case OpcodeByteKind_SlashR>>8:
116+ // /r - Indicates that the ModR/M byte of the instruction contains
117+ // both a register operand and an r/m operand.
118+ {
119+ assert(opnds.count() > 1);
120+ // not true anymore for MOVQ xmm<->r
121+ //assert((odesc->opnds[0].kind & OpndKind_Mem) ||
122+ // (odesc->opnds[1].kind & OpndKind_Mem));
123+ unsigned memidx = odesc->opnds[0].kind & OpndKind_Mem ? 0 : 1;
124+ unsigned regidx = memidx == 0 ? 1 : 0;
125+ memidx += *pargsCount;
126+ regidx += *pargsCount;
127+ ModRM& modrm = *(ModRM*)stream;
128+ if (memidx >= COUNTOF(opnds) || regidx >= COUNTOF(opnds)) {
129+ assert(false);
130+ break;
131+ }
132+ if (opnds[memidx].is_mem()) {
133+ stream = encodeModRM(stream, opnds, memidx, odesc, prex);
134+ }
135+ else {
136+ modrm.mod = 3; // 11
137+ modrm.rm = getHWRegIndex(opnds[memidx].reg());
138+#ifdef _EM64T_
139+ if (opnds[memidx].need_rex() && needs_rex_r(opnds[memidx].reg())) {
140+ prex->b = 1;
141+ }
142+#endif
143+ ++stream;
144+ }
145+ modrm.reg = getHWRegIndex(opnds[regidx].reg());
146+#ifdef _EM64T_
147+ if (opnds[regidx].need_rex() && needs_rex_r(opnds[regidx].reg())) {
148+ prex->r = 1;
149+ }
150+#endif
151+ *pargsCount += 2;
152+ }
153+ break;
154+ case OpcodeByteKind_SlashNum>>8:
155+ // /digit - A digit between 0 and 7 indicates that the
156+ // ModR/M byte of the instruction uses only the r/m
157+ // (register or memory) operand. The reg field contains
158+ // the digit that provides an extension to the instruction's
159+ // opcode.
160+ {
161+ const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask);
162+ assert(lowByte <= 7);
163+ ModRM& modrm = *(ModRM*)stream;
164+ unsigned idx = *pargsCount;
165+ assert(opnds[idx].is_mem() || opnds[idx].is_reg());
166+ if (opnds[idx].is_mem()) {
167+ stream = encodeModRM(stream, opnds, idx, odesc, prex);
168+ }
169+ else {
170+ modrm.mod = 3; // 11
171+ modrm.rm = getHWRegIndex(opnds[idx].reg());
172+#ifdef _EM64T_
173+ if (opnds[idx].need_rex() && needs_rex_r(opnds[idx].reg())) {
174+ prex->b = 1;
175+ }
176+#endif
177+ ++stream;
178+ }
179+ modrm.reg = (char)lowByte;
180+ *pargsCount += 1;
181+ }
182+ break;
183+ case OpcodeByteKind_plus_i>>8:
184+ // +i - A number used in floating-point instructions when one
185+ // of the operands is ST(i) from the FPU register stack. The
186+ // number i (which can range from 0 to 7) is added to the
187+ // hexadecimal byte given at the left of the plus sign to form
188+ // a single opcode byte.
189+ {
190+ unsigned idx = *pargsCount;
191+ const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask);
192+ *stream = (char)lowByte + getHWRegIndex(opnds[idx].reg());
193+ ++stream;
194+ *pargsCount += 1;
195+ }
196+ break;
197+ case OpcodeByteKind_ib>>8:
198+ case OpcodeByteKind_iw>>8:
199+ case OpcodeByteKind_id>>8:
200+#ifdef _EM64T_
201+ case OpcodeByteKind_io>>8:
202+#endif //_EM64T_
203+ // ib, iw, id - A 1-byte (ib), 2-byte (iw), or 4-byte (id)
204+ // immediate operand to the instruction that follows the
205+ // opcode, ModR/M bytes or scale-indexing bytes. The opcode
206+ // determines if the operand is a signed value. All words
207+ // and double words are given with the low-order byte first.
208+ {
209+ unsigned idx = *pargsCount;
210+ *pargsCount += 1;
211+ assert(opnds[idx].is_imm());
212+ if (kind == OpcodeByteKind_ib) {
213+ *(unsigned char*)stream = (unsigned char)opnds[idx].imm();
214+ curRelOpnd[idx] = stream;
215+ stream += 1;
216+ }
217+ else if (kind == OpcodeByteKind_iw) {
218+ *(unsigned short*)stream = (unsigned short)opnds[idx].imm();
219+ curRelOpnd[idx] = stream;
220+ stream += 2;
221+ }
222+ else if (kind == OpcodeByteKind_id) {
223+ *(unsigned*)stream = (unsigned)opnds[idx].imm();
224+ curRelOpnd[idx] = stream;
225+ stream += 4;
226+ }
227+#ifdef _EM64T_
228+ else {
229+ assert(kind == OpcodeByteKind_io);
230+ *(long long*)stream = (long long)opnds[idx].imm();
231+ curRelOpnd[idx] = stream;
232+ stream += 8;
233+ }
234+#else
235+ else {
236+ assert(false);
237+ }
238+#endif
239+ }
240+ break;
241+ case OpcodeByteKind_cb>>8:
242+ assert(opnds[*pargsCount].is_imm());
243+ *(unsigned char*)stream = (unsigned char)opnds[*pargsCount].imm();
244+ curRelOpnd[*pargsCount]= stream;
245+ stream += 1;
246+ *pargsCount += 1;
247+ break;
248+ case OpcodeByteKind_cw>>8:
249+ assert(opnds[*pargsCount].is_imm());
250+ *(unsigned short*)stream = (unsigned short)opnds[*pargsCount].imm();
251+ curRelOpnd[*pargsCount]= stream;
252+ stream += 2;
253+ *pargsCount += 1;
254+ break;
255+ case OpcodeByteKind_cd>>8:
256+ assert(opnds[*pargsCount].is_imm());
257+ *(unsigned*)stream = (unsigned)opnds[*pargsCount].imm();
258+ curRelOpnd[*pargsCount]= stream;
259+ stream += 4;
260+ *pargsCount += 1;
261+ break;
262+ //OpcodeByteKind_cp = 0x0B00,
263+ //OpcodeByteKind_co = 0x0C00,
264+ //OpcodeByteKind_ct = 0x0D00,
265+ case OpcodeByteKind_rb>>8:
266+ case OpcodeByteKind_rw>>8:
267+ case OpcodeByteKind_rd>>8:
268+ // +rb, +rw, +rd - A register code, from 0 through 7,
269+ // added to the hexadecimal byte given at the left of
270+ // the plus sign to form a single opcode byte.
271+ assert(opnds.count() > 0);
272+ assert(opnds[*pargsCount].is_reg());
273+ {
274+ const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask);
275+ *(unsigned char*)stream = (unsigned char)lowByte +
276+ getHWRegIndex(opnds[*pargsCount].reg());
277+#ifdef _EM64T_
278+ if (opnds[*pargsCount].need_rex() && needs_rex_r(opnds[*pargsCount].reg())) {
279+ prex->b = 1;
280+ }
281+#endif
282+ ++stream;
283+ *pargsCount += 1;
284+ }
285+ break;
286+ default:
287+ assert(false);
288+ break;
289+ }
290+ return stream;
291+}
292+
293+char * EncoderBase::encode(char * stream, Mnemonic mn, const Operands& opnds)
294+{
295+#ifdef _DEBUG
296+ if (opnds.count() > 0) {
297+ if (opnds[0].is_mem()) {
298+ assert(getRegKind(opnds[0].base()) != OpndKind_SReg);
299+ }
300+ else if (opnds.count() >1 && opnds[1].is_mem()) {
301+ assert(getRegKind(opnds[1].base()) != OpndKind_SReg);
302+ }
303+ }
304+#endif
305+
306+#ifdef JET_PROTO
307+ char* saveStream = stream;
308+#endif
309+
310+ const OpcodeDesc * odesc = lookup(mn, opnds);
311+#if !defined(_EM64T_)
312+ bool copy_opcode = true;
313+ Rex *prex = NULL;
314+#else
315+    // We need REX if:
316+    //  - any register used as an operand, or in the address form, is one of the new extended registers, or
317+    //  - it is explicitly specified by the opcode.
318+ // So, if we don't have REX in opcode but need_rex, then set rex here
319+ // otherwise, wait until opcode is set, and then update REX
320+
321+ bool copy_opcode = true;
322+ unsigned char _1st = odesc->opcode[0];
323+
324+ Rex *prex = (Rex*)stream;
325+    if (opnds.need_rex() &&
326+        ((_1st == 0x66) || ((_1st == 0xF2 || _1st == 0xF3) && odesc->opcode[1] == 0x0F))) {
327+ // Special processing
328+ //
329+ copy_opcode = false;
330+ //
331+ *(unsigned char*)stream = _1st;
332+ ++stream;
333+ //
334+ prex = (Rex*)stream;
335+ prex->dummy = 4;
336+ prex->w = 0;
337+ prex->b = 0;
338+ prex->x = 0;
339+ prex->r = 0;
340+ ++stream;
341+ //
342+ memcpy(stream, &odesc->opcode[1], odesc->opcode_len-1);
343+ stream += odesc->opcode_len-1;
344+ }
345+ else if (_1st != 0x48 && opnds.need_rex()) {
346+ prex = (Rex*)stream;
347+ prex->dummy = 4;
348+ prex->w = 0;
349+ prex->b = 0;
350+ prex->x = 0;
351+ prex->r = 0;
352+ ++stream;
353+ }
354+#endif // ifndef EM64T
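    // Resulting byte order for, e.g., a 0x66-prefixed SSE instruction that
    // needs REX: 66 <REX> 0F xx ...; the mandatory prefix is emitted first
    // because REX must immediately precede the opcode bytes.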
355+
356+ if (copy_opcode) {
357+ if (odesc->opcode_len==1) {
358+ unsigned char *dest = (unsigned char *) (stream);
359+ unsigned char *src = (unsigned char *) (& (odesc->opcode));
360+ *dest = *src;
361+ }
362+ else if (odesc->opcode_len==2) {
363+ short *dest = (short *) (stream);
364+ void *ptr = (void *) (& (odesc->opcode));
365+ short *src = (short *) (ptr);
366+ *dest = *src;
367+ }
368+ else if (odesc->opcode_len==3) {
369+ unsigned short *dest = (unsigned short *) (stream);
370+ void *ptr = (void *) (& (odesc->opcode));
371+ unsigned short *src = (unsigned short *) (ptr);
372+ *dest = *src;
373+
374+ //Now handle the last part
375+ unsigned char *dest2 = (unsigned char *) (stream + 2);
376+ *dest2 = odesc->opcode[2];
377+ }
378+ else if (odesc->opcode_len==4) {
379+ unsigned int *dest = (unsigned int *) (stream);
380+ void *ptr = (void *) (& (odesc->opcode));
381+ unsigned int *src = (unsigned int *) (ptr);
382+ *dest = *src;
383+ }
384+ stream += odesc->opcode_len;
385+ }
386+
387+ unsigned argsCount = odesc->first_opnd;
388+
389+ if (odesc->aux0) {
390+ stream = encode_aux(stream, odesc->aux0, opnds, odesc, &argsCount, prex);
391+ if (odesc->aux1) {
392+ stream = encode_aux(stream, odesc->aux1, opnds, odesc, &argsCount, prex);
393+ }
394+ }
395+#ifdef JET_PROTO
396+ //saveStream
397+ Inst inst;
398+ unsigned len = DecoderBase::decode(saveStream, &inst);
399+ assert(inst.mn == mn);
400+ assert(len == (unsigned)(stream-saveStream));
401+ if (mn == Mnemonic_CALL || mn == Mnemonic_JMP ||
402+ Mnemonic_RET == mn ||
403+ (Mnemonic_JO<=mn && mn<=Mnemonic_JG)) {
404+ assert(inst.argc == opnds.count());
405+
406+ InstructionDisassembler idi(saveStream);
407+
408+ for (unsigned i=0; i<inst.argc; i++) {
409+ const EncoderBase::Operand& original = opnds[i];
410+ const EncoderBase::Operand& decoded = inst.operands[i];
411+ assert(original.kind() == decoded.kind());
412+ assert(original.size() == decoded.size());
413+ if (original.is_imm()) {
414+ assert(original.imm() == decoded.imm());
415+ assert(idi.get_opnd(0).kind == InstructionDisassembler::Kind_Imm);
416+ if (mn == Mnemonic_CALL) {
417+ assert(idi.get_type() == InstructionDisassembler::RELATIVE_CALL);
418+ }
419+ else if (mn == Mnemonic_JMP) {
420+ assert(idi.get_type() == InstructionDisassembler::RELATIVE_JUMP);
421+ }
422+ else if (mn == Mnemonic_RET) {
423+ assert(idi.get_type() == InstructionDisassembler::RET);
424+ }
425+ else {
426+ assert(idi.get_type() == InstructionDisassembler::RELATIVE_COND_JUMP);
427+ }
428+ }
429+ else if (original.is_mem()) {
430+ assert(original.base() == decoded.base());
431+ assert(original.index() == decoded.index());
432+ assert(original.scale() == decoded.scale());
433+ assert(original.disp() == decoded.disp());
434+ assert(idi.get_opnd(0).kind == InstructionDisassembler::Kind_Mem);
435+ if (mn == Mnemonic_CALL) {
436+ assert(idi.get_type() == InstructionDisassembler::INDIRECT_CALL);
437+ }
438+ else if (mn == Mnemonic_JMP) {
439+ assert(idi.get_type() == InstructionDisassembler::INDIRECT_JUMP);
440+ }
441+ else {
442+ assert(false);
443+ }
444+ }
445+ else {
446+ assert(original.is_reg());
447+ assert(original.reg() == decoded.reg());
448+ assert(idi.get_opnd(0).kind == InstructionDisassembler::Kind_Reg);
449+ if (mn == Mnemonic_CALL) {
450+ assert(idi.get_type() == InstructionDisassembler::INDIRECT_CALL);
451+ }
452+ else if (mn == Mnemonic_JMP) {
453+ assert(idi.get_type() == InstructionDisassembler::INDIRECT_JUMP);
454+ }
455+ else {
456+ assert(false);
457+ }
458+ }
459+ }
460+
461+ Inst inst2;
462+ len = DecoderBase::decode(saveStream, &inst2);
463+ }
464+
465+ // if(idi.get_length_with_prefix() != (int)len) {
466+ //__asm { int 3 };
467+ // }
468+#endif
469+
470+ return stream;
471+}
472+
473+char* EncoderBase::encodeModRM(char* stream, const Operands& opnds,
474+ unsigned idx, const OpcodeDesc * odesc,
475+ Rex * prex)
476+{
477+ const Operand& op = opnds[idx];
478+ assert(op.is_mem());
479+ assert(idx < COUNTOF(curRelOpnd));
480+ ModRM& modrm = *(ModRM*)stream;
481+ ++stream;
482+ SIB& sib = *(SIB*)stream;
483+
484+    // We need a SIB byte if:
485+    //   - we have index & scale (nb: an index without a base and without
486+    //     a scale is treated as an error)
487+    //   - the base is EBP without a disp (then we use a fake disp8)
488+    //   - the base is ESP (nb: ESP can't be an index)
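    // Worked byte sequences for the cases handled below (reg field = EAX):
    //   MOV EAX, [EBP]       -> 8B 45 00      (mod=01 rm=101, fake disp8 = 0)
    //   MOV EAX, [ESP+8]     -> 8B 44 24 08   (rm=100 => SIB 24: base=ESP, no index)
    //   MOV EAX, [EBX+ECX*4] -> 8B 04 8B      (SIB: scale=10, index=ECX, base=EBX)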
489+
490+ RegName base = op.base();
491+ // only disp ?..
492+ if (base == RegName_Null && op.index() == RegName_Null) {
493+ assert(op.scale() == 0); // 'scale!=0' has no meaning without index
494+ // ... yes - only have disp
495+    // On EM64T, plain [disp] addressing means 'RIP-based'; we
496+    // must use a SIB to encode the 'DS-based' form
497+#ifdef _EM64T_
498+ modrm.mod = 0; // 00 - ..
499+ modrm.rm = 4; // 100 - have SIB
500+
501+ sib.base = 5; // 101 - none
502+ sib.index = 4; // 100 - none
503+ sib.scale = 0; //
504+ ++stream; // bypass SIB
505+#else
506+ // ignore disp_fits8, always use disp32.
507+ modrm.mod = 0;
508+ modrm.rm = 5;
509+#endif
510+ *(unsigned*)stream = (unsigned)op.disp();
511+ curRelOpnd[idx]= stream;
512+ stream += 4;
513+ return stream;
514+ }
515+
516+    // <climits> errors out when targeting compal; define conservative bounds locally
517+#define CHAR_MIN (-127)
518+#define CHAR_MAX 127
519+ const bool disp_fits8 = CHAR_MIN <= op.disp() && op.disp() <= CHAR_MAX;
520+ /*&& op.base() != RegName_Null - just checked above*/
521+ if (op.index() == RegName_Null && getHWRegIndex(op.base()) != getHWRegIndex(REG_STACK)) {
522+ assert(op.scale() == 0); // 'scale!=0' has no meaning without index
523+        // ... luckily no SIB, only a base and maybe a disp
524+
525+ // EBP base is a special case. Need to use [EBP] + disp8 form
526+ if (op.disp() == 0 && getHWRegIndex(op.base()) != getHWRegIndex(RegName_EBP)) {
527+            modrm.mod = 0; // mod=00, no disp at all
528+ }
529+ else if (disp_fits8) {
530+ modrm.mod = 1; // mod=01, use disp8
531+ *(unsigned char*)stream = (unsigned char)op.disp();
532+ curRelOpnd[idx]= stream;
533+ ++stream;
534+ }
535+ else {
536+ modrm.mod = 2; // mod=10, use disp32
537+ *(unsigned*)stream = (unsigned)op.disp();
538+ curRelOpnd[idx]= stream;
539+ stream += 4;
540+ }
541+ modrm.rm = getHWRegIndex(op.base());
542+ if (is_em64t_extra_reg(op.base())) {
543+ prex->b = 1;
544+ }
545+ return stream;
546+ }
547+
548+ // cool, we do have SIB.
549+ ++stream; // bypass SIB in stream
550+
551+    // {E|R}SP cannot be a scaled index; however, R12, which has the same index in ModRM, can
552+ assert(op.index() == RegName_Null || !equals(op.index(), REG_STACK));
553+
554+ // Only GPRegs can be encoded in the SIB
555+ assert(op.base() == RegName_Null ||
556+ getRegKind(op.base()) == OpndKind_GPReg);
557+ assert(op.index() == RegName_Null ||
558+ getRegKind(op.index()) == OpndKind_GPReg);
559+
560+ modrm.rm = 4; // r/m = 100, means 'we have SIB here'
561+ if (op.base() == RegName_Null) {
562+        // no base.
563+        // (op.index() != RegName_Null was already checked by the first if() above)
565+
566+ modrm.mod = 0; // mod=00 - here it means 'no base, but disp32'
567+ sib.base = 5; // 101 with mod=00 ^^^
568+
569+ // encode at least fake disp32 to avoid having [base=ebp]
570+ *(unsigned*)stream = op.disp();
571+ curRelOpnd[idx]= stream;
572+ stream += 4;
573+
574+ unsigned sc = op.scale();
575+ if (sc == 1 || sc==0) { sib.scale = 0; } // SS=00
576+ else if (sc == 2) { sib.scale = 1; } // SS=01
577+ else if (sc == 4) { sib.scale = 2; } // SS=10
578+ else if (sc == 8) { sib.scale = 3; } // SS=11
579+ sib.index = getHWRegIndex(op.index());
580+ if (is_em64t_extra_reg(op.index())) {
581+ prex->x = 1;
582+ }
583+
584+ return stream;
585+ }
586+
587+ if (op.disp() == 0 && getHWRegIndex(op.base()) != getHWRegIndex(RegName_EBP)) {
588+ modrm.mod = 0; // mod=00, no disp
589+ }
590+ else if (disp_fits8) {
591+ modrm.mod = 1; // mod=01, use disp8
592+ *(unsigned char*)stream = (unsigned char)op.disp();
593+ curRelOpnd[idx]= stream;
594+ stream += 1;
595+ }
596+ else {
597+ modrm.mod = 2; // mod=10, use disp32
598+ *(unsigned*)stream = (unsigned)op.disp();
599+ curRelOpnd[idx]= stream;
600+ stream += 4;
601+ }
602+
603+ if (op.index() == RegName_Null) {
604+ assert(op.scale() == 0); // 'scale!=0' has no meaning without index
605+ // the only reason we're here without index, is that we have {E|R}SP
606+ // or R12 as a base. Another possible reason - EBP without a disp -
607+ // is handled above by adding a fake disp8
608+#ifdef _EM64T_
609+ assert(op.base() != RegName_Null && (equals(op.base(), REG_STACK) ||
610+ equals(op.base(), RegName_R12)));
611+#else // _EM64T_
612+ assert(op.base() != RegName_Null && equals(op.base(), REG_STACK));
613+#endif //_EM64T_
614+ sib.scale = 0; // SS = 00
615+ sib.index = 4; // SS + index=100 means 'no index'
616+ }
617+ else {
618+ unsigned sc = op.scale();
619+ if (sc == 1 || sc==0) { sib.scale = 0; } // SS=00
620+ else if (sc == 2) { sib.scale = 1; } // SS=01
621+ else if (sc == 4) { sib.scale = 2; } // SS=10
622+ else if (sc == 8) { sib.scale = 3; } // SS=11
623+ sib.index = getHWRegIndex(op.index());
624+ if (is_em64t_extra_reg(op.index())) {
625+ prex->x = 1;
626+ }
627+ // not an error by itself, but the usage of [index*1] instead
628+ // of [base] is discouraged
629+ assert(op.base() != RegName_Null || op.scale() != 1);
630+ }
631+ sib.base = getHWRegIndex(op.base());
632+ if (is_em64t_extra_reg(op.base())) {
633+ prex->b = 1;
634+ }
635+ return stream;
636+}
637+
638+char * EncoderBase::nops(char * stream, unsigned howMany)
639+{
640+ // Recommended multi-byte NOPs from the Intel architecture manual
641+ static const unsigned char nops[10][9] = {
642+ { 0, }, // 0, this line is dummy and not used in the loop below
643+ { 0x90, }, // 1-byte NOP
644+ { 0x66, 0x90, }, // 2
645+ { 0x0F, 0x1F, 0x00, }, // 3
646+ { 0x0F, 0x1F, 0x40, 0x00, }, // 4
647+ { 0x0F, 0x1F, 0x44, 0x00, 0x00, }, // 5
648+ { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00, }, // 6
649+ { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00, }, // 7
650+ { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, }, // 8
651+ { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }, // 9-byte NOP
652+ };
653+
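    // e.g. nops(stream, 12) emits the 9-byte NOP followed by the 3-byte one.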
654+ // Start from delivering the longest possible NOPs, then proceed with shorter ones
655+ for (unsigned nopSize=9; nopSize!=0; nopSize--) {
656+ while(howMany>=nopSize) {
657+ const unsigned char* nopBytes = nops[nopSize];
658+ for (unsigned i=0; i<nopSize; i++) {
659+ stream[i] = nopBytes[i];
660+ }
661+ stream += nopSize;
662+ howMany -= nopSize;
663+ }
664+ }
665+ char* end = stream + howMany;
666+ return end;
667+}
668+
669+char * EncoderBase::prefix(char* stream, InstPrefix pref)
670+{
671+ if (pref== InstPrefix_Null) {
672+ // nothing to do
673+ return stream;
674+ }
675+ *stream = (char)pref;
676+ return stream + 1;
677+}
678+
679+
680+/**
681+ *
682+ */
683+bool EncoderBase::extAllowed(OpndExt opndExt, OpndExt instExt) {
684+ if (instExt == opndExt || instExt == OpndExt_Any || opndExt == OpndExt_Any) {
685+ return true;
686+ }
687+    //asm("int3");
688+    assert(0);
689+ return false;
690+}
691+
692+static bool try_match(const EncoderBase::OpcodeDesc& odesc,
693+ const EncoderBase::Operands& opnds, bool strict) {
694+
695+ assert(odesc.roles.count == opnds.count());
696+
697+ for(unsigned j=0; j<odesc.roles.count; j++) {
698+ // - the location must match exactly
699+ if ((odesc.opnds[j].kind & opnds[j].kind()) != opnds[j].kind()) {
700+ return false;
701+ }
702+ if (strict) {
703+ // the size must match exactly
704+ if (odesc.opnds[j].size != opnds[j].size()) {
705+ return false;
706+ }
707+ }
708+ else {
709+            // must match only for def operands; don't care about use ones.
710+            // In situations like 'MOV r32, imm8' vs 'MOV r32, imm32',
711+            // the destination operand defines the overall size.
712+ if (EncoderBase::getOpndRoles(odesc.roles, j) & OpndRole_Def) {
713+ if (odesc.opnds[j].size != opnds[j].size()) {
714+ return false;
715+ }
716+ }
717+ }
718+ }
719+ return true;
720+}
721+
722+//
723+//Subhash implementation - may be useful in case of many misses during the fast
724+//opcode lookup.
725+//
726+
727+#ifdef ENCODER_USE_SUBHASH
728+static unsigned subHash[32];
729+
730+static unsigned find(Mnemonic mn, unsigned hash)
731+{
732+ unsigned key = hash % COUNTOF(subHash);
733+ unsigned pack = subHash[key];
734+ unsigned _hash = pack & 0xFFFF;
735+ if (_hash != hash) {
736+ stat.miss(mn);
737+ return EncoderBase::NOHASH;
738+ }
739+ unsigned _mn = (pack >> 24)&0xFF;
740+    if (_mn != (unsigned)mn) {
741+ stat.miss(mn);
742+ return EncoderBase::NOHASH;
743+ }
744+ unsigned idx = (pack >> 16) & 0xFF;
745+ stat.hit(mn);
746+ return idx;
747+}
748+
749+static void put(Mnemonic mn, unsigned hash, unsigned idx)
750+{
751+ unsigned pack = hash | (idx<<16) | (mn << 24);
752+ unsigned key = hash % COUNTOF(subHash);
753+ subHash[key] = pack;
754+}
755+#endif
756+
757+const EncoderBase::OpcodeDesc *
758+EncoderBase::lookup(Mnemonic mn, const Operands& opnds)
759+{
760+ const unsigned hash = opnds.hash();
761+ unsigned opcodeIndex = opcodesHashMap[mn][hash];
762+#ifdef ENCODER_USE_SUBHASH
763+ if (opcodeIndex == NOHASH) {
764+ opcodeIndex = find(mn, hash);
765+ }
766+#endif
767+
768+ if (opcodeIndex == NOHASH) {
769+        // fast path didn't work; look it up sequentially
770+ const OpcodeDesc * odesc = opcodes[mn];
771+ int idx = -1;
772+ bool found = false;
773+ for (idx=0; !odesc[idx].last; idx++) {
774+ const OpcodeDesc& opcode = odesc[idx];
775+ if (opcode.platf == OpcodeInfo::decoder) {
776+ continue;
777+ }
778+ if (opcode.roles.count != opnds.count()) {
779+ continue;
780+ }
781+ if (try_match(opcode, opnds, true)) {
782+ found = true;
783+ break;
784+ }
785+ }
786+ if (!found) {
787+ for (idx=0; !odesc[idx].last; idx++) {
788+ const OpcodeDesc& opcode = odesc[idx];
789+ if (opcode.platf == OpcodeInfo::decoder) {
790+ continue;
791+ }
792+ if (opcode.roles.count != opnds.count()) {
793+ continue;
794+ }
795+ if (try_match(opcode, opnds, false)) {
796+ found = true;
797+ break;
798+ }
799+ }
800+ }
801+ assert(found);
802+ opcodeIndex = idx;
803+#ifdef ENCODER_USE_SUBHASH
804+ put(mn, hash, opcodeIndex);
805+#endif
806+ }
807+ assert(opcodeIndex != NOHASH);
808+ const OpcodeDesc * odesc = &opcodes[mn][opcodeIndex];
809+ assert(!odesc->last);
810+ assert(odesc->roles.count == opnds.count());
811+ assert(odesc->platf != OpcodeInfo::decoder);
812+#if !defined(_EM64T_)
813+ // tuning was done for IA32 only, so no size restriction on EM64T
814+ //assert(sizeof(OpcodeDesc)==128);
815+#endif
816+ return odesc;
817+}
818+
819+char* EncoderBase::getOpndLocation(int index) {
820+ assert(index < 3);
821+ return curRelOpnd[index];
822+}
823+
824+
825+Mnemonic EncoderBase::str2mnemonic(const char * mn_name)
826+{
827+ for (unsigned m = 1; m<Mnemonic_Count; m++) {
828+ if (!strcmpi(mnemonics[m].name, mn_name)) {
829+ return (Mnemonic)m;
830+ }
831+ }
832+ return Mnemonic_Null;
833+}
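
A small usage sketch (assuming Mnemonic_ADD is among the mnemonics defined in enc_defs_ext.h); the lookup is case-insensitive:

    assert(EncoderBase::str2mnemonic("add") == Mnemonic_ADD);
    assert(EncoderBase::str2mnemonic("AdD") == Mnemonic_ADD);
    assert(EncoderBase::str2mnemonic("not-a-mnemonic") == Mnemonic_Null);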
834+
835+static const char * conditionStrings[ConditionMnemonic_Count] = {
836+ "O",
837+ "NO",
838+ "B",
839+ "AE",
840+ "Z",
841+ "NZ",
842+ "BE",
843+ "A",
844+
845+ "S",
846+ "NS",
847+ "P",
848+ "NP",
849+ "L",
850+ "GE",
851+ "LE",
852+ "G",
853+};
854+
855+const char * getConditionString(ConditionMnemonic cm) {
856+ return conditionStrings[cm];
857+}
858+
859+static const struct {
860+ char sizeString[12];
861+ OpndSize size;
862+}
863+sizes[] = {
864+ { "Sz8", OpndSize_8 },
865+ { "Sz16", OpndSize_16 },
866+ { "Sz32", OpndSize_32 },
867+ { "Sz64", OpndSize_64 },
868+#if !defined(TESTING_ENCODER)
869+ { "Sz80", OpndSize_80 },
870+ { "Sz128", OpndSize_128 },
871+#endif
872+ { "SzAny", OpndSize_Any },
873+};
874+
875+
876+OpndSize getOpndSize(const char * sizeString)
877+{
878+ assert(sizeString);
879+ for (unsigned i = 0; i<COUNTOF(sizes); i++) {
880+ if (!strcmpi(sizeString, sizes[i].sizeString)) {
881+ return sizes[i].size;
882+ }
883+ }
884+ return OpndSize_Null;
885+}
886+
887+const char * getOpndSizeString(OpndSize size) {
888+ for( unsigned i = 0; i<COUNTOF(sizes); i++ ) {
889+ if( sizes[i].size==size ) {
890+ return sizes[i].sizeString;
891+ }
892+ }
893+ return NULL;
894+}
895+
896+static const struct {
897+ char kindString[16];
898+ OpndKind kind;
899+}
900+kinds[] = {
901+ { "Null", OpndKind_Null },
902+ { "GPReg", OpndKind_GPReg },
903+ { "SReg", OpndKind_SReg },
904+ { "FPReg", OpndKind_FPReg },
905+ { "XMMReg", OpndKind_XMMReg },
906+#ifdef _HAVE_MMX_
907+ { "MMXReg", OpndKind_MMXReg },
908+#endif
909+ { "StatusReg", OpndKind_StatusReg },
910+ { "Reg", OpndKind_Reg },
911+ { "Imm", OpndKind_Imm },
912+ { "Mem", OpndKind_Mem },
913+ { "Any", OpndKind_Any },
914+};
915+
916+const char * getOpndKindString(OpndKind kind)
917+{
918+ for (unsigned i = 0; i<COUNTOF(kinds); i++) {
919+ if (kinds[i].kind==kind) {
920+ return kinds[i].kindString;
921+ }
922+ }
923+ return NULL;
924+}
925+
926+OpndKind getOpndKind(const char * kindString)
927+{
928+ assert(kindString);
929+ for (unsigned i = 0; i<COUNTOF(kinds); i++) {
930+ if (!strcmpi(kindString, kinds[i].kindString)) {
931+ return kinds[i].kind;
932+ }
933+ }
934+ return OpndKind_Null;
935+}
936+
937+/**
938+ * A mapping between register string representation and its RegName constant.
939+ */
940+static const struct {
941+ char regstring[7];
942+ RegName regname;
943+}
944+
945+registers[] = {
946+#ifdef _EM64T_
947+ {"RAX", RegName_RAX},
948+ {"RBX", RegName_RBX},
949+ {"RCX", RegName_RCX},
950+ {"RDX", RegName_RDX},
951+ {"RBP", RegName_RBP},
952+ {"RSI", RegName_RSI},
953+ {"RDI", RegName_RDI},
954+ {"RSP", RegName_RSP},
955+ {"R8", RegName_R8},
956+ {"R9", RegName_R9},
957+ {"R10", RegName_R10},
958+ {"R11", RegName_R11},
959+ {"R12", RegName_R12},
960+ {"R13", RegName_R13},
961+ {"R14", RegName_R14},
962+ {"R15", RegName_R15},
963+#endif
964+
965+ {"EAX", RegName_EAX},
966+ {"ECX", RegName_ECX},
967+ {"EDX", RegName_EDX},
968+ {"EBX", RegName_EBX},
969+ {"ESP", RegName_ESP},
970+ {"EBP", RegName_EBP},
971+ {"ESI", RegName_ESI},
972+ {"EDI", RegName_EDI},
973+#ifdef _EM64T_
974+ {"R8D", RegName_R8D},
975+ {"R9D", RegName_R9D},
976+ {"R10D", RegName_R10D},
977+ {"R11D", RegName_R11D},
978+ {"R12D", RegName_R12D},
979+ {"R13D", RegName_R13D},
980+ {"R14D", RegName_R14D},
981+ {"R15D", RegName_R15D},
982+#endif
983+
984+ {"AX", RegName_AX},
985+ {"CX", RegName_CX},
986+ {"DX", RegName_DX},
987+ {"BX", RegName_BX},
988+ {"SP", RegName_SP},
989+ {"BP", RegName_BP},
990+ {"SI", RegName_SI},
991+ {"DI", RegName_DI},
992+
993+ {"AL", RegName_AL},
994+ {"CL", RegName_CL},
995+ {"DL", RegName_DL},
996+ {"BL", RegName_BL},
997+#if !defined(_EM64T_)
998+ {"AH", RegName_AH},
999+ {"CH", RegName_CH},
1000+ {"DH", RegName_DH},
1001+ {"BH", RegName_BH},
1002+#else
1003+ {"SPL", RegName_SPL},
1004+ {"BPL", RegName_BPL},
1005+ {"SIL", RegName_SIL},
1006+ {"DIL", RegName_DIL},
1007+ {"R8L", RegName_R8L},
1008+ {"R9L", RegName_R9L},
1009+ {"R10L", RegName_R10L},
1010+ {"R11L", RegName_R11L},
1011+ {"R12L", RegName_R12L},
1012+ {"R13L", RegName_R13L},
1013+ {"R14L", RegName_R14L},
1014+ {"R15L", RegName_R15L},
1015+#endif
1016+ {"ES", RegName_ES},
1017+ {"CS", RegName_CS},
1018+ {"SS", RegName_SS},
1019+ {"DS", RegName_DS},
1020+ {"FS", RegName_FS},
1021+ {"GS", RegName_GS},
1022+
1023+ {"FP0", RegName_FP0},
1024+/*
1025+ {"FP1", RegName_FP1},
1026+ {"FP2", RegName_FP2},
1027+ {"FP3", RegName_FP3},
1028+ {"FP4", RegName_FP4},
1029+ {"FP5", RegName_FP5},
1030+ {"FP6", RegName_FP6},
1031+ {"FP7", RegName_FP7},
1032+*/
1033+ {"FP0S", RegName_FP0S},
1034+ {"FP1S", RegName_FP1S},
1035+ {"FP2S", RegName_FP2S},
1036+ {"FP3S", RegName_FP3S},
1037+ {"FP4S", RegName_FP4S},
1038+ {"FP5S", RegName_FP5S},
1039+ {"FP6S", RegName_FP6S},
1040+ {"FP7S", RegName_FP7S},
1041+
1042+ {"FP0D", RegName_FP0D},
1043+ {"FP1D", RegName_FP1D},
1044+ {"FP2D", RegName_FP2D},
1045+ {"FP3D", RegName_FP3D},
1046+ {"FP4D", RegName_FP4D},
1047+ {"FP5D", RegName_FP5D},
1048+ {"FP6D", RegName_FP6D},
1049+ {"FP7D", RegName_FP7D},
1050+
1051+ {"XMM0", RegName_XMM0},
1052+ {"XMM1", RegName_XMM1},
1053+ {"XMM2", RegName_XMM2},
1054+ {"XMM3", RegName_XMM3},
1055+ {"XMM4", RegName_XMM4},
1056+ {"XMM5", RegName_XMM5},
1057+ {"XMM6", RegName_XMM6},
1058+ {"XMM7", RegName_XMM7},
1059+#ifdef _EM64T_
1060+ {"XMM8", RegName_XMM8},
1061+ {"XMM9", RegName_XMM9},
1062+ {"XMM10", RegName_XMM10},
1063+ {"XMM11", RegName_XMM11},
1064+ {"XMM12", RegName_XMM12},
1065+ {"XMM13", RegName_XMM13},
1066+ {"XMM14", RegName_XMM14},
1067+ {"XMM15", RegName_XMM15},
1068+#endif
1069+
1070+
1071+ {"XMM0S", RegName_XMM0S},
1072+ {"XMM1S", RegName_XMM1S},
1073+ {"XMM2S", RegName_XMM2S},
1074+ {"XMM3S", RegName_XMM3S},
1075+ {"XMM4S", RegName_XMM4S},
1076+ {"XMM5S", RegName_XMM5S},
1077+ {"XMM6S", RegName_XMM6S},
1078+ {"XMM7S", RegName_XMM7S},
1079+#ifdef _EM64T_
1080+ {"XMM8S", RegName_XMM8S},
1081+ {"XMM9S", RegName_XMM9S},
1082+ {"XMM10S", RegName_XMM10S},
1083+ {"XMM11S", RegName_XMM11S},
1084+ {"XMM12S", RegName_XMM12S},
1085+ {"XMM13S", RegName_XMM13S},
1086+ {"XMM14S", RegName_XMM14S},
1087+ {"XMM15S", RegName_XMM15S},
1088+#endif
1089+
1090+ {"XMM0D", RegName_XMM0D},
1091+ {"XMM1D", RegName_XMM1D},
1092+ {"XMM2D", RegName_XMM2D},
1093+ {"XMM3D", RegName_XMM3D},
1094+ {"XMM4D", RegName_XMM4D},
1095+ {"XMM5D", RegName_XMM5D},
1096+ {"XMM6D", RegName_XMM6D},
1097+ {"XMM7D", RegName_XMM7D},
1098+#ifdef _EM64T_
1099+ {"XMM8D", RegName_XMM8D},
1100+ {"XMM9D", RegName_XMM9D},
1101+ {"XMM10D", RegName_XMM10D},
1102+ {"XMM11D", RegName_XMM11D},
1103+ {"XMM12D", RegName_XMM12D},
1104+ {"XMM13D", RegName_XMM13D},
1105+ {"XMM14D", RegName_XMM14D},
1106+ {"XMM15D", RegName_XMM15D},
1107+#endif
1108+
1109+ {"EFLGS", RegName_EFLAGS},
1110+};
1111+
1112+
1113+const char * getRegNameString(RegName reg)
1114+{
1115+ for (unsigned i = 0; i<COUNTOF(registers); i++) {
1116+ if (registers[i].regname == reg) {
1117+ return registers[i].regstring;
1118+ }
1119+ }
1120+ return "(null)";
1121+}
1122+
1123+RegName getRegName(const char * regname)
1124+{
1125+ if (NULL == regname) {
1126+ return RegName_Null;
1127+ }
1128+
1129+ for (unsigned i = 0; i<COUNTOF(registers); i++) {
1130+ if (!strcmpi(regname,registers[i].regstring)) {
1131+ return registers[i].regname;
1132+ }
1133+ }
1134+ return RegName_Null;
1135+}
1136+
1137+ENCODER_NAMESPACE_END
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_base.h
@@ -0,0 +1,748 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+
21+/**
22+ * @file
23+ * @brief Main encoding routines and structures.
24+ */
25+
26+#ifndef __ENC_BASE_H_INCLUDED__
27+#define __ENC_BASE_H_INCLUDED__
28+
29+#include "enc_defs.h"
30+
31+
32+#include <stdlib.h>
33+#include <assert.h>
34+#include <memory.h>
35+
36+ENCODER_NAMESPACE_START
37+struct MnemonicInfo;
38+struct OpcodeInfo;
39+struct Rex;
40+
41+/**
42+ * @brief Basic facilities for generation of processor's instructions.
43+ *
44+ * The class EncoderBase represents the basic facilities for the encoding of
45+ * processor's instructions on IA32 and EM64T platforms.
46+ *
47+ * The class provides general interface to generate the instructions as well
48+ * as to retrieve some static data about instructions (number of arguments,
49+ * their roles, etc).
50+ *
51+ * Currently, the EncoderBase class is used for both LIL and Jitrino code
52+ * generators. Each of these code generators has its own wrapper to adapt
53+ * this general interface for specific needs - see encoder.h for LIL wrappers
54+ * and Ia32Encoder.h for Jitrino's adapter.
55+ *
56+ * The interface is provided through static methods; no instances of
57+ * EncoderBase are to be created.
58+ *
59+ * @todo RIP-based addressing on EM64T is not supported yet.
60+ */
61+class EncoderBase {
62+public:
63+ class Operands;
64+ struct MnemonicDesc;
65+ /**
66+ * @brief Generates processor's instruction.
67+ *
68+ * @param stream - a buffer to generate into
69+ * @param mn - \link Mnemonic mnemonic \endlink of the instruction
70+ * @param opnds - operands for the instruction
71+ * @returns (stream + length of the just generated instruction)
72+ */
73+ static char * encode(char * stream, Mnemonic mn, const Operands& opnds);
74+ static char * getOpndLocation(int index);
75+
76+ /**
77+ * @brief Generates the smallest possible number of NOP-s.
78+ *
79+ * Effectively generates the smallest possible number of instructions
80+ * that act as NOP-s for the CPU. Normally used for code alignment.
81+ *
82+ * The method inserts exactly the number of bytes specified. It is the
83+ * caller's responsibility to make sure the buffer is big enough.
84+ *
85+ * @param stream - buffer where to generate code into, can not be NULL
86+ * @param howMany - how many bytes to fill with NOP-s
87+ * @return \c (stream+howMany)
88+ */
89+ static char * nops(char * stream, unsigned howMany);
90+
91+ /**
92+ * @brief Inserts a prefix into the code buffer.
93+ *
94+ * The method writes no more than one byte into the buffer. It is the
95+ * caller's responsibility to make sure the buffer is big enough.
96+ *
97+ * @param stream - buffer where to insert the prefix
98+ * @param pref - prefix to be inserted. If it's InstPrefix_Null, then
99+ * no action performed and return value is \c stream.
100+ * @return \c (stream+1) if pref is not InstPrefix_Null, or \c stream
101+ * otherwise
102+ */
103+ static char * prefix(char* stream, InstPrefix pref);
104+
105+ /**
106+ * @brief Determines if an operand with opndExt suits a position with instExt.
107+ */
108+ static bool extAllowed(OpndExt opndExt, OpndExt instExt);
109+
110+ /**
111+ * @brief Returns MnemonicDesc by the given Mnemonic.
112+ */
113+ static const MnemonicDesc * getMnemonicDesc(Mnemonic mn)
114+ {
115+ assert(mn < Mnemonic_Count);
116+ return mnemonics + mn;
117+ }
118+
119+ /**
120+ * @brief Returns a Mnemonic for the given name.
121+ *
122+ * The lookup is case-insensitive; if no mnemonic is found for the given
123+ * string, Mnemonic_Null is returned.
124+ */
125+ static Mnemonic str2mnemonic(const char * mn_name);
126+
127+ /**
128+ * @brief Returns a string representation of the given Mnemonic.
129+ *
130+ * If an invalid mnemonic is passed, the behavior is undefined.
131+ */
132+ static const char * getMnemonicString(Mnemonic mn)
133+ {
134+ return getMnemonicDesc(mn)->name;
135+ }
136+
137+ static const char * toStr(Mnemonic mn)
138+ {
139+ return getMnemonicDesc(mn)->name;
140+ }
141+
142+
143+ /**
144+ * @brief Description of operand.
145+ *
146+ * Description of an operand in opcode - its kind, size or RegName if
147+ * operand must be a particular register.
148+ */
149+ struct OpndDesc {
150+ /**
151+ * @brief Location of the operand.
152+ *
153+ * May be a mask, i.e. OpndKind_Imm|OpndKind_Mem.
154+ */
155+ OpndKind kind;
156+ /**
157+ * @brief Size of the operand.
158+ */
159+ OpndSize size;
160+ /**
161+ * @brief Extension of the operand.
162+ */
163+ OpndExt ext;
164+ /**
165+ * @brief Appropriate RegName if operand must reside on a particular
166+ * register (i.e. CWD/CDQ instructions), RegName_Null
167+ * otherwise.
168+ */
169+ RegName reg;
170+ };
171+
172+ /**
173+ * @brief Description of operands' roles in instruction.
174+ */
175+ struct OpndRolesDesc {
176+ /**
177+ * @brief Total number of operands in the operation.
178+ */
179+ unsigned count;
180+ /**
181+ * @brief Number of defs in the operation.
182+ */
183+ unsigned defCount;
184+ /**
185+ * @brief Number of uses in the operation.
186+ */
187+ unsigned useCount;
188+ /**
189+ * @brief Operand roles, bit-packed.
190+ *
191+ * A bit-packed info about operands' roles. Each operand's role is
192+ * described by two bits, packed left-to-right: operand#0 occupies
193+ * the most significant pair in use (see getOpndRoles() below).
194+ *
195+ * The mask is built by ORing #OpndRole_Def and #OpndRole_Use
196+ * appropriately and shifting left, i.e. for three operands
197+ * - the last operand's role would be '(OpndRole_Def|OpndRole_Use)'
198+ * - operand#1's role would be 'OpndRole_Use<<2'
199+ * - and operand#0's role would be, say, 'OpndRole_Def<<4'.
200+ */
201+ unsigned roles;
202+ };
203+
204+ /**
205+ * @brief Extracts appropriate OpndRole for a given operand.
206+ *
207+ * The order of operands is left-to-right, i.e. for MOV, it
208+ * would be 'MOV op0, op1'
209+ */
210+ static OpndRole getOpndRoles(OpndRolesDesc ord, unsigned idx)
211+ {
212+ assert(idx < ord.count);
213+ return (OpndRole)(ord.roles>>((ord.count-1-idx)*2) & 0x3);
214+ }
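
A worked example of the packing (a sketch; real descriptors are generated from the opcode tables). For a two-operand 'MOV op0, op1', op0 is a def and op1 a use:

    EncoderBase::OpndRolesDesc ord;
    ord.count = 2; ord.defCount = 1; ord.useCount = 1;
    ord.roles = (OpndRole_Def << 2) | OpndRole_Use;            // op0 in the upper pair
    assert(EncoderBase::getOpndRoles(ord, 0) == OpndRole_Def); // op0
    assert(EncoderBase::getOpndRoles(ord, 1) == OpndRole_Use); // op1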
215+
216+ /**
217+ * @brief Defines the maximum number of operands for an opcode.
218+ *
219+ * The 3 mainly comes from IDIV/IMUL, which may have up to
220+ * 3 operands.
221+ */
222+ static const unsigned int MAX_NUM_OPCODE_OPERANDS = 3;
223+
224+ /**
225+ * @brief Info about single opcode - its opcode bytes, operands,
226+ * operands' roles.
227+ */
228+ union OpcodeDesc {
229+ char dummy[128]; // To make total size a power of 2
230+
231+ struct {
232+ /**
233+ * @brief Raw opcode bytes.
234+ *
235+ * 'Raw' opcode bytes which do not require any analysis and are
236+ * independent of arguments/sizes/etc (may include opcode size
237+ * prefix).
238+ */
239+ char opcode[5];
240+ unsigned opcode_len;
241+ unsigned aux0;
242+ unsigned aux1;
243+ /**
244+ * @brief Info about opcode's operands.
245+ */
246+ OpndDesc opnds[MAX_NUM_OPCODE_OPERANDS];
247+ unsigned first_opnd;
248+ /**
249+ * @brief Info about operands - total number, number of uses/defs,
250+ * operands' roles.
251+ */
252+ OpndRolesDesc roles;
253+ /**
254+ * @brief If not zero, then this is the final OpcodeDesc structure in
255+ * the list of opcodes for a given mnemonic.
256+ */
257+ char last;
258+ char platf;
259+ };
260+ };
261+public:
262+ /**
263+ * @brief General info about mnemonic.
264+ */
265+ struct MnemonicDesc {
266+ /**
267+ * @brief The mnemonic itself.
268+ */
269+ Mnemonic mn;
270+ /**
271+ * Various characteristics of mnemonic.
272+ * @see MF_
273+ */
274+ unsigned flags;
275+ /**
276+ * @brief Operation's operand's count and roles.
277+ *
278+ * For operations whose opcodes may take a different number of
279+ * operands (i.e. IMUL/SHL), either the most common value is used,
280+ * or the field is left empty.
281+ */
282+ OpndRolesDesc roles;
283+ /**
284+ * @brief Print name of the mnemonic.
285+ */
286+ const char * name;
287+ };
288+
289+
290+ /**
291+ * @brief Magic number, shows a maximum value a hash code can take.
292+ *
294+ * For its meaning and arithmetic, see enc_tabl.cpp.
294+ *
295+ * The value was increased from '5155' to '8192' to make it aligned
296+ * for faster access in EncoderBase::lookup().
297+ *
298+ * It was further increased to 16384 when support for 3-operand opcodes
299+ * with XMM registers was added.
300+ */
301+ static const unsigned int HASH_MAX = 16384; //5155;
302+ /**
303+ * @brief Empty value, used in hash-to-opcode map to show an empty slot.
304+ */
305+ static const unsigned char NOHASH = 0xFF;
306+ /**
307+ * @brief Number of hash bits contributed by each operand.
308+ */
309+ static const unsigned char HASH_BITS_PER_OPERAND = 5;
310+
311+ /**
312+ * @brief Contains info about a single instruction's operand - its
313+ * location, size and a value for immediate or RegName for
314+ * register operands.
315+ */
316+ class Operand {
317+ public:
318+ /**
319+ * @brief Initializes the instance with empty size and kind.
320+ */
321+ Operand() : m_kind(OpndKind_Null), m_size(OpndSize_Null), m_ext(OpndExt_None), m_need_rex(false) {}
322+ /**
323+ * @brief Creates register operand from given RegName.
324+ */
325+ Operand(RegName reg, OpndExt ext = OpndExt_None) : m_kind(getRegKind(reg)),
326+ m_size(getRegSize(reg)),
327+ m_ext(ext), m_reg(reg)
328+ {
329+ hash_it();
330+ }
331+ /**
332+ * @brief Creates register operand from given RegName and with the
333+ * specified size and kind.
334+ *
335+ * Used to speed up Operand creation, as there is no need to extract
336+ * the size and kind from the RegName.
337+ * The provided size and kind must match those of the RegName, though.
338+ */
339+ Operand(OpndSize sz, OpndKind kind, RegName reg, OpndExt ext = OpndExt_None) :
340+ m_kind(kind), m_size(sz), m_ext(ext), m_reg(reg)
341+ {
342+ assert(m_size == getRegSize(reg));
343+ assert(m_kind == getRegKind(reg));
344+ hash_it();
345+ }
346+ /**
347+ * @brief Creates immediate operand with the given size and value.
348+ */
349+ Operand(OpndSize size, long long ival, OpndExt ext = OpndExt_None) :
350+ m_kind(OpndKind_Imm), m_size(size), m_ext(ext), m_imm64(ival)
351+ {
352+ hash_it();
353+ }
354+ /**
355+ * @brief Creates immediate operand of OpndSize_32.
356+ */
357+ Operand(int ival, OpndExt ext = OpndExt_None) :
358+ m_kind(OpndKind_Imm), m_size(OpndSize_32), m_ext(ext), m_imm64(ival)
359+ {
360+ hash_it();
361+ }
362+ /**
363+ * @brief Creates immediate operand of OpndSize_16.
364+ */
365+ Operand(short ival, OpndExt ext = OpndExt_None) :
366+ m_kind(OpndKind_Imm), m_size(OpndSize_16), m_ext(ext), m_imm64(ival)
367+ {
368+ hash_it();
369+ }
370+
371+ /**
372+ * @brief Creates immediate operand of OpndSize_8.
373+ */
374+ Operand(char ival, OpndExt ext = OpndExt_None) :
375+ m_kind(OpndKind_Imm), m_size(OpndSize_8), m_ext(ext), m_imm64(ival)
376+ {
377+ hash_it();
378+ }
379+
380+ /**
381+ * @brief Creates memory operand.
382+ */
383+ Operand(OpndSize size, RegName base, RegName index, unsigned scale,
384+ int disp, OpndExt ext = OpndExt_None) : m_kind(OpndKind_Mem), m_size(size), m_ext(ext)
385+ {
386+ m_base = base;
387+ m_index = index;
388+ m_scale = scale;
389+ m_disp = disp;
390+ hash_it();
391+ }
392+
393+ /**
394+ * @brief Creates memory operand with only base and displacement.
395+ */
396+ Operand(OpndSize size, RegName base, int disp, OpndExt ext = OpndExt_None) :
397+ m_kind(OpndKind_Mem), m_size(size), m_ext(ext)
398+ {
399+ m_base = base;
400+ m_index = RegName_Null;
401+ m_scale = 0;
402+ m_disp = disp;
403+ hash_it();
404+ }
405+ //
406+ // general info
407+ //
408+ /**
409+ * @brief Returns kind of the operand.
410+ */
411+ OpndKind kind(void) const { return m_kind; }
412+ /**
413+ * @brief Returns size of the operand.
414+ */
415+ OpndSize size(void) const { return m_size; }
416+ /**
417+ * @brief Returns extension of the operand.
418+ */
419+ OpndExt ext(void) const { return m_ext; }
420+ /**
421+ * @brief Returns hash of the operand.
422+ */
423+ unsigned hash(void) const { return m_hash; }
424+ //
425+#ifdef _EM64T_
426+ bool need_rex(void) const { return m_need_rex; }
427+#else
428+ bool need_rex(void) const { return false; }
429+#endif
430+ /**
431+ * @brief Tests whether operand is memory operand.
432+ */
433+ bool is_mem(void) const { return is_placed_in(OpndKind_Mem); }
434+ /**
435+ * @brief Tests whether operand is immediate operand.
436+ */
437+ bool is_imm(void) const { return is_placed_in(OpndKind_Imm); }
438+ /**
439+ * @brief Tests whether operand is register operand.
440+ */
441+ bool is_reg(void) const { return is_placed_in(OpndKind_Reg); }
442+ /**
443+ * @brief Tests whether operand is general-purpose register operand.
444+ */
445+ bool is_gpreg(void) const { return is_placed_in(OpndKind_GPReg); }
446+ /**
447+ * @brief Tests whether operand is a floating-point pseudo-register operand.
448+ */
449+ bool is_fpreg(void) const { return is_placed_in(OpndKind_FPReg); }
450+ /**
451+ * @brief Tests whether operand is XMM register operand.
452+ */
453+ bool is_xmmreg(void) const { return is_placed_in(OpndKind_XMMReg); }
454+#ifdef _HAVE_MMX_
455+ /**
456+ * @brief Tests whether operand is MMX register operand.
457+ */
458+ bool is_mmxreg(void) const { return is_placed_in(OpndKind_MMXReg); }
459+#endif
460+ /**
461+ * @brief Tests whether operand is signed immediate operand.
462+ */
463+ //bool is_signed(void) const { assert(is_imm()); return m_is_signed; }
464+
465+ /**
466+ * @brief Returns base of memory operand (RegName_Null if not memory).
467+ */
468+ RegName base(void) const { return is_mem() ? m_base : RegName_Null; }
469+ /**
470+ * @brief Returns index of memory operand (RegName_Null if not memory).
471+ */
472+ RegName index(void) const { return is_mem() ? m_index : RegName_Null; }
473+ /**
474+ * @brief Returns scale of memory operand (0 if not memory).
475+ */
476+ unsigned scale(void) const { return is_mem() ? m_scale : 0; }
477+ /**
478+ * @brief Returns displacement of memory operand (0 if not memory).
479+ */
480+ int disp(void) const { return is_mem() ? m_disp : 0; }
481+ /**
482+ * @brief Returns RegName of register operand (RegName_Null if not
483+ * register).
484+ */
485+ RegName reg(void) const { return is_reg() ? m_reg : RegName_Null; }
486+ /**
487+ * @brief Returns value of immediate operand (0 if not immediate).
488+ */
489+ long long imm(void) const { return is_imm() ? m_imm64 : 0; }
490+ private:
491+ bool is_placed_in(OpndKind kd) const
492+ {
493+ return kd == OpndKind_Reg ?
494+ m_kind == OpndKind_GPReg ||
495+#ifdef _HAVE_MMX_
496+ m_kind == OpndKind_MMXReg ||
497+#endif
498+ m_kind == OpndKind_FPReg ||
499+ m_kind == OpndKind_XMMReg
500+ : kd == m_kind;
501+ }
502+ void hash_it(void)
503+ {
504+ m_hash = get_size_hash(m_size) | get_kind_hash(m_kind);
505+#ifdef _EM64T_
506+ m_need_rex = false;
507+ if (is_reg() && is_em64t_extra_reg(m_reg)) {
508+ m_need_rex = true;
509+ }
510+ else if (is_mem() && (is_em64t_extra_reg(m_base) ||
511+ is_em64t_extra_reg(m_index))) {
512+ m_need_rex = true;
513+ }
514+#endif
515+ }
516+ // general info
517+ OpndKind m_kind;
518+ OpndSize m_size;
519+ OpndExt m_ext;
520+ // complex address form support
521+ RegName m_base;
522+ RegName m_index;
523+ unsigned m_scale;
524+ union {
525+ int m_disp;
526+ RegName m_reg;
527+ long long m_imm64;
528+ };
529+ unsigned m_hash;
530+ bool m_need_rex;
531+ friend class EncoderBase::Operands;
532+ };
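
For illustration, the constructors above cover the register, immediate and memory flavors; e.g. the operands of 'mov eax, [esp+8]' plus an imm32 could be built as follows (a sketch):

    EncoderBase::Operand reg(RegName_EAX);                  // register
    EncoderBase::Operand mem(OpndSize_32, RegName_ESP, 8);  // [esp+8], base+disp form
    EncoderBase::Operand imm(42);                           // imm32
    assert(reg.is_reg() && mem.is_mem() && imm.is_imm());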
533+ /**
534+ * @brief Simple container for up to 3 Operand-s.
535+ */
536+ class Operands {
537+ public:
538+ Operands(void)
539+ {
540+ clear();
541+ }
542+ Operands(const Operand& op0)
543+ {
544+ clear();
545+ add(op0);
546+ }
547+
548+ Operands(const Operand& op0, const Operand& op1)
549+ {
550+ clear();
551+ add(op0); add(op1);
552+ }
553+
554+ Operands(const Operand& op0, const Operand& op1, const Operand& op2)
555+ {
556+ clear();
557+ add(op0); add(op1); add(op2);
558+ }
559+
560+ unsigned count(void) const { return m_count; }
561+ unsigned hash(void) const { return m_hash; }
562+ const Operand& operator[](unsigned idx) const
563+ {
564+ assert(idx<m_count);
565+ return m_operands[idx];
566+ }
567+
568+ void add(const Operand& op)
569+ {
570+ assert(m_count < COUNTOF(m_operands));
571+ m_hash = (m_hash<<HASH_BITS_PER_OPERAND) | op.hash();
572+ m_operands[m_count++] = op;
573+ m_need_rex = m_need_rex || op.m_need_rex;
574+ }
575+#ifdef _EM64T_
576+ bool need_rex(void) const { return m_need_rex; }
577+#else
578+ bool need_rex(void) const { return false; }
579+#endif
580+ void clear(void)
581+ {
582+ m_count = 0; m_hash = 0; m_need_rex = false;
583+ }
584+ private:
585+ unsigned m_count;
586+ Operand m_operands[COUNTOF( ((OpcodeDesc*)NULL)->opnds )];
587+ unsigned m_hash;
588+ bool m_need_rex;
589+ };
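
Putting the pieces together (a sketch, assuming Mnemonic_MOV from enc_defs_ext.h and a large-enough buffer): Operands accumulates the per-operand hashes, 5 bits each (HASH_BITS_PER_OPERAND), and encode() uses that hash for the opcode lookup:

    EncoderBase::Operand dst(RegName_EAX);
    EncoderBase::Operand src(OpndSize_32, RegName_ESP, 8);    // [esp+8]
    EncoderBase::Operands args(dst, src);
    assert(args.hash() == ((dst.hash() << 5) | src.hash()));

    char buf[MAX_NATIVE_INST_SIZE];
    char* end = EncoderBase::encode(buf, Mnemonic_MOV, args); // mov eax, [esp+8]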
590+public:
591+#ifdef _DEBUG
592+ /**
593+ * Verifies some presumptions about the encoding data table.
594+ * Called automatically during static initialization.
595+ */
596+ static int verify(void);
597+#endif
598+
599+private:
600+ /**
601+ * @brief Finds and returns the OpcodeDesc for the given Mnemonic and operands.
602+ */
603+ static const OpcodeDesc * lookup(Mnemonic mn, const Operands& opnds);
604+ /**
605+ * @brief Encodes mod/rm byte.
606+ */
607+ static char* encodeModRM(char* stream, const Operands& opnds,
608+ unsigned idx, const OpcodeDesc * odesc, Rex * prex);
609+ /**
610+ * @brief Encodes the auxiliary parts of an opcode description - '/r', 'ib', etc.
611+ */
612+ static char* encode_aux(char* stream, unsigned aux,
613+ const Operands& opnds, const OpcodeDesc * odesc,
614+ unsigned * pargsCount, Rex* prex);
615+#ifdef _EM64T_
616+ /**
617+ * @brief Returns true if the 'reg' argument represents one of the new
618+ * EM64T registers - R8(D)-R15(D).
619+ *
620+ * The 64-bit versions of the 'old-fashioned' registers, i.e. RAX, are
621+ * not considered 'extra'.
622+ */
623+ static bool is_em64t_extra_reg(const RegName reg)
624+ {
625+ if (needs_rex_r(reg)) {
626+ return true;
627+ }
628+ if (RegName_SPL <= reg && reg <= RegName_R15L) {
629+ return true;
630+ }
631+ return false;
632+ }
633+ static bool needs_rex_r(const RegName reg)
634+ {
635+ if (RegName_R8 <= reg && reg <= RegName_R15) {
636+ return true;
637+ }
638+ if (RegName_R8D <= reg && reg <= RegName_R15D) {
639+ return true;
640+ }
641+ if (RegName_R8S <= reg && reg <= RegName_R15S) {
642+ return true;
643+ }
644+ if (RegName_R8L <= reg && reg <= RegName_R15L) {
645+ return true;
646+ }
647+ if (RegName_XMM8 <= reg && reg <= RegName_XMM15) {
648+ return true;
649+ }
650+ if (RegName_XMM8D <= reg && reg <= RegName_XMM15D) {
651+ return true;
652+ }
653+ if (RegName_XMM8S <= reg && reg <= RegName_XMM15S) {
654+ return true;
655+ }
656+ return false;
657+ }
658+ /**
659+ * @brief Returns the 'processor's index' of the register - the index
660+ * used to encode the register in ModRM/SIB bytes.
661+ *
662+ * For the new EM64T registers the 'HW index' differs from the index in
663+ * RegName: e.g. R9D (index 9) encodes as HW index 1 plus a REX bit. For
664+ * old-fashioned registers it is effectively the same as ::getRegIndex(RegName).
665+ */
666+ static unsigned char getHWRegIndex(const RegName reg)
667+ {
668+ if (getRegKind(reg) != OpndKind_GPReg) {
669+ return getRegIndex(reg);
670+ }
671+ if (RegName_SPL <= reg && reg<=RegName_DIL) {
672+ return getRegIndex(reg);
673+ }
674+ if (RegName_R8L<= reg && reg<=RegName_R15L) {
675+ return getRegIndex(reg) - getRegIndex(RegName_R8L);
676+ }
677+ return is_em64t_extra_reg(reg) ?
678+ getRegIndex(reg)-getRegIndex(RegName_R8D) : getRegIndex(reg);
679+ }
680+#else
681+ static unsigned char getHWRegIndex(const RegName reg)
682+ {
683+ return getRegIndex(reg);
684+ }
685+ static bool is_em64t_extra_reg(const RegName reg)
686+ {
687+ return false;
688+ }
689+#endif
690+public:
691+ static unsigned char get_size_hash(OpndSize size) {
692+ return (size <= OpndSize_64) ? size_hash[size] : 0xFF;
693+ }
694+ static unsigned char get_kind_hash(OpndKind kind) {
695+ return (kind <= OpndKind_Mem) ? kind_hash[kind] : 0xFF;
696+ }
697+
698+ /**
699+ * @brief A table used for the fast computation of hash value.
700+ *
701+ * A change must be strictly balanced with hash-related functions and data
702+ * in enc_base.h/.cpp.
703+ */
704+ static const unsigned char size_hash[OpndSize_64+1];
705+ /**
706+ * @brief A table used for the fast computation of hash value.
707+ *
708+ * A change must be strictly balanced with hash-related functions and data
709+ * in enc_base.h/.cpp.
710+ */
711+ static const unsigned char kind_hash[OpndKind_Mem+1];
712+ /**
713+ * @brief Maximum number of opcodes used for a single mnemonic.
714+ *
715+ * No arithmetic behind the number; it is simply an estimate.
716+ */
717+ static const unsigned int MAX_OPCODES = 32; //20;
718+ /**
719+ * @brief Mapping between an operands' hash code and the opcode index.
720+ */
721+ static unsigned char opcodesHashMap[Mnemonic_Count][HASH_MAX];
722+ /**
723+ * @brief Array of mnemonics.
724+ */
725+ static MnemonicDesc mnemonics[Mnemonic_Count];
726+ /**
727+ * @brief Array of available opcodes.
728+ */
729+ static OpcodeDesc opcodes[Mnemonic_Count][MAX_OPCODES];
730+
731+ static int buildTable(void);
732+ static void buildMnemonicDesc(const MnemonicInfo * minfo);
733+ /**
734+ * @brief Computes hash value for the given operands.
735+ */
736+ static unsigned short getHash(const OpcodeInfo* odesc);
737+ /**
738+ * @brief Dummy variable, for automatic invocation of buildTable() at
739+ * startup.
740+ */
741+ static int dummy;
742+
743+ static char * curRelOpnd[3];
744+};
745+
746+ENCODER_NAMESPACE_END
747+
748+#endif // ifndef __ENC_BASE_H_INCLUDED__
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_defs.h
@@ -0,0 +1,786 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+#ifndef _ENCODER_DEFS_H_
21+#define _ENCODER_DEFS_H_
22+
23+
24+// Used to isolate an experimental or still-being-tuned encoder into a separate
25+// namespace so it can coexist with a stable one in the same bundle.
26+#ifdef ENCODER_ISOLATE
27+ #define ENCODER_NAMESPACE_START namespace enc_ia32 {
28+ #define ENCODER_NAMESPACE_END };
29+#else
30+ #define ENCODER_NAMESPACE_START
31+ #define ENCODER_NAMESPACE_END
32+#endif
33+
34+#include <assert.h>
35+#include "enc_defs_ext.h"
36+
37+#ifndef COUNTOF
38+ /**
39+ * Number of items in an array.
40+ */
41+ #define COUNTOF(a) (sizeof(a)/sizeof(a[0]))
42+#endif
43+
44+#ifdef _EM64T_
45+ /**
46+ * A stack pointer of default platform's size.
47+ */
48+ #define REG_STACK RegName_RSP
49+ /**
50+ * A max GP register (with a highest index number)
51+ */
52+ #define REG_MAX RegName_R15
53+ /**
54+ * Total number of GP registers including stack pointer.
55+ */
56+ #define MAX_REGS 15
57+#else
58+ #define REG_STACK RegName_ESP
59+ #define REG_MAX RegName_EDI
60+ #define MAX_REGS 8
61+#endif
62+
63+ENCODER_NAMESPACE_START
64+
65+/**
66+ * A number of bytes 'eaten' by an ordinary PUSH/POP.
67+ */
68+#define STACK_SLOT_SIZE (sizeof(void*))
69+
70+
71+/**
72+ * A recommended by Intel Arch Manual aligment for instructions that
73+ * are targets for jmps.
74+ */
75+#define JMP_TARGET_ALIGMENT (16)
76+/**
77+ * A maximum possible size of native instruction.
78+ */
79+#define MAX_NATIVE_INST_SIZE (15)
80+/**
81+ * The enum OpndKind describes an operand's location - memory, immediate or a register.
82+ * It can be used as a bit mask.
83+ */
84+typedef enum OpndKind {
85+ /**
86+ * A change must be balanced with at least the following places:
87+ * Ia32::Constraint-s use the OpndKind as a mask
88+ * encoder.cpp & encoder_master_info.cpp use OpndKind as an index for hashing
89+ * - perhaps there are many more places
90+ *
91+ * NOTE: an MMXReg kind is incompatible with the current constraints framework,
92+ * as it's not encoded as a mask.
93+ */
94+ OpndKind_Null=0,
95+ OpndKind_GPReg = 0x01, OpndKind_MinRegKind = OpndKind_GPReg,
96+ OpndKind_SReg = 0x02,
97+#ifdef _HAVE_MMX_
98+ OpndKind_MMXReg = 0x03,
99+#endif
100+ OpndKind_FPReg = 0x04,
101+ OpndKind_XMMReg = 0x08,
102+ OpndKind_OtherReg = 0x10,
103+ OpndKind_StatusReg = OpndKind_OtherReg,
104+ OpndKind_MaxRegKind = OpndKind_StatusReg, // a max existing kind of register
105+ OpndKind_MaxReg, // the above + 1, to be used in array definitions
106+ //
107+ OpndKind_Immediate = 0x20, OpndKind_Imm=OpndKind_Immediate,
108+ OpndKind_Memory = 0x40, OpndKind_Mem=OpndKind_Memory,
109+ //
110+ OpndKind_Reg = 0x1F,
111+ OpndKind_Any = 0x7F,
112+ // synthetic constants. Normally not used anywhere, but are used for
113+ // human-readable display under the debugger
114+ OpndKind_GPReg_Mem = OpndKind_GPReg|OpndKind_Mem,
115+#ifdef _HAVE_MMX_
116+ OpndKind_MMXReg_Mem = OpndKind_MMXReg|OpndKind_Mem,
117+#endif
118+ OpndKind_XMMReg_Mem = OpndKind_XMMReg|OpndKind_Mem,
119+} OpndKind;
120+
121+/**
122+ * Defines the type of extension allowed for a particular operand.
123+ * For example, imul r32,r_m32,imm8 sign-extends imm8 before performing the multiplication.
124+ * To satisfy the instruction's constraints, the immediate operand should be either
125+ * OpndExt_Signed or OpndExt_Any.
126+ */
127+typedef enum OpndExt {
128+ OpndExt_None = 0x0,
129+ OpndExt_Signed = 0x1,
130+ OpndExt_Zero = 0x2,
131+ OpndExt_Any = 0x3,
132+}OpndExt;
133+
134+/**
135+ * enum OpndRole defines the role of an operand in an instruction.
136+ * Can be used as a mask to combine def and use. The complete def+use
137+ * info fits in 2 bits, which is used, say, in Encoder::OpndRole.
138+ */
139+//TODO: this duplicates an Role used in the Ia32::Inst. That duplicate enum should be removed.
140+typedef enum OpndRole {
141+ OpndRole_Null=0,
142+ OpndRole_Use=0x1,
143+ OpndRole_Def=0x2,
144+ OpndRole_UseDef=OpndRole_Use|OpndRole_Def,
145+ OpndRole_All=0xffff,
146+} OpndRole;
147+
148+
149+#define REGNAME(k,s,i) ( ((k & OpndKind_Any)<<24) | ((s & OpndSize_Any)<<16) | (i&0xFF) )
150+
151+// Gregory -
152+// It is critical that all register indexes (3rd number) inside the
153+// following table go in ascending order, that is, R8 goes after
154+// RDI. This is necessary for the decoder when extending registers from
155+// RAX-RDI to R8-R15 by simply adding 8 to the index on the EM64T architecture.
156+typedef enum RegName {
157+
158+ RegName_Null = 0,
159+
160+#ifdef _EM64T_
161+ /*
162+ The index part of the RegName-s for RAX-RDI, EAX-ESI, AX-SI and AL-BH is
163+ the same as the index used during instruction encoding. The same rule
164+ applies to the XMM registers on IA32.
165+ For the new EM64T registers (both GP and XMM) the index needs to be corrected
166+ to obtain the index used in the processor's instructions.
167+ */
168+ RegName_RAX = REGNAME(OpndKind_GPReg,OpndSize_64,0),
169+ RegName_RCX = REGNAME(OpndKind_GPReg,OpndSize_64,1),
170+ RegName_RDX = REGNAME(OpndKind_GPReg,OpndSize_64,2),
171+ RegName_RBX = REGNAME(OpndKind_GPReg,OpndSize_64,3),
172+ RegName_RSP = REGNAME(OpndKind_GPReg,OpndSize_64,4),
173+ RegName_RBP = REGNAME(OpndKind_GPReg,OpndSize_64,5),
174+ RegName_RSI = REGNAME(OpndKind_GPReg,OpndSize_64,6),
175+ RegName_RDI = REGNAME(OpndKind_GPReg,OpndSize_64,7),
176+
177+ RegName_R8 = REGNAME(OpndKind_GPReg,OpndSize_64,8),
178+ RegName_R9 = REGNAME(OpndKind_GPReg,OpndSize_64,9),
179+ RegName_R10 = REGNAME(OpndKind_GPReg,OpndSize_64,10),
180+ RegName_R11 = REGNAME(OpndKind_GPReg,OpndSize_64,11),
181+ RegName_R12 = REGNAME(OpndKind_GPReg,OpndSize_64,12),
182+ RegName_R13 = REGNAME(OpndKind_GPReg,OpndSize_64,13),
183+ RegName_R14 = REGNAME(OpndKind_GPReg,OpndSize_64,14),
184+ RegName_R15 = REGNAME(OpndKind_GPReg,OpndSize_64,15),
185+#endif //~_EM64T_
186+
187+ RegName_EAX=REGNAME(OpndKind_GPReg,OpndSize_32,0),
188+ RegName_ECX=REGNAME(OpndKind_GPReg,OpndSize_32,1),
189+ RegName_EDX=REGNAME(OpndKind_GPReg,OpndSize_32,2),
190+ RegName_EBX=REGNAME(OpndKind_GPReg,OpndSize_32,3),
191+ RegName_ESP=REGNAME(OpndKind_GPReg,OpndSize_32,4),
192+ RegName_EBP=REGNAME(OpndKind_GPReg,OpndSize_32,5),
193+ RegName_ESI=REGNAME(OpndKind_GPReg,OpndSize_32,6),
194+ RegName_EDI=REGNAME(OpndKind_GPReg,OpndSize_32,7),
195+
196+#ifdef _EM64T_
197+ RegName_R8D = REGNAME(OpndKind_GPReg,OpndSize_32,8),
198+ RegName_R9D = REGNAME(OpndKind_GPReg,OpndSize_32,9),
199+ RegName_R10D = REGNAME(OpndKind_GPReg,OpndSize_32,10),
200+ RegName_R11D = REGNAME(OpndKind_GPReg,OpndSize_32,11),
201+ RegName_R12D = REGNAME(OpndKind_GPReg,OpndSize_32,12),
202+ RegName_R13D = REGNAME(OpndKind_GPReg,OpndSize_32,13),
203+ RegName_R14D = REGNAME(OpndKind_GPReg,OpndSize_32,14),
204+ RegName_R15D = REGNAME(OpndKind_GPReg,OpndSize_32,15),
205+#endif //~_EM64T_
206+
207+ RegName_AX=REGNAME(OpndKind_GPReg,OpndSize_16,0),
208+ RegName_CX=REGNAME(OpndKind_GPReg,OpndSize_16,1),
209+ RegName_DX=REGNAME(OpndKind_GPReg,OpndSize_16,2),
210+ RegName_BX=REGNAME(OpndKind_GPReg,OpndSize_16,3),
211+ RegName_SP=REGNAME(OpndKind_GPReg,OpndSize_16,4),
212+ RegName_BP=REGNAME(OpndKind_GPReg,OpndSize_16,5),
213+ RegName_SI=REGNAME(OpndKind_GPReg,OpndSize_16,6),
214+ RegName_DI=REGNAME(OpndKind_GPReg,OpndSize_16,7),
215+
216+#ifdef _EM64T_
217+ RegName_R8S = REGNAME(OpndKind_GPReg,OpndSize_16,8),
218+ RegName_R9S = REGNAME(OpndKind_GPReg,OpndSize_16,9),
219+ RegName_R10S = REGNAME(OpndKind_GPReg,OpndSize_16,10),
220+ RegName_R11S = REGNAME(OpndKind_GPReg,OpndSize_16,11),
221+ RegName_R12S = REGNAME(OpndKind_GPReg,OpndSize_16,12),
222+ RegName_R13S = REGNAME(OpndKind_GPReg,OpndSize_16,13),
223+ RegName_R14S = REGNAME(OpndKind_GPReg,OpndSize_16,14),
224+ RegName_R15S = REGNAME(OpndKind_GPReg,OpndSize_16,15),
225+#endif //~_EM64T_
226+
227+ RegName_AL=REGNAME(OpndKind_GPReg,OpndSize_8,0),
228+ RegName_CL=REGNAME(OpndKind_GPReg,OpndSize_8,1),
229+ RegName_DL=REGNAME(OpndKind_GPReg,OpndSize_8,2),
230+ RegName_BL=REGNAME(OpndKind_GPReg,OpndSize_8,3),
231+ // FIXME: Used in enc_tabl.cpp
232+ // AH is not accessible on EM64T; the encoded register is SPL instead, so
233+ // decoding will return the incorrect enum
234+ RegName_AH=REGNAME(OpndKind_GPReg,OpndSize_8,4),
235+#if !defined(_EM64T_)
236+ RegName_CH=REGNAME(OpndKind_GPReg,OpndSize_8,5),
237+ RegName_DH=REGNAME(OpndKind_GPReg,OpndSize_8,6),
238+ RegName_BH=REGNAME(OpndKind_GPReg,OpndSize_8,7),
239+#else
240+ RegName_SPL=REGNAME(OpndKind_GPReg,OpndSize_8,4),
241+ RegName_BPL=REGNAME(OpndKind_GPReg,OpndSize_8,5),
242+ RegName_SIL=REGNAME(OpndKind_GPReg,OpndSize_8,6),
243+ RegName_DIL=REGNAME(OpndKind_GPReg,OpndSize_8,7),
244+ RegName_R8L=REGNAME(OpndKind_GPReg,OpndSize_8,8),
245+ RegName_R9L=REGNAME(OpndKind_GPReg,OpndSize_8,9),
246+ RegName_R10L=REGNAME(OpndKind_GPReg,OpndSize_8,10),
247+ RegName_R11L=REGNAME(OpndKind_GPReg,OpndSize_8,11),
248+ RegName_R12L=REGNAME(OpndKind_GPReg,OpndSize_8,12),
249+ RegName_R13L=REGNAME(OpndKind_GPReg,OpndSize_8,13),
250+ RegName_R14L=REGNAME(OpndKind_GPReg,OpndSize_8,14),
251+ RegName_R15L=REGNAME(OpndKind_GPReg,OpndSize_8,15),
252+#endif
253+
254+ RegName_ES=REGNAME(OpndKind_SReg,OpndSize_16,0),
255+ RegName_CS=REGNAME(OpndKind_SReg,OpndSize_16,1),
256+ RegName_SS=REGNAME(OpndKind_SReg,OpndSize_16,2),
257+ RegName_DS=REGNAME(OpndKind_SReg,OpndSize_16,3),
258+ RegName_FS=REGNAME(OpndKind_SReg,OpndSize_16,4),
259+ RegName_GS=REGNAME(OpndKind_SReg,OpndSize_16,5),
260+
261+ RegName_EFLAGS=REGNAME(OpndKind_StatusReg,OpndSize_32,0),
262+
263+#if !defined(TESTING_ENCODER)
264+ RegName_FP0=REGNAME(OpndKind_FPReg,OpndSize_80,0),
265+ RegName_FP1=REGNAME(OpndKind_FPReg,OpndSize_80,1),
266+ RegName_FP2=REGNAME(OpndKind_FPReg,OpndSize_80,2),
267+ RegName_FP3=REGNAME(OpndKind_FPReg,OpndSize_80,3),
268+ RegName_FP4=REGNAME(OpndKind_FPReg,OpndSize_80,4),
269+ RegName_FP5=REGNAME(OpndKind_FPReg,OpndSize_80,5),
270+ RegName_FP6=REGNAME(OpndKind_FPReg,OpndSize_80,6),
271+ RegName_FP7=REGNAME(OpndKind_FPReg,OpndSize_80,7),
272+#endif
273+ RegName_FP0S=REGNAME(OpndKind_FPReg,OpndSize_32,0),
274+ RegName_FP1S=REGNAME(OpndKind_FPReg,OpndSize_32,1),
275+ RegName_FP2S=REGNAME(OpndKind_FPReg,OpndSize_32,2),
276+ RegName_FP3S=REGNAME(OpndKind_FPReg,OpndSize_32,3),
277+ RegName_FP4S=REGNAME(OpndKind_FPReg,OpndSize_32,4),
278+ RegName_FP5S=REGNAME(OpndKind_FPReg,OpndSize_32,5),
279+ RegName_FP6S=REGNAME(OpndKind_FPReg,OpndSize_32,6),
280+ RegName_FP7S=REGNAME(OpndKind_FPReg,OpndSize_32,7),
281+
282+ RegName_FP0D=REGNAME(OpndKind_FPReg,OpndSize_64,0),
283+ RegName_FP1D=REGNAME(OpndKind_FPReg,OpndSize_64,1),
284+ RegName_FP2D=REGNAME(OpndKind_FPReg,OpndSize_64,2),
285+ RegName_FP3D=REGNAME(OpndKind_FPReg,OpndSize_64,3),
286+ RegName_FP4D=REGNAME(OpndKind_FPReg,OpndSize_64,4),
287+ RegName_FP5D=REGNAME(OpndKind_FPReg,OpndSize_64,5),
288+ RegName_FP6D=REGNAME(OpndKind_FPReg,OpndSize_64,6),
289+ RegName_FP7D=REGNAME(OpndKind_FPReg,OpndSize_64,7),
290+
291+#if !defined(TESTING_ENCODER)
292+ RegName_XMM0=REGNAME(OpndKind_XMMReg,OpndSize_128,0),
293+ RegName_XMM1=REGNAME(OpndKind_XMMReg,OpndSize_128,1),
294+ RegName_XMM2=REGNAME(OpndKind_XMMReg,OpndSize_128,2),
295+ RegName_XMM3=REGNAME(OpndKind_XMMReg,OpndSize_128,3),
296+ RegName_XMM4=REGNAME(OpndKind_XMMReg,OpndSize_128,4),
297+ RegName_XMM5=REGNAME(OpndKind_XMMReg,OpndSize_128,5),
298+ RegName_XMM6=REGNAME(OpndKind_XMMReg,OpndSize_128,6),
299+ RegName_XMM7=REGNAME(OpndKind_XMMReg,OpndSize_128,7),
300+
301+#ifdef _EM64T_
302+ RegName_XMM8 = REGNAME(OpndKind_XMMReg,OpndSize_128,8),
303+ RegName_XMM9 = REGNAME(OpndKind_XMMReg,OpndSize_128,9),
304+ RegName_XMM10 = REGNAME(OpndKind_XMMReg,OpndSize_128,10),
305+ RegName_XMM11 = REGNAME(OpndKind_XMMReg,OpndSize_128,11),
306+ RegName_XMM12 = REGNAME(OpndKind_XMMReg,OpndSize_128,12),
307+ RegName_XMM13 = REGNAME(OpndKind_XMMReg,OpndSize_128,13),
308+ RegName_XMM14 = REGNAME(OpndKind_XMMReg,OpndSize_128,14),
309+ RegName_XMM15 = REGNAME(OpndKind_XMMReg,OpndSize_128,15),
310+#endif //~_EM64T_
311+
312+#endif // ~TESTING_ENCODER
313+
314+ RegName_XMM0S=REGNAME(OpndKind_XMMReg,OpndSize_32,0),
315+ RegName_XMM1S=REGNAME(OpndKind_XMMReg,OpndSize_32,1),
316+ RegName_XMM2S=REGNAME(OpndKind_XMMReg,OpndSize_32,2),
317+ RegName_XMM3S=REGNAME(OpndKind_XMMReg,OpndSize_32,3),
318+ RegName_XMM4S=REGNAME(OpndKind_XMMReg,OpndSize_32,4),
319+ RegName_XMM5S=REGNAME(OpndKind_XMMReg,OpndSize_32,5),
320+ RegName_XMM6S=REGNAME(OpndKind_XMMReg,OpndSize_32,6),
321+ RegName_XMM7S=REGNAME(OpndKind_XMMReg,OpndSize_32,7),
322+#ifdef _EM64T_
323+ RegName_XMM8S=REGNAME(OpndKind_XMMReg,OpndSize_32,8),
324+ RegName_XMM9S=REGNAME(OpndKind_XMMReg,OpndSize_32,9),
325+ RegName_XMM10S=REGNAME(OpndKind_XMMReg,OpndSize_32,10),
326+ RegName_XMM11S=REGNAME(OpndKind_XMMReg,OpndSize_32,11),
327+ RegName_XMM12S=REGNAME(OpndKind_XMMReg,OpndSize_32,12),
328+ RegName_XMM13S=REGNAME(OpndKind_XMMReg,OpndSize_32,13),
329+ RegName_XMM14S=REGNAME(OpndKind_XMMReg,OpndSize_32,14),
330+ RegName_XMM15S=REGNAME(OpndKind_XMMReg,OpndSize_32,15),
331+#endif // ifdef _EM64T_
332+ RegName_XMM0D=REGNAME(OpndKind_XMMReg,OpndSize_64,0),
333+ RegName_XMM1D=REGNAME(OpndKind_XMMReg,OpndSize_64,1),
334+ RegName_XMM2D=REGNAME(OpndKind_XMMReg,OpndSize_64,2),
335+ RegName_XMM3D=REGNAME(OpndKind_XMMReg,OpndSize_64,3),
336+ RegName_XMM4D=REGNAME(OpndKind_XMMReg,OpndSize_64,4),
337+ RegName_XMM5D=REGNAME(OpndKind_XMMReg,OpndSize_64,5),
338+ RegName_XMM6D=REGNAME(OpndKind_XMMReg,OpndSize_64,6),
339+ RegName_XMM7D=REGNAME(OpndKind_XMMReg,OpndSize_64,7),
340+#ifdef _EM64T_
341+ RegName_XMM8D=REGNAME(OpndKind_XMMReg,OpndSize_64,8),
342+ RegName_XMM9D=REGNAME(OpndKind_XMMReg,OpndSize_64,9),
343+ RegName_XMM10D=REGNAME(OpndKind_XMMReg,OpndSize_64,10),
344+ RegName_XMM11D=REGNAME(OpndKind_XMMReg,OpndSize_64,11),
345+ RegName_XMM12D=REGNAME(OpndKind_XMMReg,OpndSize_64,12),
346+ RegName_XMM13D=REGNAME(OpndKind_XMMReg,OpndSize_64,13),
347+ RegName_XMM14D=REGNAME(OpndKind_XMMReg,OpndSize_64,14),
348+ RegName_XMM15D=REGNAME(OpndKind_XMMReg,OpndSize_64,15),
349+#endif // ifdef _EM64T_
350+#ifdef _HAVE_MMX_
351+ RegName_MMX0=REGNAME(OpndKind_MMXReg,OpndSize_64,0),
352+ RegName_MMX1=REGNAME(OpndKind_MMXReg,OpndSize_64,1),
353+ RegName_MMX2=REGNAME(OpndKind_MMXReg,OpndSize_64,2),
354+ RegName_MMX3=REGNAME(OpndKind_MMXReg,OpndSize_64,3),
355+ RegName_MMX4=REGNAME(OpndKind_MMXReg,OpndSize_64,4),
356+ RegName_MMX5=REGNAME(OpndKind_MMXReg,OpndSize_64,5),
357+ RegName_MMX6=REGNAME(OpndKind_MMXReg,OpndSize_64,6),
358+ RegName_MMX7=REGNAME(OpndKind_MMXReg,OpndSize_64,7),
359+#endif // _HAVE_MMX_
360+} RegName;
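
For illustration (a sketch; getRegName()/getRegIndex() are defined further below in this header): the decoder's 'add 8 to the index' trick from the comment above maps RAX to R8 on EM64T:

    #ifdef _EM64T_
    assert(getRegName(OpndKind_GPReg, OpndSize_64,
                      getRegIndex(RegName_RAX) + 8) == RegName_R8);
    #endif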
361+
362+#if 0 // Android x86: use mnemonics defined in enc_defs_ext.h
363+/**
364+ * Conditional mnemonics.
365+ * The values match the 'real' (==processor's) values of the appropriate
366+ * condition values used in the opcodes.
367+ */
368+enum ConditionMnemonic {
369+
370+ ConditionMnemonic_O=0,
371+ ConditionMnemonic_NO=1,
372+ ConditionMnemonic_B=2, ConditionMnemonic_NAE=ConditionMnemonic_B, ConditionMnemonic_C=ConditionMnemonic_B,
373+ ConditionMnemonic_NB=3, ConditionMnemonic_AE=ConditionMnemonic_NB, ConditionMnemonic_NC=ConditionMnemonic_NB,
374+ ConditionMnemonic_Z=4, ConditionMnemonic_E=ConditionMnemonic_Z,
375+ ConditionMnemonic_NZ=5, ConditionMnemonic_NE=ConditionMnemonic_NZ,
376+ ConditionMnemonic_BE=6, ConditionMnemonic_NA=ConditionMnemonic_BE,
377+ ConditionMnemonic_NBE=7, ConditionMnemonic_A=ConditionMnemonic_NBE,
378+
379+ ConditionMnemonic_S=8,
380+ ConditionMnemonic_NS=9,
381+ ConditionMnemonic_P=10, ConditionMnemonic_PE=ConditionMnemonic_P,
382+ ConditionMnemonic_NP=11, ConditionMnemonic_PO=ConditionMnemonic_NP,
383+ ConditionMnemonic_L=12, ConditionMnemonic_NGE=ConditionMnemonic_L,
384+ ConditionMnemonic_NL=13, ConditionMnemonic_GE=ConditionMnemonic_NL,
385+ ConditionMnemonic_LE=14, ConditionMnemonic_NG=ConditionMnemonic_LE,
386+ ConditionMnemonic_NLE=15, ConditionMnemonic_G=ConditionMnemonic_NLE,
387+ ConditionMnemonic_Count=16
388+};
389+
390+
391+#define CCM(prefix,cond) Mnemonic_##prefix##cond=Mnemonic_##prefix##cc+ConditionMnemonic_##cond
392+
393+//=========================================================================================================
394+enum Mnemonic {
395+
396+Mnemonic_NULL=0, Mnemonic_Null=Mnemonic_NULL,
397+Mnemonic_ADC, // Add with Carry
398+Mnemonic_ADD, // Add
399+Mnemonic_ADDSD, // Add Scalar Double-Precision Floating-Point Values
400+Mnemonic_ADDSS, // Add Scalar Single-Precision Floating-Point Values
401+Mnemonic_AND, // Logical AND
402+
403+Mnemonic_BSF, // Bit scan forward
404+Mnemonic_BSR, // Bit scan reverse
405+
406+Mnemonic_CALL, // Call Procedure
407+Mnemonic_CMC, // Complement Carry Flag
408+Mnemonic_CWD, Mnemonic_CDQ=Mnemonic_CWD,// Convert Word to Doubleword/Convert Doubleword to Quadword
409+Mnemonic_CMOVcc, // Conditional Move
410+ CCM(CMOV,O),
411+ CCM(CMOV,NO),
412+ CCM(CMOV,B), CCM(CMOV,NAE), CCM(CMOV,C),
413+ CCM(CMOV,NB), CCM(CMOV,AE), CCM(CMOV,NC),
414+ CCM(CMOV,Z), CCM(CMOV,E),
415+ CCM(CMOV,NZ), CCM(CMOV,NE),
416+ CCM(CMOV,BE), CCM(CMOV,NA),
417+ CCM(CMOV,NBE), CCM(CMOV,A),
418+
419+ CCM(CMOV,S),
420+ CCM(CMOV,NS),
421+ CCM(CMOV,P), CCM(CMOV,PE),
422+ CCM(CMOV,NP), CCM(CMOV,PO),
423+ CCM(CMOV,L), CCM(CMOV,NGE),
424+ CCM(CMOV,NL), CCM(CMOV,GE),
425+ CCM(CMOV,LE), CCM(CMOV,NG),
426+ CCM(CMOV,NLE), CCM(CMOV,G),
427+
428+Mnemonic_CMP, // Compare Two Operands
429+Mnemonic_CMPXCHG, // Compare and exchange
430+Mnemonic_CMPXCHG8B, // Compare and Exchange 8 Bytes
431+Mnemonic_CMPSB, // Compare Two Bytes at DS:ESI and ES:EDI
432+Mnemonic_CMPSW, // Compare Two Words at DS:ESI and ES:EDI
433+Mnemonic_CMPSD, // Compare Two Doublewords at DS:ESI and ES:EDI
434+//
435+// double -> float
436+Mnemonic_CVTSD2SS, // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
437+// double -> I_32
438+Mnemonic_CVTSD2SI, // Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer
439+// double [truncated] -> I_32
440+Mnemonic_CVTTSD2SI, // Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Doubleword Integer
441+//
442+// float -> double
443+Mnemonic_CVTSS2SD, // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
444+// float -> I_32
445+Mnemonic_CVTSS2SI, // Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer
446+// float [truncated] -> I_32
447+Mnemonic_CVTTSS2SI, // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
448+//
449+// I_32 -> double
450+Mnemonic_CVTSI2SD, // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
451+// I_32 -> float
452+Mnemonic_CVTSI2SS, // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
453+
454+Mnemonic_COMISD, // Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS
455+Mnemonic_COMISS, // Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS
456+Mnemonic_DEC, // Decrement by 1
457+//Mnemonic_DIV, // Unsigned Divide
458+Mnemonic_DIVSD, // Divide Scalar Double-Precision Floating-Point Values
459+Mnemonic_DIVSS, // Divide Scalar Single-Precision Floating-Point Values
460+
461+#ifdef _HAVE_MMX_
462+Mnemonic_EMMS, // Empty MMX Technology State
463+#endif
464+
465+Mnemonic_ENTER, // ENTER-Make Stack Frame for Procedure Parameters
466+Mnemonic_FLDCW, // Load FPU control word
467+Mnemonic_FADDP,
468+Mnemonic_FLDZ,
469+Mnemonic_FADD,
470+Mnemonic_FSUBP,
471+Mnemonic_FSUB,
472+Mnemonic_FISUB,
473+Mnemonic_FMUL,
474+Mnemonic_FMULP,
475+Mnemonic_FDIVP,
476+Mnemonic_FDIV,
477+Mnemonic_FUCOMPP,
478+Mnemonic_FRNDINT,
479+Mnemonic_FNSTCW, // Store FPU control word
480+Mnemonic_FSTSW, // Store FPU status word
481+Mnemonic_FNSTSW, // Store FPU status word
482+//Mnemonic_FDECSTP, // Decrement Stack-Top Pointer
483+Mnemonic_FILD, // Load Integer
484+Mnemonic_FLD, // Load Floating Point Value
485+Mnemonic_FLDLG2,
486+Mnemonic_FLDLN2,
487+Mnemonic_FLD1,
488+
489+Mnemonic_FCLEX, // Clear Exceptions
490+Mnemonic_FCHS, // Change sign of ST0
491+Mnemonic_FNCLEX, // Clear Exceptions
492+
493+//Mnemonic_FINCSTP, // Increment Stack-Top Pointer
494+Mnemonic_FIST, // Store Integer
495+Mnemonic_FISTP, // Store Integer, pop FPU stack
496+Mnemonic_FISTTP, // Store Integer with Truncation
497+Mnemonic_FPREM, // Partial Remainder
498+Mnemonic_FPREM1, // Partial Remainder
499+Mnemonic_FST, // Store Floating Point Value
500+Mnemonic_FSTP, // Store Floating Point Value and pop the FP stack
501+Mnemonic_FSQRT, //Computes the square root of the source value in the stack and pop the FP stack
502+Mnemonic_FABS, //Computes the absolute value of the source value in the stack and pop the FP stack
503+Mnemonic_FSIN, //Computes the sine of the source value in the stack and pop the FP stack
504+Mnemonic_FCOS, //Computes the cosine of the source value in the stack and pop the FP stack
505+Mnemonic_FPTAN, //Computes the tangent of the source value in the stack and pop the FP stack
506+Mnemonic_FYL2X,
507+Mnemonic_FYL2XP1,
508+Mnemonic_F2XM1,
509+Mnemonic_FPATAN,
510+Mnemonic_FXCH,
511+Mnemonic_FSCALE,
512+
513+Mnemonic_XCHG,
514+Mnemonic_DIV, // Unsigned Divide
515+Mnemonic_IDIV, // Signed Divide
516+Mnemonic_MUL, // Unsigned Multiply
517+Mnemonic_IMUL, // Signed Multiply
518+Mnemonic_INC, // Increment by 1
519+Mnemonic_INT3, // Call break point
520+Mnemonic_Jcc, // Jump if Condition Is Met
521+ CCM(J,O),
522+ CCM(J,NO),
523+ CCM(J,B), CCM(J,NAE), CCM(J,C),
524+ CCM(J,NB), CCM(J,AE), CCM(J,NC),
525+ CCM(J,Z), CCM(J,E),
526+ CCM(J,NZ), CCM(J,NE),
527+ CCM(J,BE), CCM(J,NA),
528+ CCM(J,NBE), CCM(J,A),
529+ CCM(J,S),
530+ CCM(J,NS),
531+ CCM(J,P), CCM(J,PE),
532+ CCM(J,NP), CCM(J,PO),
533+ CCM(J,L), CCM(J,NGE),
534+ CCM(J,NL), CCM(J,GE),
535+ CCM(J,LE), CCM(J,NG),
536+ CCM(J,NLE), CCM(J,G),
537+Mnemonic_JMP, // Jump
538+Mnemonic_LEA, // Load Effective Address
539+Mnemonic_LEAVE, // High Level Procedure Exit
540+Mnemonic_LOOP, // Loop according to ECX counter
541+Mnemonic_LOOPE, // Loop according to ECX counter
542+Mnemonic_LOOPNE, Mnemonic_LOOPNZ = Mnemonic_LOOPNE, // Loop according to ECX
543+Mnemonic_LAHF, // Load Flags into AH
544+Mnemonic_MOV, // Move
545+Mnemonic_MOVD, // Move Double word
546+Mnemonic_MOVQ, // Move Quadword
547+/*Mnemonic_MOVS, // Move Data from String to String*/
548+// MOVS is a special case: see encoding table for more details,
549+Mnemonic_MOVS8, Mnemonic_MOVS16, Mnemonic_MOVS32, Mnemonic_MOVS64,
550+//
551+Mnemonic_MOVAPD, // Move Scalar Double-Precision Floating-Point Value
552+Mnemonic_MOVSD, // Move Scalar Double-Precision Floating-Point Value
553+Mnemonic_MOVSS, // Move Scalar Single-Precision Floating-Point Values
554+Mnemonic_MOVSX, // Move with Sign-Extension
555+Mnemonic_MOVZX, // Move with Zero-Extend
556+//Mnemonic_MUL, // Unsigned Multiply
557+Mnemonic_MULSD, // Multiply Scalar Double-Precision Floating-Point Values
558+Mnemonic_MULSS, // Multiply Scalar Single-Precision Floating-Point Values
559+Mnemonic_NEG, // Two's Complement Negation
560+Mnemonic_NOP, // No Operation
561+Mnemonic_NOT, // One's Complement Negation
562+Mnemonic_OR, // Logical Inclusive OR
563+Mnemonic_PREFETCH, // prefetch
564+
565+#ifdef _HAVE_MMX_
566+ Mnemonic_PADDQ, // Add Packed Quadword Integers
567+ Mnemonic_PAND, // Logical AND
568+ Mnemonic_POR, // Bitwise Logical OR
569+ Mnemonic_PSUBQ, // Subtract Packed Quadword Integers
570+#endif
571+
572+Mnemonic_PXOR, // Logical Exclusive OR
573+Mnemonic_POP, // Pop a Value from the Stack
574+Mnemonic_POPFD, // Pop a Value of EFLAGS register from the Stack
575+Mnemonic_PUSH, // Push Word or Doubleword Onto the Stack
576+Mnemonic_PUSHFD, // Push EFLAGS Doubleword Onto the Stack
577+Mnemonic_RET, // Return from Procedure
578+
579+Mnemonic_SETcc, // Set Byte on Condition
580+ CCM(SET,O),
581+ CCM(SET,NO),
582+ CCM(SET,B), CCM(SET,NAE), CCM(SET,C),
583+ CCM(SET,NB), CCM(SET,AE), CCM(SET,NC),
584+ CCM(SET,Z), CCM(SET,E),
585+ CCM(SET,NZ), CCM(SET,NE),
586+ CCM(SET,BE), CCM(SET,NA),
587+ CCM(SET,NBE), CCM(SET,A),
588+ CCM(SET,S),
589+ CCM(SET,NS),
590+ CCM(SET,P), CCM(SET,PE),
591+ CCM(SET,NP), CCM(SET,PO),
592+ CCM(SET,L), CCM(SET,NGE),
593+ CCM(SET,NL), CCM(SET,GE),
594+ CCM(SET,LE), CCM(SET,NG),
595+ CCM(SET,NLE), CCM(SET,G),
596+
597+Mnemonic_SAL, Mnemonic_SHL=Mnemonic_SAL,// Shift left
598+Mnemonic_SAR, // Shift right
599+Mnemonic_ROR, // Rotate right
600+Mnemonic_RCR, // Rotate right through CARRY flag
601+Mnemonic_ROL, // Rotate left
602+Mnemonic_RCL, // Rotate left through CARRY flag
603+Mnemonic_SHR, // Unsigned shift right
604+Mnemonic_SHRD, // Double Precision Shift Right
605+Mnemonic_SHLD, // Double Precision Shift Left
606+
607+Mnemonic_SBB, // Integer Subtraction with Borrow
608+Mnemonic_SUB, // Subtract
609+Mnemonic_SUBSD, // Subtract Scalar Double-Precision Floating-Point Values
610+Mnemonic_SUBSS, // Subtract Scalar Single-Precision Floating-Point Values
611+
612+Mnemonic_TEST, // Logical Compare
613+
614+Mnemonic_UCOMISD, // Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS
615+Mnemonic_UCOMISS, // Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS
616+
617+Mnemonic_XOR, // Logical Exclusive OR
618+//
619+// packed things,
620+//
621+Mnemonic_XORPD, // Bitwise Logical XOR for Double-Precision Floating-Point Values
622+Mnemonic_XORPS, // Bitwise Logical XOR for Single-Precision Floating-Point Values
623+
624+Mnemonic_CVTDQ2PD, // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values
625+Mnemonic_CVTTPD2DQ, // Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers
626+
627+Mnemonic_CVTDQ2PS, // Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values
628+Mnemonic_CVTTPS2DQ, // Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Doubleword Integers
629+//
630+// String operations
631+//
632+Mnemonic_STD, // Set direction flag
633+Mnemonic_CLD, // Clear direction flag
634+Mnemonic_SCAS, // Scan string
635+Mnemonic_STOS, // Store string
636+
637+//
638+Mnemonic_WAIT, // Check for pending unmasked floating-point exceptions
639+//
640+Mnemonic_Count
641+};
642+
643+#undef CCM
644+#endif
645+
646+/**
647+ * @brief Instruction prefixes, according to arch manual.
648+ */
649+typedef enum InstPrefix {
650+ InstPrefix_Null = 0,
651+ // Group 1
652+ InstPrefix_LOCK = 0xF0,
653+ InstPrefix_REPNE = 0xF2,
654+ InstPrefix_REPNZ = InstPrefix_REPNE,
655+ InstPrefix_REP = 0xF3, InstPrefix_REPZ = InstPrefix_REP,
656+ // Group 2
657+ InstPrefix_CS = 0x2E,
658+ InstPrefix_SS = 0x36,
659+ InstPrefix_DS = 0x3E,
660+ InstPrefix_ES = 0x26,
661+ InstPrefix_FS = 0x64,
662+ InstPrefix_GS = 0x65,
663+ //
664+ InstPrefix_HintTaken = 0x3E,
665+ InstPrefix_HintNotTaken = 0x2E,
666+ // Group 3
667+ InstPrefix_OpndSize = 0x66,
668+ // Group 4
669+ InstPrefix_AddrSize = 0x67
670+} InstPrefix;
671+
672+inline unsigned getSizeBytes(OpndSize sz)
673+{
674+ if (sz==OpndSize_64) { return 8; }
675+ if (sz==OpndSize_32) { return 4; }
676+ if (sz==OpndSize_16) { return 2; }
677+ if (sz==OpndSize_8) { return 1; }
678+ assert(false);
679+ return 0;
680+}
681+
682+inline bool isRegKind(OpndKind kind)
683+{
684+ return OpndKind_GPReg<= kind && kind<=OpndKind_MaxRegKind;
685+}
686+
687+/**
688+ * @brief Returns RegName for a given name.
689+ *
690+ * Name is case-insensitive.
691+ * @param regname - string name of a register
692+ * @return RegName for the given name, or RegName_Null if name is invalid
693+ */
694+RegName getRegName(const char * regname);
695+/**
696+ * Constructs RegName from the given OpndKind, size and index.
697+ */
698+inline RegName getRegName(OpndKind k, OpndSize s, int idx)
699+{
700+ return (RegName)REGNAME(k,s,idx);
701+}
702+/**
703+ * Extracts a bit mask with a bit set at the position of the register's index.
704+ */
705+inline unsigned getRegMask(RegName reg)
706+{
707+ return 1<<(reg&0xff);
708+}
709+/**
710+ * @brief Extracts OpndKind from the RegName.
711+ */
712+inline OpndKind getRegKind(RegName reg)
713+{
714+ return (OpndKind)(reg>>24);
715+}
716+/**
717+ * @brief Extracts OpndSize from RegName.
718+ */
719+inline OpndSize getRegSize(RegName reg)
720+{
721+ return (OpndSize)((reg>>16)&0xFF);
722+}
723+/**
724+ * Extracts an index from the given RegName.
725+ */
726+inline unsigned char getRegIndex(RegName reg)
727+{
728+ return (unsigned char)(reg&0xFF);
729+}
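+
+// A sketch of the RegName bit layout implied by the accessors above
+// (illustrative only, derived from the shifts and masks they use):
+//
+// bits 24..31 : OpndKind ( reg >> 24 )
+// bits 16..23 : OpndSize ( (reg >> 16) & 0xFF )
+// bits 0..7 : index ( reg & 0xFF )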
730+/**
731+ * Returns a string name of the given RegName. The name returned is in upper-case.
732+ * Returns NULL if invalid RegName specified.
733+ */
734+const char * getRegNameString(RegName reg);
735+/**
736+ * Returns string name of a given OpndSize.
737+ * Returns NULL if invalid OpndSize passed.
738+ */
739+const char * getOpndSizeString(OpndSize size);
740+/**
741+ * Returns the OpndSize matching the given string representation (case-insensitive).
742+ * Returns OpndSize_Null if an invalid string is specified.
743+ * The 'sizeString' must not be NULL.
744+ */
745+OpndSize getOpndSize(const char * sizeString);
746+/**
747+ * Returns string name of a given OpndKind.
748+ * Returns NULL if the passed kind is invalid.
749+ */
750+const char * getOpndKindString(OpndKind kind);
751+/**
752+ * Returns the OpndKind matching the given string representation (case-insensitive).
753+ * Returns OpndKind_Null if the name is invalid.
754+ * The 'kindString' must not be NULL.
755+ */
756+OpndKind getOpndKind(const char * kindString);
757+/**
758+ * Returns the string name of the given ConditionMnemonic.
759+ */
760+const char * getConditionString(ConditionMnemonic cm);
761+
762+/**
763+ * Constructs a RegName with the same index and kind, but with a different size from
764+ * the given RegName (i.e. getAliasReg(EAX, OpndSize_16) => AX; getAliasReg(BL, OpndSize_32) => EBX).
765+ * The constructed RegName is not checked in any way and thus may be invalid.
766+ * Note that the aliasing does not work for at least AH, BH, CH, DH, ESI, EDI, ESP and EBP regs.
767+ */
768+inline RegName getAliasReg(RegName reg, OpndSize sz)
769+{
770+ return (RegName)REGNAME(getRegKind(reg), sz, getRegIndex(reg));
771+}
772+
773+/**
774+ * @brief Tests two RegName-s of the same kind for equality.
775+ *
776+ * @note Works correctly for 8-bit general purpose registers (AH, AL, BH, BL, etc.).
777+ */
778+inline bool equals(RegName r0, RegName r1)
779+{
780+ return getRegKind(r0) == getRegKind(r1) &&
781+ getRegIndex(r0) == getRegIndex(r1);
782+}
783+
784+ENCODER_NAMESPACE_END
785+
786+#endif // ifndef _ENCODER_DEFS_H_
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_defs_ext.h
@@ -0,0 +1,365 @@
1+/*
2+ * Copyright (C) 2012 The Android Open Source Project
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ * See the License for the specific language governing permissions and
14+ * limitations under the License.
15+ */
16+
17+#ifndef _ENCODER_DEFS_EXT_H_
18+#define _ENCODER_DEFS_EXT_H_
19+
20+
21+// Used to isolate an experimental or still-being-tuned encoder in a separate
22+// namespace so it can coexist with a stable one in the same bundle.
23+#ifdef ENCODER_ISOLATE
24+ #define ENCODER_NAMESPACE_START namespace enc_ia32 {
25+ #define ENCODER_NAMESPACE_END };
26+#else
27+ #define ENCODER_NAMESPACE_START
28+ #define ENCODER_NAMESPACE_END
29+#endif
30+
31+ENCODER_NAMESPACE_START
32+typedef enum OpndSize {
33+ /**
34+ * A change must be balanced with at least the following places:
35+ * Ia32IRConstants.h :: getByteSize() uses some presumptions about OpndSize_ values
36+ * Ia32::Constraint-s use the OpndSize as a mask
37+ * encoder.cpp & encoder_master_info.cpp uses OpndSize as an index for hashing
38+ * - perhaps there are many more places
39+ */
40+ OpndSize_Null = 0,
41+ OpndSize_8 = 0x01,
42+ OpndSize_16 = 0x02,
43+ OpndSize_32 = 0x04,
44+ OpndSize_64 = 0x08,
45+#if !defined(TESTING_ENCODER)
46+ OpndSize_80 = 0x10,
47+ OpndSize_128 = 0x20,
48+#endif
49+ OpndSize_Max,
50+ OpndSize_Any = 0x3F,
51+ OpndSize_Default = OpndSize_Any
52+} OpndSize;
53+
54+/**
55+ * Conditional mnemonics.
56+ * The values match the 'real' (== processor's) encodings of the
57+ * corresponding condition codes used in the opcodes.
58+ */
59+typedef enum ConditionMnemonic {
60+
61+ ConditionMnemonic_O=0,
62+ ConditionMnemonic_NO=1,
63+ ConditionMnemonic_B=2, ConditionMnemonic_NAE=ConditionMnemonic_B, ConditionMnemonic_C=ConditionMnemonic_B,
64+ ConditionMnemonic_NB=3, ConditionMnemonic_AE=ConditionMnemonic_NB, ConditionMnemonic_NC=ConditionMnemonic_NB,
65+ ConditionMnemonic_Z=4, ConditionMnemonic_E=ConditionMnemonic_Z,
66+ ConditionMnemonic_NZ=5, ConditionMnemonic_NE=ConditionMnemonic_NZ,
67+ ConditionMnemonic_BE=6, ConditionMnemonic_NA=ConditionMnemonic_BE,
68+ ConditionMnemonic_NBE=7, ConditionMnemonic_A=ConditionMnemonic_NBE,
69+
70+ ConditionMnemonic_S=8,
71+ ConditionMnemonic_NS=9,
72+ ConditionMnemonic_P=10, ConditionMnemonic_PE=ConditionMnemonic_P,
73+ ConditionMnemonic_NP=11, ConditionMnemonic_PO=ConditionMnemonic_NP,
74+ ConditionMnemonic_L=12, ConditionMnemonic_NGE=ConditionMnemonic_L,
75+ ConditionMnemonic_NL=13, ConditionMnemonic_GE=ConditionMnemonic_NL,
76+ ConditionMnemonic_LE=14, ConditionMnemonic_NG=ConditionMnemonic_LE,
77+ ConditionMnemonic_NLE=15, ConditionMnemonic_G=ConditionMnemonic_NLE,
78+ ConditionMnemonic_Count=16
79+} ConditionMnemonic;
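+
+// Because these values match the processor's condition-code encodings, the
+// encoding table can form conditional opcodes arithmetically - e.g. it
+// builds CMOVcc as {0x0F, 0x40 + ConditionMnemonic_cc}.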
80+
81+
82+#define CCM(prefix,cond) Mnemonic_##prefix##cond=Mnemonic_##prefix##cc+ConditionMnemonic_##cond
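+// For instance, CCM(J,Z) expands to
+// Mnemonic_JZ = Mnemonic_Jcc + ConditionMnemonic_Z,
+// placing each conditional variant at a fixed offset from its base mnemonic.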
83+
84+//=========================================================================================================
85+typedef enum Mnemonic {
86+
87+Mnemonic_NULL=0, Mnemonic_Null=Mnemonic_NULL,
88+Mnemonic_JMP, // Jump
89+Mnemonic_MOV, // Move
90+Mnemonic_Jcc, // Jump if Condition Is Met
91+ CCM(J,O),
92+ CCM(J,NO),
93+ CCM(J,B), CCM(J,NAE), CCM(J,C),
94+ CCM(J,NB), CCM(J,AE), CCM(J,NC),
95+ CCM(J,Z), CCM(J,E),
96+ CCM(J,NZ), CCM(J,NE),
97+ CCM(J,BE), CCM(J,NA),
98+ CCM(J,NBE), CCM(J,A),
99+ CCM(J,S),
100+ CCM(J,NS),
101+ CCM(J,P), CCM(J,PE),
102+ CCM(J,NP), CCM(J,PO),
103+ CCM(J,L), CCM(J,NGE),
104+ CCM(J,NL), CCM(J,GE),
105+ CCM(J,LE), CCM(J,NG),
106+ CCM(J,NLE), CCM(J,G),
107+Mnemonic_CALL, // Call Procedure
108+
109+Mnemonic_ADC, // Add with Carry
110+Mnemonic_ADD, // Add
111+Mnemonic_ADDSD, // Add Scalar Double-Precision Floating-Point Values
112+Mnemonic_ADDSS, // Add Scalar Single-Precision Floating-Point Values
113+Mnemonic_AND, // Logical AND
114+
115+Mnemonic_BSF, // Bit scan forward
116+Mnemonic_BSR, // Bit scan reverse
117+
118+Mnemonic_CMC, // Complement Carry Flag
119+Mnemonic_CWD, Mnemonic_CDQ=Mnemonic_CWD,// Convert Word to Doubleword/Convert Doubleword to Quadword
120+Mnemonic_CMOVcc, // Conditional Move
121+ CCM(CMOV,O),
122+ CCM(CMOV,NO),
123+ CCM(CMOV,B), CCM(CMOV,NAE), CCM(CMOV,C),
124+ CCM(CMOV,NB), CCM(CMOV,AE), CCM(CMOV,NC),
125+ CCM(CMOV,Z), CCM(CMOV,E),
126+ CCM(CMOV,NZ), CCM(CMOV,NE),
127+ CCM(CMOV,BE), CCM(CMOV,NA),
128+ CCM(CMOV,NBE), CCM(CMOV,A),
129+
130+ CCM(CMOV,S),
131+ CCM(CMOV,NS),
132+ CCM(CMOV,P), CCM(CMOV,PE),
133+ CCM(CMOV,NP), CCM(CMOV,PO),
134+ CCM(CMOV,L), CCM(CMOV,NGE),
135+ CCM(CMOV,NL), CCM(CMOV,GE),
136+ CCM(CMOV,LE), CCM(CMOV,NG),
137+ CCM(CMOV,NLE), CCM(CMOV,G),
138+
139+Mnemonic_CMP, // Compare Two Operands
140+Mnemonic_CMPXCHG, // Compare and exchange
141+Mnemonic_CMPXCHG8B, // Compare and Exchange 8 Bytes
142+Mnemonic_CMPSB, // Compare Two Bytes at DS:ESI and ES:EDI
143+Mnemonic_CMPSW, // Compare Two Words at DS:ESI and ES:EDI
144+Mnemonic_CMPSD, // Compare Two Doublewords at DS:ESI and ES:EDI
145+//
146+// double -> float
147+Mnemonic_CVTSD2SS, // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
148+// double -> I_32
149+Mnemonic_CVTSD2SI, // Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer
150+// double [truncated] -> I_32
151+Mnemonic_CVTTSD2SI, // Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Doubleword Integer
152+//
153+// float -> double
154+Mnemonic_CVTSS2SD, // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
155+// float -> I_32
156+Mnemonic_CVTSS2SI, // Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer
157+// float [truncated] -> I_32
158+Mnemonic_CVTTSS2SI, // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
159+//
160+// I_32 -> double
161+Mnemonic_CVTSI2SD, // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
162+// I_32 -> float
163+Mnemonic_CVTSI2SS, // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
164+
165+Mnemonic_COMISD, // Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS
166+Mnemonic_COMISS, // Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS
167+Mnemonic_DEC, // Decrement by 1
168+Mnemonic_DIVSD, // Divide Scalar Double-Precision Floating-Point Values
169+Mnemonic_DIVSS, // Divide Scalar Single-Precision Floating-Point Values
170+Mnemonic_ENTER, // ENTER-Make Stack Frame for Procedure Parameters
171+Mnemonic_FLDCW, // Load FPU control word
172+Mnemonic_FADDP,
173+Mnemonic_FLDZ,
174+Mnemonic_FADD,
175+Mnemonic_FSUBP,
176+Mnemonic_FSUB,
177+Mnemonic_FISUB,
178+Mnemonic_FMUL,
179+Mnemonic_FMULP,
180+Mnemonic_FDIVP,
181+Mnemonic_FDIV,
182+Mnemonic_FUCOM,
183+Mnemonic_FUCOMI,
184+Mnemonic_FUCOMP,
185+Mnemonic_FUCOMIP,
186+Mnemonic_FUCOMPP,
187+Mnemonic_FRNDINT,
188+Mnemonic_FNSTCW, // Store FPU control word
189+Mnemonic_FSTSW, // Store FPU status word
190+Mnemonic_FNSTSW, // Store FPU status word
191+Mnemonic_FILD, // Load Integer
192+Mnemonic_FLD, // Load Floating Point Value
193+Mnemonic_FLDLG2,
194+Mnemonic_FLDLN2,
195+Mnemonic_FLD1,
196+
197+Mnemonic_FCLEX, // Clear Exceptions
198+Mnemonic_FCHS, // Change sign of ST0
199+Mnemonic_FNCLEX, // Clear Exceptions
200+Mnemonic_FIST, // Store Integer
201+Mnemonic_FISTP, // Store Integer, pop FPU stack
202+Mnemonic_FISTTP, // Store Integer with Truncation
203+Mnemonic_FPREM, // Partial Remainder
204+Mnemonic_FPREM1, // Partial Remainder
205+Mnemonic_FST, // Store Floating Point Value
206+Mnemonic_FSTP, // Store Floating Point Value and pop the FP stack
207+Mnemonic_FSQRT, //Computes the square root of ST(0) and stores the result in ST(0)
208+Mnemonic_FABS, //Computes the absolute value of ST(0) and stores the result in ST(0)
209+Mnemonic_FSIN, //Computes the sine of ST(0) and stores the result in ST(0)
210+Mnemonic_FCOS, //Computes the cosine of ST(0) and stores the result in ST(0)
211+Mnemonic_FPTAN, //Computes the partial tangent of ST(0), stores it in ST(0) and pushes 1.0
212+Mnemonic_FYL2X,
213+Mnemonic_FYL2XP1,
214+Mnemonic_F2XM1,
215+Mnemonic_FPATAN,
216+Mnemonic_FXCH,
217+Mnemonic_FSCALE,
218+
219+Mnemonic_XCHG,
220+Mnemonic_DIV, // Unsigned Divide
221+Mnemonic_IDIV, // Signed Divide
222+Mnemonic_MUL, // Unsigned Multiply
223+Mnemonic_IMUL, // Signed Multiply
224+Mnemonic_INC, // Increment by 1
225+Mnemonic_INT3, // Call break point
226+
227+Mnemonic_LEA, // Load Effective Address
228+Mnemonic_LEAVE, // High Level Procedure Exit
229+Mnemonic_LOOP, // Loop according to ECX counter
230+Mnemonic_LOOPE, // Loop according to ECX counter
231+Mnemonic_LOOPNE, Mnemonic_LOOPNZ = Mnemonic_LOOPNE, // Loop according to ECX counter
232+Mnemonic_LAHF, // Load Flags into AH
233+Mnemonic_MOVD, // Move Double word
234+Mnemonic_MOVQ, // Move Quadword
235+Mnemonic_MOVS8,
236+Mnemonic_MOVS16,
237+Mnemonic_MOVS32,
238+Mnemonic_MOVS64,
239+Mnemonic_MOVAPD, // Move Aligned Packed Double-Precision Floating-Point Values
240+Mnemonic_MOVSD, // Move Scalar Double-Precision Floating-Point Value
241+Mnemonic_MOVSS, // Move Scalar Single-Precision Floating-Point Values
242+Mnemonic_MOVSX, // Move with Sign-Extension
243+Mnemonic_MOVZX, // Move with Zero-Extend
244+Mnemonic_MULSD, // Multiply Scalar Double-Precision Floating-Point Values
245+Mnemonic_MULSS, // Multiply Scalar Single-Precision Floating-Point Values
246+Mnemonic_NEG, // Two's Complement Negation
247+Mnemonic_NOP, // No Operation
248+Mnemonic_NOT, // One's Complement Negation
249+Mnemonic_OR, // Logical Inclusive OR
250+Mnemonic_PREFETCH, // prefetch
251+Mnemonic_PADDQ, // Add Packed Quadword Integers
252+Mnemonic_PAND, // Logical AND
253+Mnemonic_POR, // Bitwise Logical OR
254+Mnemonic_PSUBQ, // Subtract Packed Quadword Integers
255+Mnemonic_PANDN,
256+Mnemonic_PSLLQ,
257+Mnemonic_PSRLQ,
258+Mnemonic_PXOR, // Logical Exclusive OR
259+Mnemonic_POP, // Pop a Value from the Stack
260+Mnemonic_POPFD, // Pop a Value of EFLAGS register from the Stack
261+Mnemonic_PUSH, // Push Word or Doubleword Onto the Stack
262+Mnemonic_PUSHFD, // Push EFLAGS Doubleword Onto the Stack
263+Mnemonic_RET, // Return from Procedure
264+
265+Mnemonic_SETcc, // Set Byte on Condition
266+ CCM(SET,O),
267+ CCM(SET,NO),
268+ CCM(SET,B), CCM(SET,NAE), CCM(SET,C),
269+ CCM(SET,NB), CCM(SET,AE), CCM(SET,NC),
270+ CCM(SET,Z), CCM(SET,E),
271+ CCM(SET,NZ), CCM(SET,NE),
272+ CCM(SET,BE), CCM(SET,NA),
273+ CCM(SET,NBE), CCM(SET,A),
274+ CCM(SET,S),
275+ CCM(SET,NS),
276+ CCM(SET,P), CCM(SET,PE),
277+ CCM(SET,NP), CCM(SET,PO),
278+ CCM(SET,L), CCM(SET,NGE),
279+ CCM(SET,NL), CCM(SET,GE),
280+ CCM(SET,LE), CCM(SET,NG),
281+ CCM(SET,NLE), CCM(SET,G),
282+
283+Mnemonic_SAL, Mnemonic_SHL=Mnemonic_SAL,// Shift left
284+Mnemonic_SAR, // Signed (arithmetic) shift right
285+Mnemonic_ROR, // Rotate right
286+Mnemonic_RCR, // Rotate right through CARRY flag
287+Mnemonic_ROL, // Rotate left
288+Mnemonic_RCL, // Rotate left through CARRY flag
289+Mnemonic_SHR, // Unsigned (logical) shift right
290+Mnemonic_SHRD, // Double Precision Shift Right
291+Mnemonic_SHLD, // Double Precision Shift Left
292+
293+Mnemonic_SBB, // Integer Subtraction with Borrow
294+Mnemonic_SUB, // Subtract
295+Mnemonic_SUBSD, // Subtract Scalar Double-Precision Floating-Point Values
296+Mnemonic_SUBSS, // Subtract Scalar Single-Precision Floating-Point Values
297+
298+Mnemonic_TEST, // Logical Compare
299+
300+Mnemonic_UCOMISD, // Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS
301+Mnemonic_UCOMISS, // Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS
302+
303+Mnemonic_XOR, // Logical Exclusive OR
304+//
305+// packed things,
306+//
307+Mnemonic_XORPD, // Bitwise Logical XOR for Double-Precision Floating-Point Values
308+Mnemonic_XORPS, // Bitwise Logical XOR for Single-Precision Floating-Point Values
309+
310+Mnemonic_CVTDQ2PD, // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values
311+Mnemonic_CVTTPD2DQ, // Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers
312+
313+Mnemonic_CVTDQ2PS, // Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values
314+Mnemonic_CVTTPS2DQ, // Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Doubleword Integers
315+//
316+// String operations
317+//
318+Mnemonic_STD, // Set direction flag
319+Mnemonic_CLD, // Clear direction flag
320+Mnemonic_SCAS, // Scan string
321+Mnemonic_STOS, // Store string
322+
323+//
324+Mnemonic_WAIT, // Check for pending unmasked floating-point exceptions
325+Mnemonic_PADDB, //!< Add packed byte integers
326+Mnemonic_PADDW, //!< Add packed word integers
327+Mnemonic_PADDD, //!< Add packed doubleword integers
328+Mnemonic_PSUBB, //!< Subtract packed byte integers
329+Mnemonic_PSUBW, //!< Subtract packed word integers
330+Mnemonic_PSUBD, //!< Subtract packed doubleword integers
331+Mnemonic_PMULLW, //!< Multiply packed word integers
332+Mnemonic_PMULLD, //!< Multiply packed doubleword integers
333+Mnemonic_PSLLW, //!< Shift words left and shift in 0s
334+Mnemonic_PSLLD, //!< Shift doublewords left and shift in 0s
335+Mnemonic_PSRAW, //!< Shift words right and shift in sign bits
336+Mnemonic_PSRAD, //!< Shift doublewords right and shift in sign bits
337+Mnemonic_PSRLW, //!< Shift words right and shift in 0s
338+Mnemonic_PSRLD, //!< Shift doublewords right and shift in 0s
339+Mnemonic_PMOVSXBW, //!< Sign extend 8 packed signed 8-bit integers in the low 8 bytes to 8 packed signed 16-bit integers
340+Mnemonic_PSHUFB, //!< Shuffle bytes
341+Mnemonic_PSHUFD, //!< Shuffle doublewords
342+Mnemonic_PSHUFLW, //!< Shuffle packed low words
343+Mnemonic_PSHUFHW, //!< Shuffle packed high words
344+Mnemonic_PHADDSW, //!< Add 16-bit signed integers horizontally, then pack saturated integers
345+Mnemonic_PHADDW, //!< Add 16-bit signed integers horizontally, then pack
346+Mnemonic_PHADDD, //!< Add 32-bit signed integers horizontally, then pack
347+Mnemonic_PHSUBSW, //!< Subtract 16-bit signed integers horizontally, then pack saturated integers
348+Mnemonic_PHSUBW, //!< Subtract 16-bit signed integers horizontally, then pack
349+Mnemonic_PHSUBD, //!< Subtract 32-bit signed integers horizontally, then pack
350+Mnemonic_PEXTRB, //!< Extract a byte integer value from xmm
351+Mnemonic_PEXTRW, //!< Extract a word integer value from xmm
352+Mnemonic_PEXTRD, //!< Extract a doubleword integer value from xmm
353+Mnemonic_MOVDQA, //!< Move aligned double quadword
354+Mnemonic_SHUFPS, //!< Shuffle packed single-precision values
355+Mnemonic_MOVAPS, //!< Move aligned packed single-precision values
356+
357+//
358+Mnemonic_Count
359+} Mnemonic;
360+
361+#undef CCM
362+
363+ENCODER_NAMESPACE_END
364+
365+#endif // ifndef _ENCODER_DEFS_EXT_H_
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_prvt.h
@@ -0,0 +1,382 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+#ifndef __ENC_PRVT_H_INCLUDED__
21+#define __ENC_PRVT_H_INCLUDED__
22+
23+#include "enc_base.h"
24+
25+ENCODER_NAMESPACE_START
26+/*
27+ * @file
28+ * @brief Contains some definitions/constants and other stuff used by the
29+ * Encoder internally.
30+ */
31+
32+enum OpcodeByteKind {
33+ //OpcodeByteKind_Opcode = 0x0000,
34+ OpcodeByteKind_ZeroOpcodeByte = 0x0100,
35+ //
36+    // The names _SlashR, _SlashNum, _ib, _iw, etc
37+    // represent the appropriate abbreviations used
38+    // in the mnemonic descriptions in Intel's arch manual.
39+ //
40+ OpcodeByteKind_SlashR = 0x0200,
41+ OpcodeByteKind_SlashNum = 0x0300,
42+ OpcodeByteKind_ib = 0x0400,
43+ OpcodeByteKind_iw = 0x0500,
44+ OpcodeByteKind_id = 0x0600,
45+#ifdef _EM64T_
46+ OpcodeByteKind_io = 0x0700,
47+#endif
48+ OpcodeByteKind_cb = 0x0800,
49+ OpcodeByteKind_cw = 0x0900,
50+ OpcodeByteKind_cd = 0x0A00,
51+ //OpcodeByteKind_cp = 0x0B00,
52+ //OpcodeByteKind_co = 0x0C00,
53+ //OpcodeByteKind_ct = 0x0D00,
54+
55+ OpcodeByteKind_rb = 0x0E00,
56+ OpcodeByteKind_rw = 0x0F00,
57+ OpcodeByteKind_rd = 0x1000,
58+#ifdef _EM64T_
59+ OpcodeByteKind_ro = 0x1100,
60+ //OpcodeByteKind_REX = 0x1200,
61+ OpcodeByteKind_REX_W = 0x1300,
62+#endif
63+ OpcodeByteKind_plus_i = 0x1400,
64+ /**
65+     * a special marker meaning 'no opcode at the given position';
66+     * used in the opcodes array to mark an empty slot - say, the slot
67+     * of an em64t-specific opcode on ia32.
68+     * The last 'e' is lowercase to avoid confusion with the 'F' in
69+     * OpcodeByteKind_LAST.
70+ */
71+ OpcodeByteKind_EMPTY = 0xFFFE,
72+ /**
73+     * a special marker meaning 'no more opcodes in the array';
74+     * used in the opcodes array to show that there are no more
75+     * opcodes for a given mnemonic.
76+ */
77+ OpcodeByteKind_LAST = 0xFFFF,
78+ /**
79+ * a mask to extract the OpcodeByteKind
80+ */
81+ OpcodeByteKind_KindMask = 0xFF00,
82+ /**
83+ * a mask to extract the opcode byte when presented
84+     * a mask to extract the opcode byte when present
85+ OpcodeByteKind_OpcodeMask = 0x00FF
86+};
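+
+// For example, an encoding-table entry such as DEC's {0xFE, _1} pairs a
+// literal opcode byte (0xFE) with OpcodeByteKind_SlashNum|1 - the manual's
+// '/1' digit that goes into the ModRM reg field.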
87+
88+#ifdef USE_ENCODER_DEFINES
89+
90+#define N {0, 0, 0, 0 }
91+#define U {1, 0, 1, OpndRole_Use }
92+#define D {1, 1, 0, OpndRole_Def }
93+#define DU {1, 1, 1, OpndRole_Def|OpndRole_Use }
94+
95+#define U_U {2, 0, 2, OpndRole_Use<<2 | OpndRole_Use }
96+#define D_U {2, 1, 1, OpndRole_Def<<2 | OpndRole_Use }
97+#define D_DU {2, 2, 1, OpndRole_Def<<2 | (OpndRole_Def|OpndRole_Use) }
98+#define DU_U {2, 1, 2, ((OpndRole_Def|OpndRole_Use)<<2 | OpndRole_Use) }
99+#define DU_DU {2, 2, 2, ((OpndRole_Def|OpndRole_Use)<<2 | (OpndRole_Def|OpndRole_Use)) }
100+
101+#define DU_DU_DU {3, 3, 3, ((OpndRole_Def|OpndRole_Use)<<4) | ((OpndRole_Def|OpndRole_Use)<<2) | (OpndRole_Def|OpndRole_Use) }
102+#define DU_DU_U {3, 2, 3, (((OpndRole_Def|OpndRole_Use)<<4) | ((OpndRole_Def|OpndRole_Use)<<2) | OpndRole_Use) }
103+#define D_DU_U {3, 2, 2, (((OpndRole_Def)<<4) | ((OpndRole_Def|OpndRole_Use)<<2) | OpndRole_Use) }
104+#define D_U_U {3, 1, 2, (((OpndRole_Def)<<4) | ((OpndRole_Use)<<2) | OpndRole_Use) }
105+
106+// Special encoding of 0x00 opcode byte. Note: it's all O-s, not zeros.
107+#define OxOO OpcodeByteKind_ZeroOpcodeByte
108+
109+#define Size16 InstPrefix_OpndSize
110+
111+#define _r OpcodeByteKind_SlashR
112+
113+#define _0 OpcodeByteKind_SlashNum|0
114+#define _1 OpcodeByteKind_SlashNum|1
115+#define _2 OpcodeByteKind_SlashNum|2
116+#define _3 OpcodeByteKind_SlashNum|3
117+#define _4 OpcodeByteKind_SlashNum|4
118+#define _5 OpcodeByteKind_SlashNum|5
119+#define _6 OpcodeByteKind_SlashNum|6
120+#define _7 OpcodeByteKind_SlashNum|7
121+
122+// '+i' for floating-point instructions
123+#define _i OpcodeByteKind_plus_i
124+
125+
126+#define ib OpcodeByteKind_ib
127+#define iw OpcodeByteKind_iw
128+#define id OpcodeByteKind_id
129+
130+#define cb OpcodeByteKind_cb
131+#define cw OpcodeByteKind_cw
132+#define cd OpcodeByteKind_cd
133+
134+#define rb OpcodeByteKind_rb
135+#define rw OpcodeByteKind_rw
136+#define rd OpcodeByteKind_rd
137+
138+#define AL {OpndKind_GPReg, OpndSize_8, OpndExt_Any, RegName_AL}
139+#define AH {OpndKind_GPReg, OpndSize_8, OpndExt_Any, RegName_AH}
140+#define AX {OpndKind_GPReg, OpndSize_16, OpndExt_Any, RegName_AX}
141+#define EAX {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_EAX}
142+#ifdef _EM64T_
143+ #define RAX {OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RAX }
144+#endif
145+
146+#define CL {OpndKind_GPReg, OpndSize_8, OpndExt_Any, RegName_CL}
147+#define ECX {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_ECX}
148+#ifdef _EM64T_
149+ #define RCX {OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RCX}
150+#endif
151+
152+#define DX {OpndKind_GPReg, OpndSize_16, OpndExt_Any, RegName_DX}
153+#define EDX {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_EDX}
154+#ifdef _EM64T_
155+ #define RDX { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RDX }
156+#endif
157+
158+#define ESI {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_ESI}
159+#ifdef _EM64T_
160+ #define RSI { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RSI }
161+#endif
162+
163+#define EDI {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_EDI}
164+#ifdef _EM64T_
165+ #define RDI { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RDI }
166+#endif
167+
168+#define r8 {OpndKind_GPReg, OpndSize_8, OpndExt_Any, RegName_Null}
169+#define r16 {OpndKind_GPReg, OpndSize_16, OpndExt_Any, RegName_Null}
170+#define r32 {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_Null}
171+#ifdef _EM64T_
172+ #define r64 { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_Null }
173+#endif
174+
175+#define r_m8 {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_8, OpndExt_Any, RegName_Null}
176+#define r_m16 {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_16, OpndExt_Any, RegName_Null}
177+#define r_m32 {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_32, OpndExt_Any, RegName_Null}
178+
179+#define r_m8s {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_8, OpndExt_Signed, RegName_Null}
180+#define r_m16s {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_16, OpndExt_Signed, RegName_Null}
181+#define r_m32s {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_32, OpndExt_Signed, RegName_Null}
182+
183+#define r_m8u {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_8, OpndExt_Zero, RegName_Null}
184+#define r_m16u {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_16, OpndExt_Zero, RegName_Null}
185+#define r_m32u {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_32, OpndExt_Zero, RegName_Null}
186+
187+//'m' was only used in the LEA mnemonic, but is replaced with a
188+// set of exact sizes. See more comments for the LEA instruction in TheTable.
189+//#define m {OpndKind_Mem, OpndSize_Null, RegName_Null}
190+#define m8 {OpndKind_Mem, OpndSize_8, OpndExt_Any, RegName_Null}
191+#define m16 {OpndKind_Mem, OpndSize_16, OpndExt_Any, RegName_Null}
192+#define m32 {OpndKind_Mem, OpndSize_32, OpndExt_Any, RegName_Null}
193+#define m64 {OpndKind_Mem, OpndSize_64, OpndExt_Any, RegName_Null}
194+#ifdef _EM64T_
195+ #define r_m64 { (OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_64, OpndExt_Any, RegName_Null }
196+#endif
197+
198+#define imm8 {OpndKind_Imm, OpndSize_8, OpndExt_Any, RegName_Null}
199+#define imm16 {OpndKind_Imm, OpndSize_16, OpndExt_Any, RegName_Null}
200+#define imm32 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null}
201+
202+#define imm8s {OpndKind_Imm, OpndSize_8, OpndExt_Signed, RegName_Null}
203+#define imm16s {OpndKind_Imm, OpndSize_16, OpndExt_Signed, RegName_Null}
204+#define imm32s {OpndKind_Imm, OpndSize_32, OpndExt_Signed, RegName_Null}
205+
206+#define imm8u {OpndKind_Imm, OpndSize_8, OpndExt_Zero, RegName_Null}
207+#define imm16u {OpndKind_Imm, OpndSize_16, OpndExt_Zero, RegName_Null}
208+#define imm32u {OpndKind_Imm, OpndSize_32, OpndExt_Zero, RegName_Null}
209+
210+#ifdef _EM64T_
211+ #define imm64 {OpndKind_Imm, OpndSize_64, OpndExt_Any, RegName_Null }
212+#endif
213+
214+//FIXME: moff-s are in fact memory refs, but presented as immediate.
215+// Need to specify this in OpndDesc.
216+#define moff8 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null}
217+#define moff16 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null}
218+#define moff32 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null}
219+#ifdef _EM64T_
220+ #define moff64 {OpndKind_Imm, OpndSize_64, OpndExt_Any, RegName_Null}
221+#endif
222+
223+
224+#define rel8 {OpndKind_Imm, OpndSize_8, OpndExt_Any, RegName_Null}
225+#define rel16 {OpndKind_Imm, OpndSize_16, OpndExt_Any, RegName_Null}
226+#define rel32 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null}
227+
228+#define mm64 {OpndKind_MMXReg, OpndSize_64, OpndExt_Any, RegName_Null}
229+#define mm_m64 {(OpndKind)(OpndKind_MMXReg|OpndKind_Mem), OpndSize_64, OpndExt_Any, RegName_Null}
230+
231+#define xmm64 {OpndKind_XMMReg, OpndSize_64, OpndExt_Any, RegName_Null}
232+#define xmm_m64 {(OpndKind)(OpndKind_XMMReg|OpndKind_Mem), OpndSize_64, OpndExt_Any, RegName_Null}
233+
234+#define xmm32 {OpndKind_XMMReg, OpndSize_32, OpndExt_Any, RegName_Null}
235+#define xmm_m32 {(OpndKind)(OpndKind_XMMReg|OpndKind_Mem), OpndSize_32, OpndExt_Any, RegName_Null}
236+
237+#define FP0S {OpndKind_FPReg, OpndSize_32, OpndExt_Any, RegName_FP0S}
238+#define FP0D {OpndKind_FPReg, OpndSize_64, OpndExt_Any, RegName_FP0D}
239+#define FP1S {OpndKind_FPReg, OpndSize_32, OpndExt_Any, RegName_FP1S}
240+#define FP1D {OpndKind_FPReg, OpndSize_64, OpndExt_Any, RegName_FP1D}
241+#define fp32 {OpndKind_FPReg, OpndSize_32, OpndExt_Any, RegName_Null}
242+#define fp64 {OpndKind_FPReg, OpndSize_64, OpndExt_Any, RegName_Null}
243+
244+#ifdef _EM64T_
245+ #define io OpcodeByteKind_io
246+ #define REX_W OpcodeByteKind_REX_W
247+
248+#endif
249+
250+#endif // USE_ENCODER_DEFINES
251+
252+/**
253+ * @brief Represents the REX part of instruction.
254+ */
255+struct Rex {
256+ unsigned char b : 1;
257+ unsigned char x : 1;
258+ unsigned char r : 1;
259+ unsigned char w : 1;
260+ unsigned char dummy : 4; // must be '0100'b
261+ unsigned int :24;
262+};
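+
+// A worked example, assuming the usual little-endian (LSB-first) bitfield
+// layout: with w=1 and r=x=b=0 the byte is 0100 1000b = 0x48, i.e. the
+// REX.W prefix that the encoding table refers to as REX_W.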
263+
264+/**
265+ * @brief Describes SIB (scale,index,base) byte.
266+ */
267+struct SIB {
268+ unsigned char base:3;
269+ unsigned char index:3;
270+ unsigned char scale:2;
271+ unsigned int padding:24;
272+};
273+/**
274+ * @brief Describes ModRM byte.
275+ */
276+struct ModRM
277+{
278+ unsigned char rm:3;
279+ unsigned char reg:3;
280+ unsigned char mod:2;
281+ unsigned int padding:24;
282+};
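+
+// For instance, the ModRM byte 0xC1 decodes (same LSB-first layout) as
+// mod=11b, reg=000b, rm=001b - register-direct mode with ECX as the r/m
+// operand.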
283+
284+
285+
286+/**
287+* exactly the same as EncoderBase::OpcodeDesc, but also holds info about
288+* platform on which the opcode is applicable.
289+*/
290+struct OpcodeInfo {
291+ enum platform {
292+ /// an opcode is valid on all platforms
293+ all,
294+        // opcode is valid on Intel64 (EM64T) only
295+        em64t,
296+        // opcode is valid on IA-32 only
297+        ia32,
298+ // opcode is added for the sake of disassembling, should not be used in encoding
299+ decoder,
300+ // only appears in master table, replaced with 'decoder' in hashed version
301+ decoder32,
302+ // only appears in master table, replaced with 'decoder' in hashed version
303+ decoder64,
304+ };
305+ platform platf;
306+ unsigned opcode[4+1+1];
307+ EncoderBase::OpndDesc opnds[EncoderBase::MAX_NUM_OPCODE_OPERANDS];
308+ EncoderBase::OpndRolesDesc roles;
309+};
310+
311+/**
312+ * @defgroup MF_ Mnemonic flags
313+*/
314+
315+ /**
316+ * Operation has no special properties.
317+ */
318+#define MF_NONE (0x00000000)
319+ /**
320+ * Operation affects flags
321+ */
322+#define MF_AFFECTS_FLAGS (0x00000001)
323+ /**
324+ * Operation uses flags - conditional operations, ADC/SBB/ETC
325+ */
326+#define MF_USES_FLAGS (0x00000002)
327+ /**
328+ * Operation is conditional - MOVcc/SETcc/Jcc/ETC
329+ */
330+#define MF_CONDITIONAL (0x00000004)
331+/**
332+ * Operation is symmetric - its args can be swapped (ADD/MUL/etc).
333+ */
334+#define MF_SYMMETRIC (0x00000008)
335+/**
336+ * Operation is XOR-like - XOR, SUB - 'op arg,arg' is a pure def,
337+ * without a use.
338+ */
339+#define MF_SAME_ARG_NO_USE (0x00000010)
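+
+// For example, the encoding table declares XOR with
+// MF_AFFECTS_FLAGS|MF_SYMMETRIC|MF_SAME_ARG_NO_USE: 'XOR arg,arg' is a pure
+// def of arg without a use, which the data flow analysis can exploit.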
340+
341+///@} // ~MF_
342+
343+/**
344+ * The same structure as EncoderBase::MnemonicDesc, but carries an array of
345+ * OpcodeInfo instead of OpcodeDesc[].
346+ * Only used while prebuilding the encoding tables, thus it's hidden under
347+ * the appropriate define.
348+ */
349+struct MnemonicInfo {
350+ /**
351+ * The mnemonic itself
352+ */
353+ Mnemonic mn;
354+ /**
355+ * Various characteristics of mnemonic.
356+ * @see MF_
357+ */
358+ unsigned flags;
359+ /**
360+ * Number of args/defs/uses/roles for the operation. For operations
361+ * which may take a different number of operands (i.e. IMUL/SHL), use the
362+ * most common value, or leave '0' if you are sure this info is not
363+ * required.
364+ */
365+ EncoderBase::OpndRolesDesc roles;
366+ /**
367+ * Print name of the mnemonic
368+ */
369+ const char * name;
370+ /**
371+ * Array of opcodes.
372+ * The terminating opcode description always has OpcodeByteKind_LAST
373+ * at opcodes[i].opcode[0].
374+ * The size of '25' has nothing special behind it - it is just the max
375+ * number of opcodes currently used (for the MOV instruction).
376+ */
377+ OpcodeInfo opcodes[25];
378+};
379+
380+ENCODER_NAMESPACE_END
381+
382+#endif // ~__ENC_PRVT_H_INCLUDED__
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_tabl.cpp
@@ -0,0 +1,2164 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+
21+
22+#include <assert.h>
23+#include <stdio.h>
24+#include <stdlib.h> //qsort
25+#include <string.h>
26+#include <memory.h>
27+#include <errno.h>
28+#include <stdlib.h>
29+
30+
31+// need to use EM64T-specifics - new registers, defines from enc_prvt, etc...
32+#if !defined(_EM64T_)
33+ #define UNDEF_EM64T
34+ #define _EM64T_
35+#endif
36+
37+#define USE_ENCODER_DEFINES
38+#include "enc_prvt.h"
39+#include "enc_defs.h"
40+
41+#ifdef UNDEF_EM64T
42+ #undef _EM64T_
43+#endif
44+
45+//Android x86
46+#if 0 //!defined(_HAVE_MMX_)
47+ #define Mnemonic_PADDQ Mnemonic_Null
48+ #define Mnemonic_PAND Mnemonic_Null
49+ #define Mnemonic_POR Mnemonic_Null
50+ #define Mnemonic_PSUBQ Mnemonic_Null
51+#endif
52+
53+ENCODER_NAMESPACE_START
54+
55+
56+EncoderBase::MnemonicDesc EncoderBase::mnemonics[Mnemonic_Count];
57+EncoderBase::OpcodeDesc EncoderBase::opcodes[Mnemonic_Count][MAX_OPCODES];
58+unsigned char EncoderBase::opcodesHashMap[Mnemonic_Count][HASH_MAX];
59+
60+
61+/**
62+ * @file
63+ * @brief 'Master' copy of encoding data.
64+ */
65+
66+/*
67+This file contains the 'master copy' of the encoding table - this is the info
68+used both by the generator of native instructions (EncoderBase class) and by
69+the disassembling routines. The first one uses the info on how to encode an
70+instruction, and the second does the opposite - several separate tables are
71+built at runtime from this main table.
72+
73+=============================================================================
74+
75+The table was designed for easy support and maintenance. Thus, it was made as
76+close as possible to the descriptions in Intel's IA32 Architecture Manual.
77+The info is based on the latest (at the moment of writing) revision, which is
78+June 2005, order number 253666-016.
79+
80+Normally, almost all opcodes in the 'master' table are represented exactly as
81+they are shown in Intel's Architecture Manual (well, with slashes
82+replaced with underscores). There are several exceptions, specially marked.
83+
84+Normally, to add an opcode/instruction, one only needs to copy the whole
85+string from the manual, and simply replace '/' with '_'.
86+
87+I.e., TheManual reads for DEC:
88+ (1) FE /1 DEC r/m8 Valid Valid Decrement r/m8 by 1.
89+ (2) REX + FE /1 DEC r/m8* Valid N.E. Decrement r/m8 by 1.
90+ (3) REX.W + FF /1 DEC r/m64 Valid N.E. Decrement r/m64 by 1.
91+
92+1. Note that there is no need to explicitly specify REX-based opcodes for
93+   an instruction to handle additional registers on EM64T:
94+
95+ (1) FE /1 DEC r/m8 Valid Valid Decrement r/m8 by 1.
96+ (3) REX.W + FF /1 DEC r/m64 Valid N.E. Decrement r/m64 by 1.
97+
98+2. Copy the string, strip off the text comments, replace '/'=>'_'. Note that
99+   the second line is for EM64T only
100+
101+ (1) FE /1 DEC r/m8
102+ (3) REX.W + FF /1 DEC r/m64
103+
104+3. Fill out the mnemonic, opcode parameters parts
105+
106+ BEGIN_MNEMONIC(DEC, MF_AFFECTS_FLAGS, DU)
107+ BEGIN_OPCODES()
108+ {OpcodeInfo::all, {0xFE, _1}, {r_m8}, DU },
109+ {OpcodeInfo::em64t, {REX_W, 0xFF, _1}, {r_m64}, DU },
110+
111+ DU here - one argument, it's used and defined
112+
113+4. That's it - that simple!
114+
115+The operand roles (DU here) are used by Jitrino's optimizing engine to
116+perform data flow analysis. They are also used to store/obtain the number of operands.
117+
118+Special cases are (see the table for details):
119+LEA
120+Some FPU operations (i.e. FSTP)
121+packed things (XORPD, XORPS, CVTDQ2PD, CVTTPD2DQ)
122+
123+Also, Jitrino's needs require specifying all operands - including
124+implicit ones (see IMUL).
125+
126+The master table itself does not need to be ordered - it gets sorted before
127+processing. It's recommended (though it's not a law) to group similar
128+instructions together - i.e. FPU instructions, MMX, etc.
129+
130+=============================================================================
131+
132+The encoding engine builds several tables based on the 'master' one (here
133+'mnemonic' is roughly a synonym for 'instruction'):
134+
135+- list of mnemonics which holds general info about instructions
136+ (EncoderBase::mnemonics)
137+- an array of opcode descriptions (EncoderBase::opcodes)
138+- a mapping between a hash value and an opcode description record for a given
139+ mnemonic (EncoderBase::opcodesHashMap)
140+
141+The EncoderBase::mnemonics holds general info about instructions.
142+The EncoderBase::opcodesHashMap is used for fast opcode selection based on
143+a hash value.
144+The EncoderBase::opcodes is used for the encoding itself.
145+
146+=============================================================================
147+The hash value is calculated and used as follows:
148+
149+JIT-ted code uses the following operand sizes: 8, 16, 32 and 64 bits, so the
150+size of an operand can be encoded in just 2 bits.
151+
152+The following operand locations are available: one of the registers - GP, FP,
153+MMX, XMM (not counting segment registers) - memory, and an immediate, which
154+gives us 6 variants that can be enumerated in 3 bits.
155+
156+As a grand total, the whole operand info needed for opcode selection
157+can be packed in 5 bits. Taking into account the IMUL mnemonic with its 3
158+operands (including implicit ones), we get 15 bits per instruction, and a
159+complete table would be about 32768 items per single instruction.
160+
161+Seems too many, but luckily, the 15 bit limit will never be reached: the
162+worst case is IMUL with its 3 operands:
163+(IMUL r64, r/m64, imm32)/(IMUL r32, r/m32, imm32).
164+So, by assigning the lowest value to GP registers, the max value of the hash
165+can be reduced.
166+
167+The hash values to use are:
168+sizes:
169+ 8 -> 11
170+ 16 -> 10
171+ 32 -> 01
172+ 64 -> 00
173+locations:
174+ gp reg -> 000
175+ memory -> 001
176+ fp reg -> 010
177+ mmx reg -> 011
178+ xmm reg -> 100
179+ immediate -> 101
180+and the grand total for the worst case would be
181+[ GP 32] [GP 32] [Imm 32]
182+[000-01] [000-01] [101 01] = 1077
183+
184+However, the implicit operands add additional value, and the worst case
185+is 'SHLD r_m32, r32, CL=r8'. This gives us the maximum number of:
186+
187+[mem 32] [GP 32] [GP 8b]
188+[001-01] [000-01] [000-11] = 5155.
189+
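+A simpler two-operand illustration of the same arithmetic: for
+'ADD r32, imm32' the operand hashes are [GP 32] = [000-01] = 1 and
+[Imm 32] = [101-01] = 21, so the instruction hash is (1<<5) | 21 = 53.
+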
190+The max number is pretty big and the hash values are quite sparse, thus it
191+is not reasonable to use direct addressing, i.e.
192+OpcodeDesc[mnemonic][hash_code] - there would be a huge waste of space.
193+
194+Instead, we use a kind of mapping: the opcode info is stored in a packed
195+(here: non-sparse) array. The max number of opcodes will not exceed 255 for
196+each instruction. And we have an index array in which we store a mapping
197+between a hash code value and the opcode position for each given instruction.
198+
199+Sounds a bit sophisticated, but in reality it is simple: the opcode gets
200+selected in 2 simple steps:
201+
202+1. Select [hash,mnemonic] => 'n'.
203+
204+The array is pretty sparse - many cells contain 0xFF, which
205+means 'invalid hash - no opcode with the given characteristics'
206+
207+unsigned char EncoderBase::opcodesHashMap[Mnemonic_Count][HASH_MAX] =
208+
209++----+----+----+----+----+----+
210+| 00 | 05 | FF | FF | 03 | 12 | ...
211+|---------+-------------------+
212+| 12 | FF | FF | n | 04 | 25 | ... <- Mnemonic
213+|-----------------------------+
214+| FF | 11 | FF | 10 | 13 | .. | ...
215++-----------------------------+
216+ ... ^
217+ |
218+ hash
219+
220+2. Select [n,mnemonic] => 'opcode_desc11'
221+
222+OpcodeDesc EncoderBase::opcodes[Mnemonic_Count][MAX_OPCODES] =
223+
224++---------------+---------------+---------------+---------------+
225+| opcode_desc00 | opcode_desc01 | opcode_desc02 | last_opcode | ...
226++---------------+---------------+---------------+---------------+
227+| opcode_desc10 | opcode_desc11 | last_opcode | xxx | <- Mnemonic
228++---------------+---------------+---------------+---------------+
229+| opcode_desc20 | opcode_desc21 | opcode_desc22 | opcode_desc23 | ...
230++---------------+---------------+---------------+---------------+
231+ ...
232+ ^
233+ |
234+ n
235+
236+Now, use 'opcode_desc11'.
237+
238+=============================================================================
239+The array of opcode descriptions (EncoderBase::opcodes) is specially prepared
240+to maximize performance - EncoderBase::encode() is quite hot in client
241+applications for Jitrino/Jitrino.JET.
242+The preparation is that opcode descriptions from the 'master' encoding table
243+are preprocessed and a special set of OpcodeDesc records is prepared:
244+First, the 'raw' opcode bytes are extracted. Here, 'raw' means the bytes that
245+do not depend on any operand values, do not require any analysis and can be
246+simply copied into the output buffer during encoding. Also, the number of these
247+'raw' bytes is counted. The fields are OpcodeDesc::opcode and
248+OpcodeDesc::opcode_len.
249+
250+Then the first non-implicit operand is found and its index is stored in
251+OpcodeDesc::first_opnd.
252+
253+The bytes that require processing and analysis ('/r', '+i', etc) are
254+extracted and stored in OpcodeDesc::aux0 and OpcodeDesc::aux1 fields.
255+
256+Here, a special trick is performed:
257+    Some opcodes have a register/memory operand, but this is not reflected in
258+    the opcode column - for example, (MOVQ xmm64, xmm_m64). In this case, a fake
259+    '_r' is added to the OpcodeDesc::aux field.
260+    Some other opcodes have immediate operands, but this is again not
261+    reflected in the opcode column - for example, CALL cd or PUSH imm32.
262+    In this case, a fake '/cd' or a fake '/id' is added to the appropriate
263+    OpcodeDesc::aux field.
264+
265+The OpcodeDesc::last is non-zero for the final OpcodeDesc record (which does
266+not have valid data itself).
267+*/
268+
269+// TODO: To extend flexibility, replace bool fields in MnemonicDesc &
270+// MnemonicInfo with a set of flags packed into an integer field.
271+
272+unsigned short EncoderBase::getHash(const OpcodeInfo* odesc)
273+{
274+ /*
275+    NOTE: any changes in the hash computation must be strictly balanced with
276+ EncoderBase::Operand::hash_it and EncoderBase::Operands()
277+ */
278+ unsigned short hash = 0;
279+    // The hash computation uses a fast way - table selection instead of if-s.
280+ if (odesc->roles.count > 0) {
281+ OpndKind kind = odesc->opnds[0].kind;
282+ OpndSize size = odesc->opnds[0].size;
283+ assert(kind<COUNTOF(kind_hash));
284+ assert(size<COUNTOF(size_hash));
285+ hash = get_kind_hash(kind) | get_size_hash(size);
286+ }
287+
288+ if (odesc->roles.count > 1) {
289+ OpndKind kind = odesc->opnds[1].kind;
290+ OpndSize size = odesc->opnds[1].size;
291+ assert(kind<COUNTOF(kind_hash));
292+ assert(size<COUNTOF(size_hash));
293+ hash = (hash<<HASH_BITS_PER_OPERAND) |
294+ (get_kind_hash(kind) | get_size_hash(size));
295+ }
296+
297+ if (odesc->roles.count > 2) {
298+ OpndKind kind = odesc->opnds[2].kind;
299+ OpndSize size = odesc->opnds[2].size;
300+ assert(kind<COUNTOF(kind_hash));
301+ assert(size<COUNTOF(size_hash));
302+ hash = (hash<<HASH_BITS_PER_OPERAND) |
303+ (get_kind_hash(kind) | get_size_hash(size));
304+ }
305+ assert(hash <= HASH_MAX);
306+ return hash;
307+}
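+
+// A minimal sketch of the 2-step selection described in the comment above
+// (illustrative only - the actual lookup lives elsewhere in EncoderBase and
+// may differ in detail):
+//
+// unsigned char n = opcodesHashMap[mn][hash]; // 0xFF = no matching opcode
+// const OpcodeDesc& od = opcodes[mn][n]; // descriptor used for encoding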
308+
309+
310+#define BEGIN_MNEMONIC(mn, flags, roles) \
311+ { Mnemonic_##mn, flags, roles, #mn,
312+#define END_MNEMONIC() },
313+#define BEGIN_OPCODES() {
314+#define END_OPCODES() { OpcodeInfo::all, {OpcodeByteKind_LAST}, {}, {0, 0, 0, 0}}}
315+
316+
317+static MnemonicInfo masterEncodingTable[] = {
318+//
319+// Null
320+//
321+BEGIN_MNEMONIC(Null, MF_NONE, N)
322+BEGIN_OPCODES()
323+END_OPCODES()
324+END_MNEMONIC()
325+
326+BEGIN_MNEMONIC(LAHF, MF_USES_FLAGS, D)
327+BEGIN_OPCODES()
328+// TheManual says it's not always supported in em64t mode, thus excluding it
329+ {OpcodeInfo::ia32, {0x9F}, {EAX}, D },
330+END_OPCODES()
331+END_MNEMONIC()
332+//
333+// ALU mnemonics - add, adc, or, xor, and, cmp, sub, sbb
334+// as they differ only in the opcode extention (/digit) number and
335+// in which number the opcode start from, the opcode definitions
336+// for those instructions are packed together
337+//
338+// The 'opcode_starts_from' and 'opcode_ext' in DEFINE_ALU_OPCODES()
339+// are enough to define OpcodeInfo::all opcodes and the 'first_opcode'
340+// parameter is only due to ADD instruction, which requires an zero opcode
341+// byte which, in turn, is coded especially in the current coding scheme.
342+//
343+
344+#define DEFINE_ALU_OPCODES( opc_ext, opcode_starts_from, first_opcode, def_use ) \
345+\
346+ {OpcodeInfo::decoder, {opcode_starts_from + 4, ib}, {AL, imm8}, DU_U },\
347+ {OpcodeInfo::decoder, {Size16, opcode_starts_from + 5, iw}, {AX, imm16}, DU_U },\
348+ {OpcodeInfo::decoder, {opcode_starts_from + 5, id}, {EAX, imm32}, DU_U },\
349+ {OpcodeInfo::decoder64, {REX_W, opcode_starts_from+5, id}, {RAX, imm32s},DU_U },\
350+\
351+ {OpcodeInfo::all, {0x80, opc_ext, ib}, {r_m8, imm8}, def_use },\
352+ {OpcodeInfo::all, {Size16, 0x81, opc_ext, iw}, {r_m16, imm16}, def_use },\
353+ {OpcodeInfo::all, {0x81, opc_ext, id}, {r_m32, imm32}, def_use },\
354+ {OpcodeInfo::em64t, {REX_W, 0x81, opc_ext, id}, {r_m64, imm32s}, def_use },\
355+\
356+ {OpcodeInfo::all, {Size16, 0x83, opc_ext, ib}, {r_m16, imm8s}, def_use },\
357+ {OpcodeInfo::all, {0x83, opc_ext, ib}, {r_m32, imm8s}, def_use },\
358+ {OpcodeInfo::em64t, {REX_W, 0x83, opc_ext, ib}, {r_m64, imm8s}, def_use },\
359+\
360+ {OpcodeInfo::all, {first_opcode, _r}, {r_m8, r8}, def_use },\
361+\
362+ {OpcodeInfo::all, {Size16, opcode_starts_from+1, _r}, {r_m16, r16}, def_use },\
363+ {OpcodeInfo::all, {opcode_starts_from+1, _r}, {r_m32, r32}, def_use },\
364+ {OpcodeInfo::em64t, {REX_W, opcode_starts_from+1, _r}, {r_m64, r64}, def_use },\
365+\
366+ {OpcodeInfo::all, {opcode_starts_from+2, _r}, {r8, r_m8}, def_use },\
367+\
368+ {OpcodeInfo::all, {Size16, opcode_starts_from+3, _r}, {r16, r_m16}, def_use },\
369+ {OpcodeInfo::all, {opcode_starts_from+3, _r}, {r32, r_m32}, def_use },\
370+ {OpcodeInfo::em64t, {REX_W, opcode_starts_from+3, _r}, {r64, r_m64}, def_use },
371+
372+BEGIN_MNEMONIC(ADD, MF_AFFECTS_FLAGS|MF_SYMMETRIC, DU_U)
373+BEGIN_OPCODES()
374+ DEFINE_ALU_OPCODES(_0, 0x00, OxOO, DU_U )
375+END_OPCODES()
376+END_MNEMONIC()
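+
+// As a worked example, with ADD's arguments (opc_ext=_0, def_use=DU_U) the
+// first OpcodeInfo::all row of DEFINE_ALU_OPCODES above expands to:
+// {OpcodeInfo::all, {0x80, _0, ib}, {r_m8, imm8}, DU_U },
+// i.e. the manual's '80 /0 ib ADD r/m8, imm8' form.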
377+
378+BEGIN_MNEMONIC(OR, MF_AFFECTS_FLAGS|MF_SYMMETRIC, DU_U)
379+BEGIN_OPCODES()
380+ DEFINE_ALU_OPCODES(_1, 0x08, 0x08, DU_U )
381+END_OPCODES()
382+END_MNEMONIC()
383+
384+BEGIN_MNEMONIC(ADC, MF_AFFECTS_FLAGS|MF_USES_FLAGS|MF_SYMMETRIC, DU_U)
385+BEGIN_OPCODES()
386+ DEFINE_ALU_OPCODES(_2, 0x10, 0x10, DU_U )
387+END_OPCODES()
388+END_MNEMONIC()
389+
390+BEGIN_MNEMONIC(SBB, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U)
391+BEGIN_OPCODES()
392+ DEFINE_ALU_OPCODES(_3, 0x18, 0x18, DU_U )
393+END_OPCODES()
394+END_MNEMONIC()
395+
396+BEGIN_MNEMONIC(AND, MF_AFFECTS_FLAGS|MF_SYMMETRIC, DU_U)
397+BEGIN_OPCODES()
398+ DEFINE_ALU_OPCODES(_4, 0x20, 0x20, DU_U )
399+END_OPCODES()
400+END_MNEMONIC()
401+
402+
403+BEGIN_MNEMONIC(SUB, MF_AFFECTS_FLAGS|MF_SAME_ARG_NO_USE, DU_U)
404+BEGIN_OPCODES()
405+ DEFINE_ALU_OPCODES(_5, 0x28, 0x28, DU_U )
406+END_OPCODES()
407+END_MNEMONIC()
408+
409+
410+BEGIN_MNEMONIC(XOR, MF_AFFECTS_FLAGS|MF_SYMMETRIC|MF_SAME_ARG_NO_USE, DU_U)
411+BEGIN_OPCODES()
412+ DEFINE_ALU_OPCODES( _6, 0x30, 0x30, DU_U )
413+END_OPCODES()
414+END_MNEMONIC()
415+
416+BEGIN_MNEMONIC(CMP, MF_AFFECTS_FLAGS, U_U)
417+BEGIN_OPCODES()
418+ DEFINE_ALU_OPCODES( _7, 0x38, 0x38, U_U )
419+END_OPCODES()
420+END_MNEMONIC()
421+
422+BEGIN_MNEMONIC(CMPXCHG, MF_AFFECTS_FLAGS, N)
423+BEGIN_OPCODES()
424+ {OpcodeInfo::all, {0x0F, 0xB0, _r}, {r_m8, r8, AL}, DU_DU_DU },
425+ {OpcodeInfo::all, {Size16, 0x0F, 0xB1, _r}, {r_m16, r16, AX}, DU_DU_DU },
426+ {OpcodeInfo::all, {0x0F, 0xB1, _r}, {r_m32, r32, EAX}, DU_DU_DU},
427+ {OpcodeInfo::em64t, {REX_W, 0x0F, 0xB1, _r}, {r_m64, r64, RAX}, DU_DU_DU },
428+END_OPCODES()
429+END_MNEMONIC()
430+
431+BEGIN_MNEMONIC(CMPXCHG8B, MF_AFFECTS_FLAGS, D)
432+BEGIN_OPCODES()
433+ {OpcodeInfo::all, {0x0F, 0xC7, _1}, {m64}, DU },
434+END_OPCODES()
435+END_MNEMONIC()
436+
437+#undef DEFINE_ALU_OPCODES
438+//
439+//
440+//
441+BEGIN_MNEMONIC(ADDSD, MF_NONE, DU_U)
442+BEGIN_OPCODES()
443+ {OpcodeInfo::all, {0xF2, 0x0F, 0x58, _r}, {xmm64, xmm_m64}, DU_U},
444+END_OPCODES()
445+END_MNEMONIC()
446+
447+BEGIN_MNEMONIC(ADDSS, MF_NONE, DU_U)
448+BEGIN_OPCODES()
449+ {OpcodeInfo::all, {0xF3, 0x0F, 0x58, _r}, {xmm32, xmm_m32}, DU_U},
450+END_OPCODES()
451+END_MNEMONIC()
452+
453+
454+BEGIN_MNEMONIC(BSF, MF_AFFECTS_FLAGS, N)
455+BEGIN_OPCODES()
456+ {OpcodeInfo::all, {0x0F, 0xBC}, {r32, r_m32}, D_U},
457+END_OPCODES()
458+END_MNEMONIC()
459+
460+BEGIN_MNEMONIC(BSR, MF_AFFECTS_FLAGS, N)
461+BEGIN_OPCODES()
462+ {OpcodeInfo::all, {0x0F, 0xBD}, {r32, r_m32}, D_U},
463+END_OPCODES()
464+END_MNEMONIC()
465+
466+
467+BEGIN_MNEMONIC(CALL, MF_NONE, U )
468+BEGIN_OPCODES()
469+ {OpcodeInfo::all, {0xE8, cd}, {rel32}, U },
470+ {OpcodeInfo::ia32, {Size16, 0xE8, cw}, {rel16}, U },
471+ {OpcodeInfo::ia32, {0xFF, _2}, {r_m32}, U },
472+ {OpcodeInfo::em64t, {0xFF, _2}, {r_m64}, U },
473+END_OPCODES()
474+END_MNEMONIC()
475+
476+BEGIN_MNEMONIC(CMC, MF_USES_FLAGS|MF_AFFECTS_FLAGS, N)
477+BEGIN_OPCODES()
478+ {OpcodeInfo::decoder, {0xF5}, {}, N },
479+END_OPCODES()
480+END_MNEMONIC()
481+
482+//TODO: Workaround. Actually, it's D_DU, but Jitrino's CG thinks it's D_U
483+BEGIN_MNEMONIC(CDQ, MF_NONE, D_U )
484+BEGIN_OPCODES()
485+ {OpcodeInfo::all, {0x99}, {DX, AX}, D_U },
486+ {OpcodeInfo::all, {0x99}, {EDX, EAX}, D_U },
487+ {OpcodeInfo::em64t, {REX_W, 0x99}, {RDX, RAX}, D_U },
488+END_OPCODES()
489+END_MNEMONIC()
490+
491+#define DEFINE_CMOVcc_MNEMONIC( cc ) \
492+ BEGIN_MNEMONIC(CMOV##cc, MF_USES_FLAGS|MF_CONDITIONAL, DU_U ) \
493+BEGIN_OPCODES() \
494+ {OpcodeInfo::all, {Size16, 0x0F, 0x40 + ConditionMnemonic_##cc, _r}, {r16, r_m16}, DU_U }, \
495+ {OpcodeInfo::all, {0x0F, 0x40 + ConditionMnemonic_##cc, _r}, {r32, r_m32}, DU_U }, \
496+ {OpcodeInfo::em64t, {REX_W, 0x0F, 0x40 + ConditionMnemonic_##cc, _r}, {r64, r_m64}, DU_U }, \
497+END_OPCODES() \
498+END_MNEMONIC()
499+
500+DEFINE_CMOVcc_MNEMONIC(O)
501+DEFINE_CMOVcc_MNEMONIC(NO)
502+DEFINE_CMOVcc_MNEMONIC(B)
503+DEFINE_CMOVcc_MNEMONIC(NB)
504+DEFINE_CMOVcc_MNEMONIC(Z)
505+DEFINE_CMOVcc_MNEMONIC(NZ)
506+DEFINE_CMOVcc_MNEMONIC(BE)
507+DEFINE_CMOVcc_MNEMONIC(NBE)
508+DEFINE_CMOVcc_MNEMONIC(S)
509+DEFINE_CMOVcc_MNEMONIC(NS)
510+DEFINE_CMOVcc_MNEMONIC(P)
511+DEFINE_CMOVcc_MNEMONIC(NP)
512+DEFINE_CMOVcc_MNEMONIC(L)
513+DEFINE_CMOVcc_MNEMONIC(NL)
514+DEFINE_CMOVcc_MNEMONIC(LE)
515+DEFINE_CMOVcc_MNEMONIC(NLE)
516+
517+#undef DEFINE_CMOVcc_MNEMONIC
518+
519+/*****************************************************************************
520+ ***** SSE conversion routines *****
521+*****************************************************************************/
522+//
523+// double -> float
524+BEGIN_MNEMONIC(CVTSD2SS, MF_NONE, D_U )
525+BEGIN_OPCODES()
526+ {OpcodeInfo::all, {0xF2, 0x0F, 0x5A, _r}, {xmm32, xmm_m64}, D_U },
527+END_OPCODES()
528+END_MNEMONIC()
529+
530+// double -> I_32
531+BEGIN_MNEMONIC(CVTSD2SI, MF_NONE, D_U )
532+BEGIN_OPCODES()
533+ {OpcodeInfo::all, {0xF2, 0x0F, 0x2D, _r}, {r32, xmm_m64}, D_U },
534+ {OpcodeInfo::em64t, {REX_W, 0xF2, 0x0F, 0x2D, _r}, {r64, xmm_m64}, D_U },
535+END_OPCODES()
536+END_MNEMONIC()
537+
538+// double [truncated] -> I_32
539+BEGIN_MNEMONIC(CVTTSD2SI, MF_NONE, D_U )
540+BEGIN_OPCODES()
541+ {OpcodeInfo::all, {0xF2, 0x0F, 0x2C, _r}, {r32, xmm_m64}, D_U },
542+ {OpcodeInfo::em64t, {REX_W, 0xF2, 0x0F, 0x2C, _r}, {r64, xmm_m64}, D_U },
543+END_OPCODES()
544+END_MNEMONIC()
545+
546+// float -> double
547+BEGIN_MNEMONIC(CVTSS2SD, MF_NONE, D_U )
548+BEGIN_OPCODES()
549+ {OpcodeInfo::all, {0xF3, 0x0F, 0x5A, _r}, {xmm64, xmm_m32}, D_U },
550+END_OPCODES()
551+END_MNEMONIC()
552+
553+// float -> I_32
554+BEGIN_MNEMONIC(CVTSS2SI, MF_NONE, D_U )
555+BEGIN_OPCODES()
556+ {OpcodeInfo::all, {0xF3, 0x0F, 0x2D, _r}, {r32, xmm_m32}, D_U},
557+ {OpcodeInfo::em64t, {REX_W, 0xF3, 0x0F, 0x2D, _r}, {r64, xmm_m32}, D_U},
558+END_OPCODES()
559+END_MNEMONIC()
560+
561+// float [truncated] -> I_32
562+BEGIN_MNEMONIC(CVTTSS2SI, MF_NONE, D_U )
563+BEGIN_OPCODES()
564+ {OpcodeInfo::all, {0xF3, 0x0F, 0x2C, _r}, {r32, xmm_m32}, D_U},
565+ {OpcodeInfo::em64t, {REX_W, 0xF3, 0x0F, 0x2C, _r}, {r64, xmm_m32}, D_U},
566+END_OPCODES()
567+END_MNEMONIC()
568+
569+// I_32 -> double
570+BEGIN_MNEMONIC(CVTSI2SD, MF_NONE, D_U )
571+BEGIN_OPCODES()
572+ {OpcodeInfo::all, {0xF2, 0x0F, 0x2A, _r}, {xmm64, r_m32}, D_U},
573+ {OpcodeInfo::em64t, {REX_W, 0xF2, 0x0F, 0x2A, _r}, {xmm64, r_m64}, D_U},
574+END_OPCODES()
575+END_MNEMONIC()
576+
577+// I_32 -> float
578+BEGIN_MNEMONIC(CVTSI2SS, MF_NONE, D_U )
579+BEGIN_OPCODES()
580+ {OpcodeInfo::all, {0xF3, 0x0F, 0x2A, _r}, {xmm32, r_m32}, D_U},
581+ {OpcodeInfo::em64t, {REX_W, 0xF3, 0x0F, 0x2A, _r}, {xmm32, r_m64}, D_U},
582+END_OPCODES()
583+END_MNEMONIC()
584+
585+//
586+// ~ SSE conversions
587+//
588+
589+BEGIN_MNEMONIC(DEC, MF_AFFECTS_FLAGS, DU )
590+BEGIN_OPCODES()
591+ {OpcodeInfo::all, {0xFE, _1}, {r_m8}, DU },
592+
593+ {OpcodeInfo::all, {Size16, 0xFF, _1}, {r_m16}, DU },
594+ {OpcodeInfo::all, {0xFF, _1}, {r_m32}, DU },
595+ {OpcodeInfo::em64t, {REX_W, 0xFF, _1}, {r_m64}, DU },
596+
597+ {OpcodeInfo::ia32, {Size16, 0x48|rw}, {r16}, DU },
598+ {OpcodeInfo::ia32, {0x48|rd}, {r32}, DU },
599+END_OPCODES()
600+END_MNEMONIC()
601+
602+
603+BEGIN_MNEMONIC(DIVSD, MF_NONE, DU_U)
604+BEGIN_OPCODES()
605+ {OpcodeInfo::all, {0xF2, 0x0F, 0x5E, _r}, {xmm64, xmm_m64}, DU_U },
606+END_OPCODES()
607+END_MNEMONIC()
608+
609+
610+BEGIN_MNEMONIC(DIVSS, MF_NONE, DU_U)
611+BEGIN_OPCODES()
612+ {OpcodeInfo::all, {0xF3, 0x0F, 0x5E, _r}, {xmm32, xmm_m32}, DU_U },
613+END_OPCODES()
614+END_MNEMONIC()
615+
616+/****************************************************************************
617+ ***** FPU operations *****
618+****************************************************************************/
619+
620+BEGIN_MNEMONIC(FADDP, MF_NONE, DU )
621+BEGIN_OPCODES()
622+ {OpcodeInfo::all, {0xDE, 0xC1}, {FP0D}, DU },
623+ {OpcodeInfo::all, {0xDE, 0xC1}, {FP0S}, DU },
624+END_OPCODES()
625+END_MNEMONIC()
626+
627+BEGIN_MNEMONIC(FLDZ, MF_NONE, U )
628+BEGIN_OPCODES()
629+ {OpcodeInfo::all, {0xD9, 0xEE}, {FP0D}, D },
630+ {OpcodeInfo::all, {0xD9, 0xEE}, {FP0S}, D },
631+END_OPCODES()
632+END_MNEMONIC()
633+
634+BEGIN_MNEMONIC(FADD, MF_NONE, U )
635+BEGIN_OPCODES()
636+ {OpcodeInfo::all, {0xDC, _0}, {FP0D, m64}, DU_U },
637+ {OpcodeInfo::all, {0xD8, _0}, {FP0S, m32}, DU_U },
638+END_OPCODES()
639+END_MNEMONIC()
640+
641+BEGIN_MNEMONIC(FSUBP, MF_NONE, DU )
642+BEGIN_OPCODES()
643+ {OpcodeInfo::all, {0xDE, 0xE9}, {FP0D}, DU },
644+ {OpcodeInfo::all, {0xDE, 0xE9}, {FP0S}, DU },
645+END_OPCODES()
646+END_MNEMONIC()
647+
648+BEGIN_MNEMONIC(FSUB, MF_NONE, U )
649+BEGIN_OPCODES()
650+ {OpcodeInfo::all, {0xDC, _4}, {FP0D, m64}, DU_U },
651+ {OpcodeInfo::all, {0xD8, _4}, {FP0S, m32}, DU_U },
652+END_OPCODES()
653+END_MNEMONIC()
654+
655+BEGIN_MNEMONIC(FISUB, MF_NONE, U )
656+BEGIN_OPCODES()
657+ {OpcodeInfo::all, {0xDA, _4}, {FP0S, m32}, DU_U },
658+// {OpcodeInfo::all, {0xDE, _4}, {FP0S, m16}, DU_U },
659+END_OPCODES()
660+END_MNEMONIC()
661+
662+
663+
664+BEGIN_MNEMONIC(FMUL, MF_NONE, DU_U )
665+BEGIN_OPCODES()
666+ {OpcodeInfo::all, {0xD8, _1}, {FP0S, m32}, DU_U },
667+ {OpcodeInfo::all, {0xDC, _1}, {FP0D, m64}, DU_U },
668+END_OPCODES()
669+END_MNEMONIC()
670+
671+BEGIN_MNEMONIC(FMULP, MF_NONE, DU )
672+BEGIN_OPCODES()
673+ {OpcodeInfo::all, {0xDE, 0xC9}, {FP0D}, DU },
674+ {OpcodeInfo::all, {0xDE, 0xC9}, {FP0S}, DU },
675+END_OPCODES()
676+END_MNEMONIC()
677+
678+BEGIN_MNEMONIC(FDIVP, MF_NONE, DU )
679+BEGIN_OPCODES()
680+ {OpcodeInfo::all, {0xDE, 0xF9}, {FP0D}, DU },
681+ {OpcodeInfo::all, {0xDE, 0xF9}, {FP0S}, DU },
682+END_OPCODES()
683+END_MNEMONIC()
684+
685+BEGIN_MNEMONIC(FDIV, MF_NONE, U )
686+BEGIN_OPCODES()
687+ {OpcodeInfo::all, {0xDC, _6}, {FP0D, m64}, DU_U },
688+ {OpcodeInfo::all, {0xD8, _6}, {FP0S, m32}, DU_U },
689+END_OPCODES()
690+END_MNEMONIC()
691+
692+
693+BEGIN_MNEMONIC(FUCOM, MF_NONE, D_U )
694+BEGIN_OPCODES()
695+ {OpcodeInfo::all, {0xDD, 0xE1}, {FP0D, FP1D}, DU_U },
696+ {OpcodeInfo::all, {0xDD, 0xE1}, {FP0S, FP1S}, DU_U },
697+ // A little trick: these two opcodes actually take only the index of the
698+ // needed register. To keep things uniform with the other instructions,
699+ // we encode them here as if they took an FPREG.
700+ {OpcodeInfo::all, {0xDD, 0xE0|_i}, {fp32}, DU },
701+ {OpcodeInfo::all, {0xDD, 0xE0|_i}, {fp64}, DU },
702+END_OPCODES()
703+END_MNEMONIC()
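+// For illustration: with the 0xE0|_i trick above, FUCOM ST(2) comes out as
+// 0xDD 0xE2 - the base byte plus the stack-register index, matching the
+// "DD E0+i" form in the Intel manual.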
704+
705+BEGIN_MNEMONIC(FUCOMI, MF_NONE, D_U )
706+BEGIN_OPCODES()
707+ // Same register-index trick as in FUCOM above.
710+ {OpcodeInfo::all, {0xDB, 0xE8|_i}, {fp32}, DU },
711+ {OpcodeInfo::all, {0xDB, 0xE8|_i}, {fp64}, DU },
712+END_OPCODES()
713+END_MNEMONIC()
714+
715+BEGIN_MNEMONIC(FUCOMP, MF_NONE, D_U )
716+BEGIN_OPCODES()
717+ {OpcodeInfo::all, {0xDD, 0xE9}, {FP0D, FP1D}, DU_U },
718+ {OpcodeInfo::all, {0xDD, 0xE9}, {FP0S, FP1S}, DU_U },
719+ // Same register-index trick as in FUCOM above.
722+ {OpcodeInfo::all, {0xDD, 0xE8|_i}, {fp32}, DU },
723+ {OpcodeInfo::all, {0xDD, 0xE8|_i}, {fp64}, DU },
724+END_OPCODES()
725+END_MNEMONIC()
726+
727+BEGIN_MNEMONIC(FUCOMIP, MF_NONE, D_U )
728+BEGIN_OPCODES()
729+ // Same register-index trick as in FUCOM above.
732+ {OpcodeInfo::all, {0xDF, 0xE8|_i}, {fp32}, DU },
733+ {OpcodeInfo::all, {0xDF, 0xE8|_i}, {fp64}, DU },
734+END_OPCODES()
735+END_MNEMONIC()
736+
737+BEGIN_MNEMONIC(FUCOMPP, MF_NONE, U )
738+BEGIN_OPCODES()
739+ {OpcodeInfo::all, {0xDA, 0xE9}, {FP0D, FP1D}, DU_U },
740+ {OpcodeInfo::all, {0xDA, 0xE9}, {FP0S, FP1S}, DU_U },
741+END_OPCODES()
742+END_MNEMONIC()
743+
744+BEGIN_MNEMONIC(FLDCW, MF_NONE, U )
745+BEGIN_OPCODES()
746+ {OpcodeInfo::all, {0xD9, _5}, {m16}, U },
747+END_OPCODES()
748+END_MNEMONIC()
749+
750+BEGIN_MNEMONIC(FNSTCW, MF_NONE, D)
751+BEGIN_OPCODES()
752+ {OpcodeInfo::all, {0xD9, _7}, {m16}, D },
753+END_OPCODES()
754+END_MNEMONIC()
755+
756+BEGIN_MNEMONIC(FSTSW, MF_NONE, D)
757+BEGIN_OPCODES()
758+ {OpcodeInfo::all, {0x9B, 0xDF, 0xE0}, {EAX}, D },
759+END_OPCODES()
760+END_MNEMONIC()
761+
762+BEGIN_MNEMONIC(FNSTSW, MF_NONE, D)
763+BEGIN_OPCODES()
764+ {OpcodeInfo::all, {0xDF, 0xE0}, {EAX}, D },
765+END_OPCODES()
766+END_MNEMONIC()
767+
768+BEGIN_MNEMONIC(FCHS, MF_NONE, DU )
769+BEGIN_OPCODES()
770+ {OpcodeInfo::all, {0xD9, 0xE0}, {FP0D}, DU },
771+ {OpcodeInfo::all, {0xD9, 0xE0}, {FP0S}, DU },
772+END_OPCODES()
773+END_MNEMONIC()
774+
775+BEGIN_MNEMONIC(FCLEX, MF_NONE, N)
776+BEGIN_OPCODES()
777+ {OpcodeInfo::all, {0x9B, 0xDB, 0xE2}, {}, N },
778+END_OPCODES()
779+END_MNEMONIC()
780+
781+BEGIN_MNEMONIC(FNCLEX, MF_NONE, N)
782+BEGIN_OPCODES()
783+ {OpcodeInfo::all, {0xDB, 0xE2}, {}, N },
784+END_OPCODES()
785+END_MNEMONIC()
786+
787+//BEGIN_MNEMONIC(FDECSTP, MF_NONE, N)
788+// BEGIN_OPCODES()
789+// {OpcodeInfo::all, {0xD9, 0xF6}, {}, N },
790+// END_OPCODES()
791+//END_MNEMONIC()
792+
793+BEGIN_MNEMONIC(FILD, MF_NONE, D_U )
794+BEGIN_OPCODES()
795+ {OpcodeInfo::all, {0xDB, _0}, {FP0S, m32}, D_U },
796+ {OpcodeInfo::all, {0xDF, _5}, {FP0D, m64}, D_U },
798+END_OPCODES()
799+END_MNEMONIC()
800+
801+//BEGIN_MNEMONIC(FINCSTP, MF_NONE, N)
802+// BEGIN_OPCODES()
803+// {OpcodeInfo::all, {0xD9, 0xF7}, {}, N },
804+// END_OPCODES()
805+//END_MNEMONIC()
806+
807+BEGIN_MNEMONIC(FIST, MF_NONE, D_U )
808+BEGIN_OPCODES()
809+ {OpcodeInfo::all, {0xDB, _2}, {m32, FP0S}, D_U },
810+END_OPCODES()
811+END_MNEMONIC()
812+
813+BEGIN_MNEMONIC(FISTP, MF_NONE, D_U )
814+BEGIN_OPCODES()
815+ {OpcodeInfo::all, {0xDB, _3}, {m32, FP0S}, D_U },
816+ {OpcodeInfo::all, {0xDF, _7}, {m64, FP0D}, D_U },
817+END_OPCODES()
818+END_MNEMONIC()
819+
820+BEGIN_MNEMONIC(FISTTP, MF_NONE, D_U )
821+BEGIN_OPCODES()
822+ {OpcodeInfo::all, {0xDD, _1}, {m64, FP0D}, D_U },
823+ {OpcodeInfo::all, {0xDB, _1}, {m32, FP0S}, D_U },
824+END_OPCODES()
825+END_MNEMONIC()
826+
827+BEGIN_MNEMONIC(FRNDINT, MF_NONE, DU )
828+BEGIN_OPCODES()
829+ {OpcodeInfo::all, {0xD9, 0xFC}, {FP0S}, DU },
830+ {OpcodeInfo::all, {0xD9, 0xFC}, {FP0D}, DU },
831+END_OPCODES()
832+END_MNEMONIC()
833+
834+BEGIN_MNEMONIC(FLD, MF_NONE, D_U )
835+BEGIN_OPCODES()
836+ {OpcodeInfo::all, {0xD9, _0}, {FP0S, m32}, D_U },
837+ {OpcodeInfo::all, {0xDD, _0}, {FP0D, m64}, D_U },
838+END_OPCODES()
839+END_MNEMONIC()
840+
841+BEGIN_MNEMONIC(FLDLG2, MF_NONE, U )
842+BEGIN_OPCODES()
843+ {OpcodeInfo::all, {0xD9, 0xEC}, {FP0S}, D },
844+ {OpcodeInfo::all, {0xD9, 0xEC}, {FP0D}, D },
845+END_OPCODES()
846+END_MNEMONIC()
847+
848+BEGIN_MNEMONIC(FLDLN2, MF_NONE, U )
849+BEGIN_OPCODES()
850+ {OpcodeInfo::all, {0xD9, 0xED}, {FP0S}, D },
851+ {OpcodeInfo::all, {0xD9, 0xED}, {FP0D}, D },
852+END_OPCODES()
853+END_MNEMONIC()
854+
855+BEGIN_MNEMONIC(FLD1, MF_NONE, U )
856+BEGIN_OPCODES()
857+ {OpcodeInfo::all, {0xD9, 0xE8}, {FP0S}, D },
858+ {OpcodeInfo::all, {0xD9, 0xE8}, {FP0D}, D },
859+END_OPCODES()
860+END_MNEMONIC()
861+
862+
863+BEGIN_MNEMONIC(FPREM, MF_NONE, N)
864+ BEGIN_OPCODES()
865+ {OpcodeInfo::all, {0xD9, 0xF8}, {}, N },
866+ END_OPCODES()
867+END_MNEMONIC()
868+
869+BEGIN_MNEMONIC(FPREM1, MF_NONE, N)
870+BEGIN_OPCODES()
871+ {OpcodeInfo::all, {0xD9, 0xF5}, {}, N },
872+END_OPCODES()
873+END_MNEMONIC()
874+
875+BEGIN_MNEMONIC(FST, MF_NONE, D_U )
876+BEGIN_OPCODES()
877+ {OpcodeInfo::all, {0xD9, _2}, {m32, FP0S}, D_U },
878+ {OpcodeInfo::all, {0xDD, _2}, {m64, FP0D}, D_U },
879+ // A little trick: these two opcodes actually take only the index of the
880+ // needed register. To keep things uniform with the other instructions,
881+ // we encode them here as if they took an FPREG.
882+ {OpcodeInfo::all, {0xDD, 0xD0|_i}, {fp32}, D },
883+ {OpcodeInfo::all, {0xDD, 0xD0|_i}, {fp64}, D },
884+END_OPCODES()
885+END_MNEMONIC()
886+
887+BEGIN_MNEMONIC(FSTP, MF_NONE, D_U )
888+BEGIN_OPCODES()
889+ {OpcodeInfo::all, {0xD9, _3}, {m32, FP0S}, D_U },
890+ {OpcodeInfo::all, {0xDD, _3}, {m64, FP0D}, D_U },
891+ // A little trick: these two opcodes actually take only the index of the
892+ // needed register. To keep things uniform with the other instructions,
893+ // we encode them here as if they took an FPREG.
894+ {OpcodeInfo::all, {0xDD, 0xD8|_i}, {fp32}, D },
895+ {OpcodeInfo::all, {0xDD, 0xD8|_i}, {fp64}, D },
896+END_OPCODES()
897+END_MNEMONIC()
898+
899+BEGIN_MNEMONIC(FSQRT, MF_NONE, DU)
900+ BEGIN_OPCODES()
901+ {OpcodeInfo::all, {0xD9, 0xFA}, {FP0S}, DU },
902+ {OpcodeInfo::all, {0xD9, 0xFA}, {FP0D}, DU },
903+ END_OPCODES()
904+END_MNEMONIC()
905+
906+
907+BEGIN_MNEMONIC(FYL2X, MF_NONE, DU)
908+ BEGIN_OPCODES()
909+ {OpcodeInfo::all, {0xD9, 0xF1}, {FP0S}, DU },
910+ {OpcodeInfo::all, {0xD9, 0xF1}, {FP0D}, DU },
911+ END_OPCODES()
912+END_MNEMONIC()
913+
914+
915+BEGIN_MNEMONIC(FYL2XP1, MF_NONE, DU)
916+ BEGIN_OPCODES()
917+ {OpcodeInfo::all, {0xD9, 0xF9}, {FP0S}, DU },
918+ {OpcodeInfo::all, {0xD9, 0xF9}, {FP0D}, DU },
919+ END_OPCODES()
920+END_MNEMONIC()
921+
922+BEGIN_MNEMONIC(F2XM1, MF_NONE, DU)
923+ BEGIN_OPCODES()
924+ {OpcodeInfo::all, {0xD9, 0xF0}, {FP0S}, DU },
925+ {OpcodeInfo::all, {0xD9, 0xF0}, {FP0D}, DU },
926+ END_OPCODES()
927+END_MNEMONIC()
928+
929+BEGIN_MNEMONIC(FPATAN, MF_NONE, DU)
930+ BEGIN_OPCODES()
931+ {OpcodeInfo::all, {0xD9, 0xF3}, {FP0S}, DU },
932+ {OpcodeInfo::all, {0xD9, 0xF3}, {FP0D}, DU },
933+ END_OPCODES()
934+END_MNEMONIC()
935+
936+BEGIN_MNEMONIC(FXCH, MF_NONE, DU)
937+ BEGIN_OPCODES()
938+ {OpcodeInfo::all, {0xD9, 0xC9}, {FP0S}, DU },
939+ {OpcodeInfo::all, {0xD9, 0xC9}, {FP0D}, DU },
940+ END_OPCODES()
941+END_MNEMONIC()
942+
943+BEGIN_MNEMONIC(FSCALE, MF_NONE, DU)
944+ BEGIN_OPCODES()
945+ {OpcodeInfo::all, {0xD9, 0xFD}, {FP0S}, DU },
946+ {OpcodeInfo::all, {0xD9, 0xFD}, {FP0D}, DU },
947+ END_OPCODES()
948+END_MNEMONIC()
949+
950+BEGIN_MNEMONIC(FABS, MF_NONE, DU)
951+ BEGIN_OPCODES()
952+ {OpcodeInfo::all, {0xD9, 0xE1}, {FP0S}, DU },
953+ {OpcodeInfo::all, {0xD9, 0xE1}, {FP0D}, DU },
954+ END_OPCODES()
955+END_MNEMONIC()
956+
957+BEGIN_MNEMONIC(FSIN, MF_NONE, DU)
958+ BEGIN_OPCODES()
959+ {OpcodeInfo::all, {0xD9, 0xFE}, {FP0S}, DU },
960+ {OpcodeInfo::all, {0xD9, 0xFE}, {FP0D}, DU },
961+ END_OPCODES()
962+END_MNEMONIC()
963+
964+BEGIN_MNEMONIC(FCOS, MF_NONE, DU)
965+ BEGIN_OPCODES()
966+ {OpcodeInfo::all, {0xD9, 0xFF}, {FP0S}, DU },
967+ {OpcodeInfo::all, {0xD9, 0xFF}, {FP0D}, DU },
968+ END_OPCODES()
969+END_MNEMONIC()
970+
971+BEGIN_MNEMONIC(FPTAN, MF_NONE, DU)
972+ BEGIN_OPCODES()
973+ {OpcodeInfo::all, {0xD9, 0xF2}, {FP0S}, DU },
974+ {OpcodeInfo::all, {0xD9, 0xF2}, {FP0D}, DU },
975+ END_OPCODES()
976+END_MNEMONIC()
977+
978+//
979+// ~ FPU
980+//
981+
982+BEGIN_MNEMONIC(DIV, MF_AFFECTS_FLAGS, DU_DU_U)
983+BEGIN_OPCODES()
984+#if !defined(_EM64T_)
985+ {OpcodeInfo::all, {0xF6, _6}, {AH, AL, r_m8}, DU_DU_U },
986+ {OpcodeInfo::all, {Size16, 0xF7, _6}, {DX, AX, r_m16}, DU_DU_U },
987+#endif
988+ {OpcodeInfo::all, {0xF7, _6}, {EDX, EAX, r_m32}, DU_DU_U },
989+ {OpcodeInfo::em64t, {REX_W, 0xF7, _6}, {RDX, RAX, r_m64}, DU_DU_U },
990+END_OPCODES()
991+END_MNEMONIC()
992+
993+BEGIN_MNEMONIC(IDIV, MF_AFFECTS_FLAGS, DU_DU_U)
994+BEGIN_OPCODES()
995+#if !defined(_EM64T_)
996+ {OpcodeInfo::all, {0xF6, _7}, {AH, AL, r_m8}, DU_DU_U },
997+ {OpcodeInfo::all, {Size16, 0xF7, _7}, {DX, AX, r_m16}, DU_DU_U },
998+#endif
999+ {OpcodeInfo::all, {0xF7, _7}, {EDX, EAX, r_m32}, DU_DU_U },
1000+ {OpcodeInfo::em64t, {REX_W, 0xF7, _7}, {RDX, RAX, r_m64}, DU_DU_U },
1001+END_OPCODES()
1002+END_MNEMONIC()
1003+
1004+
1005+BEGIN_MNEMONIC(IMUL, MF_AFFECTS_FLAGS, D_DU_U)
1006+BEGIN_OPCODES()
1007+ /*{OpcodeInfo::all, {0xF6, _5}, {AH, AL, r_m8}, D_DU_U },
1008+ {OpcodeInfo::all, {Size16, 0xF7, _5}, {DX, AX, r_m16}, D_DU_U },
1009+ */
1010+ //
1011+ {OpcodeInfo::all, {0xF7, _5}, {EDX, EAX, r_m32}, D_DU_U },
1012+ //todo: this opcode's hash conflicts with IMUL r64,r_m64 - they're both 0.
1013+ // This particular one is not currently used, so we may safely drop it, but we
1014+ // need to revisit the hash implementation.
1015+ // {OpcodeInfo::em64t, {REX_W, 0xF7, _5}, {RDX, RAX, r_m64}, D_DU_U },
1016+ //
1017+ {OpcodeInfo::all, {Size16, 0x0F, 0xAF, _r}, {r16,r_m16}, DU_U },
1018+ {OpcodeInfo::all, {0x0F, 0xAF, _r}, {r32,r_m32}, DU_U },
1019+ {OpcodeInfo::em64t, {REX_W, 0x0F, 0xAF, _r}, {r64,r_m64}, DU_U },
1020+ {OpcodeInfo::all, {Size16, 0x6B, _r, ib}, {r16,r_m16,imm8s}, D_DU_U },
1021+ {OpcodeInfo::all, {0x6B, _r, ib}, {r32,r_m32,imm8s}, D_DU_U },
1022+ {OpcodeInfo::em64t, {REX_W, 0x6B, _r, ib}, {r64,r_m64,imm8s}, D_DU_U },
1023+ {OpcodeInfo::all, {Size16, 0x6B, _r, ib}, {r16,imm8s}, DU_U },
1024+ {OpcodeInfo::all, {0x6B, _r, ib}, {r32,imm8s}, DU_U },
1025+ {OpcodeInfo::em64t, {REX_W, 0x6B, _r, ib}, {r64,imm8s}, DU_U },
1026+ {OpcodeInfo::all, {Size16, 0x69, _r, iw}, {r16,r_m16,imm16}, D_U_U },
1027+ {OpcodeInfo::all, {0x69, _r, id}, {r32,r_m32,imm32}, D_U_U },
1028+ {OpcodeInfo::em64t, {REX_W, 0x69, _r, id}, {r64,r_m64,imm32s}, D_U_U },
1029+ {OpcodeInfo::all, {Size16, 0x69, _r, iw}, {r16,imm16}, DU_U },
1030+ {OpcodeInfo::all, {0x69, _r, id}, {r32,imm32}, DU_U },
1031+END_OPCODES()
1032+END_MNEMONIC()
1033+
1034+BEGIN_MNEMONIC(MUL, MF_AFFECTS_FLAGS, U )
1035+BEGIN_OPCODES()
1036+ {OpcodeInfo::all, {0xF6, _4}, {AX, AL, r_m8}, D_DU_U },
1037+ {OpcodeInfo::all, {Size16, 0xF7, _4}, {DX, AX, r_m16}, D_DU_U },
1038+ {OpcodeInfo::all, {0xF7, _4}, {EDX, EAX, r_m32}, D_DU_U },
1039+ {OpcodeInfo::em64t, {REX_W, 0xF7, _4}, {RDX, RAX, r_m64}, D_DU_U },
1040+END_OPCODES()
1041+END_MNEMONIC()
1042+
1043+BEGIN_MNEMONIC(INC, MF_AFFECTS_FLAGS, DU )
1044+BEGIN_OPCODES()
1045+ {OpcodeInfo::all, {0xFE, _0}, {r_m8}, DU },
1046+ {OpcodeInfo::all, {Size16, 0xFF, _0}, {r_m16}, DU },
1047+ {OpcodeInfo::all, {0xFF, _0}, {r_m32}, DU },
1048+ {OpcodeInfo::em64t, {REX_W, 0xFF, _0}, {r_m64}, DU },
1049+ {OpcodeInfo::ia32, {Size16, 0x40|rw}, {r16}, DU },
1050+ {OpcodeInfo::ia32, {0x40|rd}, {r32}, DU },
1051+END_OPCODES()
1052+END_MNEMONIC()
1053+
1054+BEGIN_MNEMONIC(INT3, MF_NONE, N)
1055+BEGIN_OPCODES()
1056+ {OpcodeInfo::all, {0xCC}, {}, N },
1057+END_OPCODES()
1058+END_MNEMONIC()
1059+
1060+#define DEFINE_Jcc_MNEMONIC( cc ) \
1061+ BEGIN_MNEMONIC(J##cc, MF_USES_FLAGS|MF_CONDITIONAL, U ) \
1062+BEGIN_OPCODES() \
1063+ {OpcodeInfo::all, {0x70 + ConditionMnemonic_##cc, cb }, { rel8 }, U }, \
1064+ {OpcodeInfo::ia32, {Size16, 0x0F, 0x80 + ConditionMnemonic_##cc, cw}, { rel16 }, U }, \
1065+ {OpcodeInfo::all, {0x0F, 0x80 + ConditionMnemonic_##cc, cd}, { rel32 }, U }, \
1066+END_OPCODES() \
1067+END_MNEMONIC()
1068+
1069+
1070+DEFINE_Jcc_MNEMONIC(O)
1071+DEFINE_Jcc_MNEMONIC(NO)
1072+DEFINE_Jcc_MNEMONIC(B)
1073+DEFINE_Jcc_MNEMONIC(NB)
1074+DEFINE_Jcc_MNEMONIC(Z)
1075+DEFINE_Jcc_MNEMONIC(NZ)
1076+DEFINE_Jcc_MNEMONIC(BE)
1077+DEFINE_Jcc_MNEMONIC(NBE)
1078+
1079+DEFINE_Jcc_MNEMONIC(S)
1080+DEFINE_Jcc_MNEMONIC(NS)
1081+DEFINE_Jcc_MNEMONIC(P)
1082+DEFINE_Jcc_MNEMONIC(NP)
1083+DEFINE_Jcc_MNEMONIC(L)
1084+DEFINE_Jcc_MNEMONIC(NL)
1085+DEFINE_Jcc_MNEMONIC(LE)
1086+DEFINE_Jcc_MNEMONIC(NLE)
1087+
1088+#undef DEFINE_Jcc_MNEMONIC
1089+
1090+BEGIN_MNEMONIC(JMP, MF_NONE, U )
1091+BEGIN_OPCODES()
1092+ {OpcodeInfo::all, {0xEB, cb}, {rel8}, U },
1093+ {OpcodeInfo::ia32, {Size16, 0xE9, cw}, {rel16}, U },
1094+ {OpcodeInfo::all, {0xE9, cd}, {rel32}, U },
1095+ {OpcodeInfo::ia32, {Size16, 0xFF, _4}, {r_m16}, U },
1096+ {OpcodeInfo::ia32, {0xFF, _4}, {r_m32}, U },
1097+ {OpcodeInfo::em64t, {0xFF, _4}, {r_m64}, U },
1098+END_OPCODES()
1099+END_MNEMONIC()
1100+
1101+BEGIN_MNEMONIC(LEA, MF_NONE, D_U )
1102+BEGIN_OPCODES()
1103+ /*
1104+ A special case: the LEA instruction itself does not care about the size
1105+ of its second operand, which is why The Manual uses a plain 'm' with no
1106+ size attached.
1107+ However, Jitrino's instructions always carry an operand with a size.
1108+ Also, the hashing scheme is not supposed to handle OpndSize_Null, and
1109+ making it do so would complicate the hashing scheme unnecessarily.
1110+ Thus, instead of handling LEA as a special case, we simply make
1111+ copies of the opcodes with the sizes set.
1112+ {OpcodeInfo::all, {0x8D, _r}, {r32, m}, D_U },
1113+ {OpcodeInfo::em64t, {0x8D, _r}, {r64, m}, D_U },
1114+ */
1115+ //Android x86: keep r32, m32 only; otherwise we get a decoding error
1116+ //{OpcodeInfo::all, {0x8D, _r}, {r32, m8}, D_U },
1117+ {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m8}, D_U },
1118+ //{OpcodeInfo::all, {0x8D, _r}, {r32, m16}, D_U },
1119+ {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m16}, D_U },
1120+ {OpcodeInfo::all, {0x8D, _r}, {r32, m32}, D_U },
1121+ {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m32}, D_U },
1122+ {OpcodeInfo::all, {0x8D, _r}, {r32, m64}, D_U },
1123+ {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m64}, D_U },
1124+END_OPCODES()
1125+END_MNEMONIC()
1126+
1127+BEGIN_MNEMONIC(LOOP, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U)
1128+BEGIN_OPCODES()
1129+ {OpcodeInfo::all, {0xE2, cb}, {ECX, rel8}, DU_U },
1130+END_OPCODES()
1131+END_MNEMONIC()
1132+
1133+BEGIN_MNEMONIC(LOOPE, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U)
1134+BEGIN_OPCODES()
1135+ {OpcodeInfo::all, {0xE1, cb}, {ECX, rel8}, DU_U },
1136+END_OPCODES()
1137+END_MNEMONIC()
1138+
1139+BEGIN_MNEMONIC(LOOPNE, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U)
1140+BEGIN_OPCODES()
1141+ {OpcodeInfo::all, {0xE0, cb}, {ECX, rel8}, DU_U },
1142+END_OPCODES()
1143+END_MNEMONIC()
1144+
1145+BEGIN_MNEMONIC(MOV, MF_NONE, D_U)
1146+BEGIN_OPCODES()
1147+ {OpcodeInfo::all, {0x88, _r}, {r_m8,r8}, D_U },
1148+
1149+ {OpcodeInfo::all, {Size16, 0x89, _r}, {r_m16,r16}, D_U },
1150+ {OpcodeInfo::all, {0x89, _r}, {r_m32,r32}, D_U },
1151+ {OpcodeInfo::em64t, {REX_W, 0x89, _r}, {r_m64,r64}, D_U },
1152+ {OpcodeInfo::all, {0x8A, _r}, {r8,r_m8}, D_U },
1153+
1154+ {OpcodeInfo::all, {Size16, 0x8B, _r}, {r16,r_m16}, D_U },
1155+ {OpcodeInfo::all, {0x8B, _r}, {r32,r_m32}, D_U },
1156+ {OpcodeInfo::em64t, {REX_W, 0x8B, _r}, {r64,r_m64}, D_U },
1157+
1158+ {OpcodeInfo::all, {0xB0|rb}, {r8,imm8}, D_U },
1159+
1160+ {OpcodeInfo::all, {Size16, 0xB8|rw}, {r16,imm16}, D_U },
1161+ {OpcodeInfo::all, {0xB8|rd}, {r32,imm32}, D_U },
1162+ {OpcodeInfo::em64t, {REX_W, 0xB8|rd}, {r64,imm64}, D_U },
1163+ {OpcodeInfo::all, {0xC6, _0}, {r_m8,imm8}, D_U },
1164+
1165+ {OpcodeInfo::all, {Size16, 0xC7, _0}, {r_m16,imm16}, D_U },
1166+ {OpcodeInfo::all, {0xC7, _0}, {r_m32,imm32}, D_U },
1167+ {OpcodeInfo::em64t, {REX_W, 0xC7, _0}, {r_m64,imm32s}, D_U },
1168+
1169+ {OpcodeInfo::decoder, {0xA0}, {AL, moff8}, D_U },
1170+ {OpcodeInfo::decoder, {Size16, 0xA1}, {AX, moff16}, D_U },
1171+ {OpcodeInfo::decoder, {0xA1}, {EAX, moff32}, D_U },
1172+ //{OpcodeInfo::decoder64, {REX_W, 0xA1}, {RAX, moff64}, D_U },
1173+
1174+ {OpcodeInfo::decoder, {0xA2}, {moff8, AL}, D_U },
1175+ {OpcodeInfo::decoder, {Size16, 0xA3}, {moff16, AX}, D_U },
1176+ {OpcodeInfo::decoder, {0xA3}, {moff32, EAX}, D_U },
1177+ //{OpcodeInfo::decoder64, {REX_W, 0xA3}, {moff64, RAX}, D_U },
1178+END_OPCODES()
1179+END_MNEMONIC()
1180+
1181+
1182+
1183+BEGIN_MNEMONIC(XCHG, MF_NONE, DU_DU )
1184+BEGIN_OPCODES()
1185+ {OpcodeInfo::all, {0x87, _r}, {r_m32,r32}, DU_DU },
1186+END_OPCODES()
1187+END_MNEMONIC()
1188+
1189+
1190+BEGIN_MNEMONIC(MOVQ, MF_NONE, D_U )
1191+BEGIN_OPCODES()
1192+#ifdef _HAVE_MMX_
1193+ {OpcodeInfo::all, {0x0F, 0x6F, _r}, {mm64, mm_m64}, D_U },
1194+ {OpcodeInfo::all, {0x0F, 0x7F, _r}, {mm_m64, mm64}, D_U },
1195+#endif
1196+ {OpcodeInfo::all, {0xF3, 0x0F, 0x7E }, {xmm64, xmm_m64}, D_U },
1197+ {OpcodeInfo::all, {0x66, 0x0F, 0xD6 }, {xmm_m64, xmm64}, D_U },
1198+// {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x6E, _r}, {xmm64, r_m64}, D_U },
1199+// {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x7E, _r}, {r_m64, xmm64}, D_U },
1200+ {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x6E, _r}, {xmm64, r64}, D_U },
1201+ {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x7E, _r}, {r64, xmm64}, D_U },
1202+END_OPCODES()
1203+END_MNEMONIC()
1204+
1205+
1206+BEGIN_MNEMONIC(MOVD, MF_NONE, D_U )
1207+BEGIN_OPCODES()
1208+ {OpcodeInfo::all, {0x66, 0x0F, 0x6E, _r}, {xmm32, r_m32}, D_U },
1209+ {OpcodeInfo::all, {0x66, 0x0F, 0x7E, _r}, {r_m32, xmm32}, D_U },
1210+END_OPCODES()
1211+END_MNEMONIC()
1212+
1213+//
1214+// A bunch of MMX instructions
1215+//
1216+#ifdef _HAVE_MMX_
1217+
1218+BEGIN_MNEMONIC(EMMS, MF_NONE, N)
1219+BEGIN_OPCODES()
1220+ {OpcodeInfo::all, {0x0F, 0x77}, {}, N },
1221+END_OPCODES()
1222+END_MNEMONIC()
1223+
1224+#endif
1225+
1226+BEGIN_MNEMONIC(PADDQ, MF_NONE, DU_U)
1227+BEGIN_OPCODES()
1228+#ifdef _HAVE_MMX_
1229+ {OpcodeInfo::all, {0x0F, 0xD4, _r}, {mm64, mm_m64}, DU_U },
1230+#endif
1231+ {OpcodeInfo::all, {0x66, 0x0F, 0xD4, _r}, {xmm64, xmm_m64}, DU_U },
1232+END_OPCODES()
1233+END_MNEMONIC()
1234+
1235+BEGIN_MNEMONIC(PAND, MF_NONE, DU_U)
1236+BEGIN_OPCODES()
1237+#ifdef _HAVE_MMX_
1238+ {OpcodeInfo::all, {0x0F, 0xDB, _r}, {mm64, mm_m64}, DU_U },
1239+#endif
1240+ {OpcodeInfo::all, {0x66, 0x0F, 0xDB, _r}, {xmm64, xmm_m64}, DU_U },
1241+END_OPCODES()
1242+END_MNEMONIC()
1243+
1244+BEGIN_MNEMONIC(POR, MF_NONE, DU_U)
1245+BEGIN_OPCODES()
1246+#ifdef _HAVE_MMX_
1247+ {OpcodeInfo::all, {0x0F, 0xEB, _r}, {mm64, mm_m64}, DU_U },
1248+#endif
1249+ {OpcodeInfo::all, {0x66, 0x0F, 0xEB, _r}, {xmm64, xmm_m64}, DU_U },
1250+END_OPCODES()
1251+END_MNEMONIC()
1252+
1253+BEGIN_MNEMONIC(PSUBQ, MF_NONE, DU_U)
1254+BEGIN_OPCODES()
1255+#ifdef _HAVE_MMX_
1256+ {OpcodeInfo::all, {0x0F, 0xFB, _r}, {mm64, mm_m64}, DU_U },
1257+#endif
1258+ {OpcodeInfo::all, {0x66, 0x0F, 0xFB, _r}, {xmm64, xmm_m64}, DU_U },
1259+END_OPCODES()
1260+END_MNEMONIC()
1261+
1262+BEGIN_MNEMONIC(PANDN, MF_NONE, DU_U)
1263+BEGIN_OPCODES()
1264+#ifdef _HAVE_MMX_
1265+ {OpcodeInfo::all, {0x0F, 0xDF, _r}, {mm64, mm_m64}, DU_U },
1266+#endif
1267+ {OpcodeInfo::all, {0x66, 0x0F, 0xDF, _r}, {xmm64, xmm_m64}, DU_U },
1268+END_OPCODES()
1269+END_MNEMONIC()
1270+BEGIN_MNEMONIC(PSLLQ, MF_NONE, DU_U)
1271+BEGIN_OPCODES()
1272+#ifdef _HAVE_MMX_
1273+ {OpcodeInfo::all, {0x0F, 0xF3, _r}, {mm64, mm_m64}, DU_U },
1274+#endif
1275+ {OpcodeInfo::all, {0x66, 0x0F, 0xF3, _r}, {xmm64, xmm_m64}, DU_U },
1276+ {OpcodeInfo::all, {0x66, 0x0F, 0x73, _6, ib}, {xmm64, imm8}, DU_U },
1277+END_OPCODES()
1278+END_MNEMONIC()
1279+BEGIN_MNEMONIC(PSRLQ, MF_NONE, DU_U)
1280+BEGIN_OPCODES()
1281+#ifdef _HAVE_MMX_
1282+ {OpcodeInfo::all, {0x0F, 0xD3, _r}, {mm64, mm_m64}, DU_U },
1283+#endif
1284+ {OpcodeInfo::all, {0x66, 0x0F, 0xD3, _r}, {xmm64, xmm_m64}, DU_U },
1285+ {OpcodeInfo::all, {0x66, 0x0F, 0x73, _2, ib}, {xmm64, imm8}, DU_U },
1286+END_OPCODES()
1287+END_MNEMONIC()
1288+
1289+BEGIN_MNEMONIC(PXOR, MF_NONE, DU_U)
1290+BEGIN_OPCODES()
1291+#ifdef _HAVE_MMX_
1292+ {OpcodeInfo::all, {0x0F, 0xEF, _r}, {mm64, mm_m64}, DU_U },
1293+#endif
1294+ {OpcodeInfo::all, {0x66, 0x0F, 0xEF, _r}, {xmm64, xmm_m64}, DU_U },
1295+END_OPCODES()
1296+END_MNEMONIC()
1297+
1298+
1299+BEGIN_MNEMONIC(MOVAPD, MF_NONE, D_U )
1300+BEGIN_OPCODES()
1301+ {OpcodeInfo::all, {0x66, 0x0F, 0x28, _r}, {xmm64, xmm_m64}, D_U },
1302+ {OpcodeInfo::all, {0x66, 0x0F, 0x29, _r}, {xmm_m64, xmm64}, D_U },
1303+END_OPCODES()
1304+END_MNEMONIC()
1305+
1306+BEGIN_MNEMONIC(MOVAPS, MF_NONE, D_U )
1307+BEGIN_OPCODES()
1308+ {OpcodeInfo::all, {0x0F, 0x28, _r}, {xmm64, xmm_m64}, D_U },
1309+ {OpcodeInfo::all, {0x0F, 0x29, _r}, {xmm_m64, xmm64}, D_U },
1310+END_OPCODES()
1311+END_MNEMONIC()
1312+
1313+BEGIN_MNEMONIC(SHUFPS, MF_NONE, D_U_U )
1314+BEGIN_OPCODES()
1315+ {OpcodeInfo::all, {0x0F, 0xC6, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U },
1316+END_OPCODES()
1317+END_MNEMONIC()
1318+
1319+
1320+BEGIN_MNEMONIC(MOVSD, MF_NONE, D_U )
1321+BEGIN_OPCODES()
1322+ {OpcodeInfo::all, {0xF2, 0x0F, 0x10, _r}, {xmm64, xmm_m64}, D_U },
1323+ {OpcodeInfo::all, {0xF2, 0x0F, 0x11, _r}, {xmm_m64, xmm64}, D_U },
1324+END_OPCODES()
1325+END_MNEMONIC()
1326+
1327+BEGIN_MNEMONIC(MOVSS, MF_NONE, D_U )
1328+BEGIN_OPCODES()
1329+ {OpcodeInfo::all, {0xF3, 0x0F, 0x10, _r}, {xmm32, xmm_m32}, D_U },
1330+ {OpcodeInfo::all, {0xF3, 0x0F, 0x11, _r}, {xmm_m32, xmm32}, D_U },
1331+END_OPCODES()
1332+END_MNEMONIC()
1333+
1334+BEGIN_MNEMONIC(MOVSX, MF_NONE, D_U )
1335+BEGIN_OPCODES()
1336+ {OpcodeInfo::all, {Size16, 0x0F, 0xBE, _r}, {r16, r_m8s}, D_U },
1337+ {OpcodeInfo::all, {0x0F, 0xBE, _r}, {r32, r_m8s}, D_U },
1338+ {OpcodeInfo::em64t, {REX_W, 0x0F, 0xBE, _r}, {r64, r_m8s}, D_U },
1339+
1340+ {OpcodeInfo::all, {0x0F, 0xBF, _r}, {r32, r_m16s}, D_U },
1341+ {OpcodeInfo::em64t, {REX_W, 0x0F, 0xBF, _r}, {r64, r_m16s}, D_U },
1342+
1343+ {OpcodeInfo::em64t, {REX_W, 0x63, _r}, {r64, r_m32s}, D_U },
1344+END_OPCODES()
1345+END_MNEMONIC()
1346+
1347+BEGIN_MNEMONIC(MOVZX, MF_NONE, D_U )
1348+BEGIN_OPCODES()
1349+ {OpcodeInfo::all, {Size16, 0x0F, 0xB6, _r}, {r16, r_m8u}, D_U },
1350+ {OpcodeInfo::all, {0x0F, 0xB6, _r}, {r32, r_m8u}, D_U },
1351+ {OpcodeInfo::em64t, {REX_W, 0x0F, 0xB6, _r}, {r64, r_m8u}, D_U },
1352+
1353+ {OpcodeInfo::all, {0x0F, 0xB7, _r}, {r32, r_m16u}, D_U },
1354+ {OpcodeInfo::em64t, {REX_W, 0x0F, 0xB7, _r}, {r64, r_m16u}, D_U },
1355+ //workaround to get r/m32 -> r64 zero-extending mov functionality:
1356+ //a plain 32-bit register copy zeros the high bits of the 64-bit register
1357+ {OpcodeInfo::em64t, {0x8B, _r}, {r64, r_m32u}, D_U },
1358+END_OPCODES()
1359+END_MNEMONIC()
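+// For illustration: the 0x8B row above emits MOVZX r64, r/m32 as a plain
+// 32-bit MOV (8B /r without REX.W); on x86-64 writing a 32-bit register
+// architecturally clears bits 63:32, so the copy zero-extends for free.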
1360+
1361+BEGIN_MNEMONIC(MULSD, MF_NONE, DU_U)
1362+BEGIN_OPCODES()
1363+ {OpcodeInfo::all, {0xF2, 0x0F, 0x59, _r}, {xmm64, xmm_m64}, DU_U },
1364+END_OPCODES()
1365+END_MNEMONIC()
1366+
1367+BEGIN_MNEMONIC(MULSS, MF_NONE, DU_U)
1368+BEGIN_OPCODES()
1369+ {OpcodeInfo::all, {0xF3, 0x0F, 0x59, _r}, {xmm32, xmm_m32}, DU_U },
1370+END_OPCODES()
1371+END_MNEMONIC()
1372+
1373+BEGIN_MNEMONIC(NEG, MF_AFFECTS_FLAGS, DU )
1374+BEGIN_OPCODES()
1375+ {OpcodeInfo::all, {0xF6, _3}, {r_m8}, DU },
1376+
1377+ {OpcodeInfo::all, {Size16, 0xF7, _3}, {r_m16}, DU },
1378+ {OpcodeInfo::all, {0xF7, _3}, {r_m32}, DU },
1379+ {OpcodeInfo::em64t, {REX_W, 0xF7, _3}, {r_m64}, DU },
1380+END_OPCODES()
1381+END_MNEMONIC()
1382+
1383+BEGIN_MNEMONIC(NOP, MF_NONE, N)
1384+BEGIN_OPCODES()
1385+ {OpcodeInfo::all, {0x90}, {}, N },
1386+END_OPCODES()
1387+END_MNEMONIC()
1388+
1389+BEGIN_MNEMONIC(NOT, MF_AFFECTS_FLAGS, DU )
1390+BEGIN_OPCODES()
1391+ {OpcodeInfo::all, {0xF6, _2}, {r_m8}, DU },
1392+ {OpcodeInfo::all, {Size16, 0xF7, _2}, {r_m16}, DU },
1393+ {OpcodeInfo::all, {0xF7, _2}, {r_m32}, DU },
1394+ {OpcodeInfo::em64t, {REX_W, 0xF7, _2}, {r_m64}, DU },
1395+END_OPCODES()
1396+END_MNEMONIC()
1397+
1398+BEGIN_MNEMONIC(POP, MF_NONE, D)
1399+BEGIN_OPCODES()
1400+ {OpcodeInfo::all, {Size16, 0x8F, _0}, {r_m16}, D },
1401+ {OpcodeInfo::ia32, {0x8F, _0}, {r_m32}, D },
1402+ {OpcodeInfo::em64t, {0x8F, _0}, {r_m64}, D },
1403+
1404+ {OpcodeInfo::all, {Size16, 0x58|rw }, {r16}, D },
1405+ {OpcodeInfo::ia32, {0x58|rd }, {r32}, D },
1406+ {OpcodeInfo::em64t, {0x58|rd }, {r64}, D },
1407+END_OPCODES()
1408+END_MNEMONIC()
1409+
1410+BEGIN_MNEMONIC(POPFD, MF_AFFECTS_FLAGS, N)
1411+BEGIN_OPCODES()
1412+ {OpcodeInfo::all, {0x9D}, {}, N },
1413+END_OPCODES()
1414+END_MNEMONIC()
1415+
1416+BEGIN_MNEMONIC(PREFETCH, MF_NONE, U)
1417+BEGIN_OPCODES()
1418+ {OpcodeInfo::all, {0x0F, 0x18, _0}, {m8}, U },
1419+END_OPCODES()
1420+END_MNEMONIC()
1421+
1422+BEGIN_MNEMONIC(PUSH, MF_NONE, U )
1423+BEGIN_OPCODES()
1424+ {OpcodeInfo::all, {Size16, 0xFF, _6}, {r_m16}, U },
1425+ {OpcodeInfo::ia32, {0xFF, _6}, {r_m32}, U },
1426+ {OpcodeInfo::em64t, {0xFF, _6}, {r_m64}, U },
1427+
1428+ {OpcodeInfo::all, {Size16, 0x50|rw }, {r16}, U },
1429+ {OpcodeInfo::ia32, {0x50|rd }, {r32}, U },
1430+ {OpcodeInfo::em64t, {0x50|rd }, {r64}, U },
1431+
1432+ {OpcodeInfo::all, {0x6A}, {imm8}, U },
1433+ {OpcodeInfo::all, {Size16, 0x68}, {imm16}, U },
1434+ {OpcodeInfo::ia32, {0x68}, {imm32}, U },
1435+// {OpcodeInfo::em64t, {0x68}, {imm64}, U },
1436+END_OPCODES()
1437+END_MNEMONIC()
1438+
1439+BEGIN_MNEMONIC(PUSHFD, MF_USES_FLAGS, N)
1440+BEGIN_OPCODES()
1441+ {OpcodeInfo::all, {0x9C}, {}, N },
1442+END_OPCODES()
1443+END_MNEMONIC()
1444+
1445+
1446+BEGIN_MNEMONIC(RET, MF_NONE, N)
1447+BEGIN_OPCODES()
1448+ {OpcodeInfo::all, {0xC3}, {}, N },
1449+ {OpcodeInfo::all, {0xC2, iw}, {imm16}, U },
1450+END_OPCODES()
1451+END_MNEMONIC()
1452+
1453+#define DEFINE_SETcc_MNEMONIC( cc ) \
1454+ BEGIN_MNEMONIC(SET##cc, MF_USES_FLAGS|MF_CONDITIONAL, DU) \
1455+BEGIN_OPCODES() \
1456+ {OpcodeInfo::all, {0x0F, 0x90 + ConditionMnemonic_##cc}, {r_m8}, DU }, \
1457+END_OPCODES() \
1458+END_MNEMONIC()
1459+
1460+DEFINE_SETcc_MNEMONIC(O)
1461+DEFINE_SETcc_MNEMONIC(NO)
1462+DEFINE_SETcc_MNEMONIC(B)
1463+DEFINE_SETcc_MNEMONIC(NB)
1464+DEFINE_SETcc_MNEMONIC(Z)
1465+DEFINE_SETcc_MNEMONIC(NZ)
1466+DEFINE_SETcc_MNEMONIC(BE)
1467+DEFINE_SETcc_MNEMONIC(NBE)
1468+
1469+DEFINE_SETcc_MNEMONIC(S)
1470+DEFINE_SETcc_MNEMONIC(NS)
1471+DEFINE_SETcc_MNEMONIC(P)
1472+DEFINE_SETcc_MNEMONIC(NP)
1473+DEFINE_SETcc_MNEMONIC(L)
1474+DEFINE_SETcc_MNEMONIC(NL)
1475+DEFINE_SETcc_MNEMONIC(LE)
1476+DEFINE_SETcc_MNEMONIC(NLE)
1477+
1478+#undef DEFINE_SETcc_MNEMONIC
1479+
1480+#define DEFINE_SHIFT_MNEMONIC(nam, slash_num, flags) \
1481+BEGIN_MNEMONIC(nam, flags, DU_U) \
1482+BEGIN_OPCODES()\
1483+ /* The D0 & D1 opcodes are added without the 2nd operand (the constant 1) */\
1484+ /* because they are used for decoding only, where just the length matters */\
1485+ {OpcodeInfo::decoder, {0xD0, slash_num}, {r_m8/*,const_1*/}, DU },\
1486+ {OpcodeInfo::all, {0xD2, slash_num}, {r_m8, CL}, DU_U },\
1487+ {OpcodeInfo::all, {0xC0, slash_num, ib}, {r_m8, imm8}, DU_U },\
1488+\
1489+ {OpcodeInfo::decoder, {Size16, 0xD1, slash_num}, {r_m16/*,const_1*/}, DU },\
1490+ {OpcodeInfo::all, {Size16, 0xD3, slash_num}, {r_m16, CL}, DU_U },\
1491+ {OpcodeInfo::all, {Size16, 0xC1, slash_num, ib}, {r_m16, imm8 }, DU_U },\
1492+\
1493+ {OpcodeInfo::decoder, {0xD1, slash_num}, {r_m32/*,const_1*/}, DU },\
1494+ {OpcodeInfo::decoder64, {REX_W, 0xD1, slash_num}, {r_m64/*,const_1*/}, DU },\
1495+\
1496+ {OpcodeInfo::all, {0xD3, slash_num}, {r_m32, CL}, DU_U },\
1497+ {OpcodeInfo::em64t, {REX_W, 0xD3, slash_num}, {r_m64, CL}, DU_U },\
1498+\
1499+ {OpcodeInfo::all, {0xC1, slash_num, ib}, {r_m32, imm8}, DU_U },\
1500+ {OpcodeInfo::em64t, {REX_W, 0xC1, slash_num, ib}, {r_m64, imm8}, DU_U },\
1501+END_OPCODES()\
1502+END_MNEMONIC()
1503+
1504+
1505+DEFINE_SHIFT_MNEMONIC(ROL, _0, MF_AFFECTS_FLAGS)
1506+DEFINE_SHIFT_MNEMONIC(ROR, _1, MF_AFFECTS_FLAGS)
1507+DEFINE_SHIFT_MNEMONIC(RCL, _2, MF_AFFECTS_FLAGS|MF_USES_FLAGS)
1508+DEFINE_SHIFT_MNEMONIC(RCR, _3, MF_AFFECTS_FLAGS|MF_USES_FLAGS)
1509+
1510+DEFINE_SHIFT_MNEMONIC(SAL, _4, MF_AFFECTS_FLAGS)
1511+DEFINE_SHIFT_MNEMONIC(SHR, _5, MF_AFFECTS_FLAGS)
1512+DEFINE_SHIFT_MNEMONIC(SAR, _7, MF_AFFECTS_FLAGS)
1513+
1514+#undef DEFINE_SHIFT_MNEMONIC
1515+
1516+BEGIN_MNEMONIC(SHLD, MF_AFFECTS_FLAGS, N)
1517+BEGIN_OPCODES()
1518+ {OpcodeInfo::all, {0x0F, 0xA5}, {r_m32, r32, CL}, DU_DU_U },
1519+ {OpcodeInfo::all, {0x0F, 0xA4}, {r_m32, r32, imm8}, DU_DU_U },
1520+END_OPCODES()
1521+END_MNEMONIC()
1522+
1523+BEGIN_MNEMONIC(SHRD, MF_AFFECTS_FLAGS, N)
1524+// TODO: the def/use info is wrong
1525+BEGIN_OPCODES()
1526+ {OpcodeInfo::all, {0x0F, 0xAD}, {r_m32, r32, CL}, DU_DU_U },
1527+END_OPCODES()
1528+END_MNEMONIC()
1529+
1530+
1531+BEGIN_MNEMONIC(SUBSD, MF_NONE, DU_U)
1532+BEGIN_OPCODES()
1533+ {OpcodeInfo::all, {0xF2, 0x0F, 0x5C, _r}, {xmm64, xmm_m64}, DU_U },
1534+END_OPCODES()
1535+END_MNEMONIC()
1536+
1537+BEGIN_MNEMONIC(SUBSS, MF_NONE, DU_U)
1538+BEGIN_OPCODES()
1539+ {OpcodeInfo::all, {0xF3, 0x0F, 0x5C, _r}, {xmm32, xmm_m32}, DU_U },
1540+END_OPCODES()
1541+END_MNEMONIC()
1542+
1543+BEGIN_MNEMONIC(TEST, MF_AFFECTS_FLAGS, U_U)
1544+BEGIN_OPCODES()
1545+
1546+ {OpcodeInfo::decoder, {0xA8, ib}, { AL, imm8}, U_U },
1547+ {OpcodeInfo::decoder, {0xA9, iw}, { AX, imm16}, U_U },
1548+ {OpcodeInfo::decoder, {0xA9, id}, { EAX, imm32}, U_U },
1549+ {OpcodeInfo::decoder64, {REX_W, 0xA9, id}, { RAX, imm32s}, U_U },
1550+
1551+ {OpcodeInfo::all, {0xF6, _0, ib}, {r_m8,imm8}, U_U },
1552+
1553+ {OpcodeInfo::all, {Size16, 0xF7, _0, iw}, {r_m16,imm16}, U_U },
1554+ {OpcodeInfo::all, {0xF7, _0, id}, {r_m32,imm32}, U_U },
1555+ {OpcodeInfo::em64t, {REX_W, 0xF7, _0, id}, {r_m64,imm32s}, U_U },
1556+
1557+ {OpcodeInfo::all, {0x84, _r}, {r_m8,r8}, U_U },
1558+
1559+ {OpcodeInfo::all, {Size16, 0x85, _r}, {r_m16,r16}, U_U },
1560+ {OpcodeInfo::all, {0x85, _r}, {r_m32,r32}, U_U },
1561+ {OpcodeInfo::em64t, {REX_W, 0x85, _r}, {r_m64,r64}, U_U },
1562+END_OPCODES()
1563+END_MNEMONIC()
1564+
1565+
1566+BEGIN_MNEMONIC(UCOMISD, MF_AFFECTS_FLAGS, U_U)
1567+BEGIN_OPCODES()
1568+ {OpcodeInfo::all, {0x66, 0x0F, 0x2E, _r}, {xmm64, xmm_m64}, U_U },
1569+END_OPCODES()
1570+END_MNEMONIC()
1571+
1572+BEGIN_MNEMONIC(UCOMISS, MF_AFFECTS_FLAGS, U_U)
1573+BEGIN_OPCODES()
1574+ {OpcodeInfo::all, {0x0F, 0x2E, _r}, {xmm32, xmm_m32}, U_U },
1575+END_OPCODES()
1576+END_MNEMONIC()
1577+
1578+BEGIN_MNEMONIC(COMISD, MF_AFFECTS_FLAGS, U_U)
1579+BEGIN_OPCODES()
1580+ {OpcodeInfo::all, {0x66, 0x0F, 0x2F, _r}, {xmm64, xmm_m64}, U_U },
1581+END_OPCODES()
1582+END_MNEMONIC()
1583+
1584+BEGIN_MNEMONIC(COMISS, MF_AFFECTS_FLAGS, U_U)
1585+BEGIN_OPCODES()
1586+ {OpcodeInfo::all, {0x0F, 0x2F, _r}, {xmm32, xmm_m32}, U_U },
1587+END_OPCODES()
1588+END_MNEMONIC()
1589+
1590+BEGIN_MNEMONIC(XORPD, MF_SAME_ARG_NO_USE|MF_SYMMETRIC, DU_U)
1591+BEGIN_OPCODES()
1592+ //Note: they're actually 128 bits
1593+ {OpcodeInfo::all, {0x66, 0x0F, 0x57, _r}, {xmm64, xmm_m64}, DU_U },
1594+END_OPCODES()
1595+END_MNEMONIC()
1596+
1597+BEGIN_MNEMONIC(XORPS, MF_SAME_ARG_NO_USE|MF_SYMMETRIC, DU_U)
1598+BEGIN_OPCODES()
1599+ //Note: they're actually 128 bits
1600+ {OpcodeInfo::all, {0x0F, 0x57, _r}, {xmm32, xmm_m32}, DU_U },
1601+END_OPCODES()
1602+END_MNEMONIC()
1603+
1604+BEGIN_MNEMONIC(CVTDQ2PD, MF_NONE, D_U )
1605+BEGIN_OPCODES()
1606+ //Note: they're actually 128 bits
1607+ {OpcodeInfo::all, {0xF3, 0x0F, 0xE6}, {xmm64, xmm_m64}, D_U },
1608+END_OPCODES()
1609+END_MNEMONIC()
1610+
1611+BEGIN_MNEMONIC(CVTDQ2PS, MF_NONE, D_U )
1612+BEGIN_OPCODES()
1613+ //Note: they're actually 128 bits
1614+ {OpcodeInfo::all, {0x0F, 0x5B, _r}, {xmm32, xmm_m32}, D_U },
1615+END_OPCODES()
1616+END_MNEMONIC()
1617+
1618+BEGIN_MNEMONIC(CVTTPD2DQ, MF_NONE, D_U )
1619+BEGIN_OPCODES()
1620+ //Note: they're actually 128 bits
1621+ {OpcodeInfo::all, {0x66, 0x0F, 0xE6}, {xmm64, xmm_m64}, D_U },
1622+END_OPCODES()
1623+END_MNEMONIC()
1624+
1625+BEGIN_MNEMONIC(CVTTPS2DQ, MF_NONE, D_U )
1626+BEGIN_OPCODES()
1627+ //Note: they're actually 128 bits
1628+ {OpcodeInfo::all, {0xF3, 0x0F, 0x5B, _r}, {xmm32, xmm_m32}, D_U },
1629+END_OPCODES()
1630+END_MNEMONIC()
1631+
1632+//
1633+// String operations
1634+//
1635+BEGIN_MNEMONIC(STD, MF_AFFECTS_FLAGS, N)
1636+BEGIN_OPCODES()
1637+ {OpcodeInfo::all, {0xFD}, {}, N },
1638+END_OPCODES()
1639+END_MNEMONIC()
1640+
1641+BEGIN_MNEMONIC(CLD, MF_AFFECTS_FLAGS, N)
1642+BEGIN_OPCODES()
1643+ {OpcodeInfo::all, {0xFC}, {}, N },
1644+END_OPCODES()
1645+END_MNEMONIC()
1646+
1647+BEGIN_MNEMONIC(SCAS, MF_AFFECTS_FLAGS, N)
1648+// To be symmetric, this mnemonic should take either m32 or RegName_EAX,
1649+// but as long as Jitrino's CG does not use the mnemonic, we leave it
1650+// in its natural form.
1651+BEGIN_OPCODES()
1652+ {OpcodeInfo::all, {0xAF}, {}, N },
1653+END_OPCODES()
1654+END_MNEMONIC()
1655+
1656+BEGIN_MNEMONIC(STOS, MF_AFFECTS_FLAGS, DU_DU_U)
1657+BEGIN_OPCODES()
1658+ {OpcodeInfo::all, {0xAB}, {EDI, ECX, EAX}, DU_DU_U },
1659+ {OpcodeInfo::all, {0xAA}, {EDI, ECX, AL}, DU_DU_U },
1660+ {OpcodeInfo::em64t, {REX_W, 0xAB}, {RDI, RCX, RAX}, DU_DU_U },
1661+END_OPCODES()
1662+END_MNEMONIC()
1663+
1664+/*
1665+MOVS and CMPS are special cases.
1666+Most of the code in both the CG and the Encoder does not expect two memory
1667+operands. They are also not supposed to set up constraints on which registers
1668+the memory references must reside in - so m8,m8 or m32,m32 is not an option.
1669+We can't use r8,r8 either - EDI and ESI have no 8-bit forms.
1670+So, as a workaround, we use r32,r32 and convey the operand size through the
1671+specific mnemonic - the codegen does the same.
1672+*/
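+// For illustration: MOVS32 below is just the raw 0xA5 opcode; the r32,r32,ECX
+// operands are bookkeeping for the code generator, while the hardware form
+// always uses ESI/EDI implicitly.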
1673+BEGIN_MNEMONIC(MOVS8, MF_NONE, DU_DU_DU)
1674+BEGIN_OPCODES()
1675+ {OpcodeInfo::ia32, {0xA4}, {r32,r32,ECX}, DU_DU_DU },
1676+ {OpcodeInfo::em64t, {0xA4}, {r64,r64,RCX}, DU_DU_DU },
1677+END_OPCODES()
1678+END_MNEMONIC()
1679+
1680+BEGIN_MNEMONIC(MOVS16, MF_NONE, DU_DU_DU)
1681+BEGIN_OPCODES()
1682+ {OpcodeInfo::ia32, {Size16, 0xA5}, {r32,r32,ECX}, DU_DU_DU },
1683+ {OpcodeInfo::em64t, {Size16, 0xA5}, {r64,r64,RCX}, DU_DU_DU },
1684+END_OPCODES()
1685+END_MNEMONIC()
1686+
1687+BEGIN_MNEMONIC(MOVS32, MF_NONE, DU_DU_DU)
1688+BEGIN_OPCODES()
1689+ {OpcodeInfo::ia32, {0xA5}, {r32,r32,ECX}, DU_DU_DU },
1690+ {OpcodeInfo::em64t, {0xA5}, {r64,r64,RCX}, DU_DU_DU },
1691+END_OPCODES()
1692+END_MNEMONIC()
1693+
1694+BEGIN_MNEMONIC(MOVS64, MF_NONE, DU_DU_DU)
1695+BEGIN_OPCODES()
1696+ {OpcodeInfo::em64t, {REX_W,0xA5}, {r64,r64,RCX}, DU_DU_DU },
1697+END_OPCODES()
1698+END_MNEMONIC()
1699+
1700+BEGIN_MNEMONIC(CMPSB, MF_AFFECTS_FLAGS, DU_DU_DU)
1701+BEGIN_OPCODES()
1702+ {OpcodeInfo::ia32, {0xA6}, {ESI,EDI,ECX}, DU_DU_DU },
1703+ {OpcodeInfo::em64t, {0xA6}, {RSI,RDI,RCX}, DU_DU_DU },
1704+END_OPCODES()
1705+END_MNEMONIC()
1706+
1707+BEGIN_MNEMONIC(CMPSW, MF_AFFECTS_FLAGS, DU_DU_DU)
1708+BEGIN_OPCODES()
1709+ {OpcodeInfo::ia32, {Size16, 0xA7}, {ESI,EDI,ECX}, DU_DU_DU },
1710+ {OpcodeInfo::em64t, {Size16, 0xA7}, {RSI,RDI,RCX}, DU_DU_DU },
1711+END_OPCODES()
1712+END_MNEMONIC()
1713+
1714+BEGIN_MNEMONIC(CMPSD, MF_AFFECTS_FLAGS, DU_DU_DU)
1715+BEGIN_OPCODES()
1716+ {OpcodeInfo::ia32, {0xA7}, {ESI,EDI,ECX}, DU_DU_DU },
1717+ {OpcodeInfo::em64t, {0xA7}, {RSI,RDI,RCX}, DU_DU_DU },
1718+END_OPCODES()
1719+END_MNEMONIC()
1720+
1721+
1722+BEGIN_MNEMONIC(WAIT, MF_AFFECTS_FLAGS, N)
1723+BEGIN_OPCODES()
1724+ {OpcodeInfo::all, {0x9B}, {}, N },
1725+END_OPCODES()
1726+END_MNEMONIC()
1727+
1728+//
1729+// ~String operations
1730+//
1731+
1732+//
1733+//Note: the instructions below were added for the sake of the disassembling routine.
1734+// Their flags, params and param usage need to be defined more precisely.
1735+//
1736+BEGIN_MNEMONIC(LEAVE, MF_NONE, N)
1737+BEGIN_OPCODES()
1738+ {OpcodeInfo::decoder, {0xC9}, {}, N },
1739+END_OPCODES()
1740+END_MNEMONIC()
1741+
1742+BEGIN_MNEMONIC(ENTER, MF_NONE, N)
1743+BEGIN_OPCODES()
1744+ {OpcodeInfo::decoder, {0xC8, iw, ib}, {imm16, imm8}, N },
1745+END_OPCODES()
1746+END_MNEMONIC()
1747+
1748+BEGIN_MNEMONIC(PADDB, MF_NONE, DU_U)
1749+BEGIN_OPCODES()
1750+ {OpcodeInfo::all, {0x66, 0x0F, 0xFC, _r}, {xmm64, xmm_m64}, DU_U },
1751+END_OPCODES()
1752+END_MNEMONIC()
1753+
1754+BEGIN_MNEMONIC(PADDW, MF_NONE, DU_U)
1755+BEGIN_OPCODES()
1756+ {OpcodeInfo::all, {0x66, 0x0F, 0xFD, _r}, {xmm64, xmm_m64}, DU_U },
1757+END_OPCODES()
1758+END_MNEMONIC()
1759+
1760+BEGIN_MNEMONIC(PADDD, MF_NONE, DU_U)
1761+BEGIN_OPCODES()
1762+ {OpcodeInfo::all, {0x66, 0x0F, 0xFE, _r}, {xmm64, xmm_m64}, DU_U },
1763+END_OPCODES()
1764+END_MNEMONIC()
1765+
1766+BEGIN_MNEMONIC(PSUBB, MF_NONE, DU_U)
1767+BEGIN_OPCODES()
1768+ {OpcodeInfo::all, {0x66, 0x0F, 0xF8, _r}, {xmm64, xmm_m64}, DU_U },
1769+END_OPCODES()
1770+END_MNEMONIC()
1771+
1772+BEGIN_MNEMONIC(PSUBW, MF_NONE, DU_U)
1773+BEGIN_OPCODES()
1774+ {OpcodeInfo::all, {0x66, 0x0F, 0xF9, _r}, {xmm64, xmm_m64}, DU_U },
1775+END_OPCODES()
1776+END_MNEMONIC()
1777+
1778+BEGIN_MNEMONIC(PSUBD, MF_NONE, DU_U)
1779+BEGIN_OPCODES()
1780+ {OpcodeInfo::all, {0x66, 0x0F, 0xFA, _r}, {xmm64, xmm_m64}, DU_U },
1781+END_OPCODES()
1782+END_MNEMONIC()
1783+
1784+BEGIN_MNEMONIC(PMULLW, MF_NONE, DU_U)
1785+BEGIN_OPCODES()
1786+ {OpcodeInfo::all, {0x66, 0x0F, 0xD5, _r}, {xmm64, xmm_m64}, DU_U },
1787+END_OPCODES()
1788+END_MNEMONIC()
1789+
1790+BEGIN_MNEMONIC(PMULLD, MF_NONE, DU_U)
1791+BEGIN_OPCODES()
1792+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x40, _r}, {xmm64, xmm_m64}, DU_U },
1793+END_OPCODES()
1794+END_MNEMONIC()
1795+
1796+BEGIN_MNEMONIC(PSLLW, MF_NONE, DU_U)
1797+BEGIN_OPCODES()
1798+ {OpcodeInfo::all, {0x66, 0x0F, 0xF1, _r}, {xmm64, xmm_m64}, DU_U },
1799+ {OpcodeInfo::all, {0x66, 0x0F, 0x71, _6, ib}, {xmm64, imm8}, DU_U },
1800+END_OPCODES()
1801+END_MNEMONIC()
1802+
1803+BEGIN_MNEMONIC(PSLLD, MF_NONE, DU_U)
1804+BEGIN_OPCODES()
1805+ {OpcodeInfo::all, {0x66, 0x0F, 0xF2, _r}, {xmm64, xmm_m64}, DU_U },
1806+ {OpcodeInfo::all, {0x66, 0x0F, 0x72, _6, ib}, {xmm64, imm8}, DU_U },
1807+END_OPCODES()
1808+END_MNEMONIC()
1809+
1810+BEGIN_MNEMONIC(PSRAW, MF_NONE, DU_U)
1811+BEGIN_OPCODES()
1812+ {OpcodeInfo::all, {0x66, 0x0F, 0xE1, _r}, {xmm64, xmm_m64}, DU_U },
1813+ {OpcodeInfo::all, {0x66, 0x0F, 0x71, _4, ib}, {xmm64, imm8}, DU_U },
1814+END_OPCODES()
1815+END_MNEMONIC()
1816+
1817+BEGIN_MNEMONIC(PSRAD, MF_NONE, DU_U)
1818+BEGIN_OPCODES()
1819+ {OpcodeInfo::all, {0x66, 0x0F, 0xE2, _r}, {xmm64, xmm_m64}, DU_U },
1820+ {OpcodeInfo::all, {0x66, 0x0F, 0x72, _4, ib}, {xmm64, imm8}, DU_U },
1821+END_OPCODES()
1822+END_MNEMONIC()
1823+
1824+BEGIN_MNEMONIC(PSRLW, MF_NONE, DU_U)
1825+BEGIN_OPCODES()
1826+ {OpcodeInfo::all, {0x66, 0x0F, 0xD1, _r}, {xmm64, xmm_m64}, DU_U },
1827+ {OpcodeInfo::all, {0x66, 0x0F, 0x71, _2, ib}, {xmm64, imm8}, DU_U },
1828+END_OPCODES()
1829+END_MNEMONIC()
1830+
1831+BEGIN_MNEMONIC(PSRLD, MF_NONE, DU_U)
1832+BEGIN_OPCODES()
1833+ {OpcodeInfo::all, {0x66, 0x0F, 0xD2, _r}, {xmm64, xmm_m64}, DU_U },
1834+ {OpcodeInfo::all, {0x66, 0x0F, 0x72, _2, ib}, {xmm64, imm8}, DU_U },
1835+END_OPCODES()
1836+END_MNEMONIC()
1837+
1838+BEGIN_MNEMONIC(PMOVSXBW, MF_NONE, DU_U)
1839+BEGIN_OPCODES()
1840+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x20, _r}, {xmm64, xmm_m64}, DU_U },
1841+END_OPCODES()
1842+END_MNEMONIC()
1843+
1844+BEGIN_MNEMONIC(PSHUFB, MF_NONE, DU_U)
1845+BEGIN_OPCODES()
1846+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x00, _r}, {xmm64, xmm_m64}, DU_U },
1847+END_OPCODES()
1848+END_MNEMONIC()
1849+
1850+BEGIN_MNEMONIC(PSHUFD, MF_NONE, D_U_U)
1851+BEGIN_OPCODES()
1852+ {OpcodeInfo::all, {0x66, 0x0F, 0x70, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U },
1853+END_OPCODES()
1854+END_MNEMONIC()
1855+
1856+BEGIN_MNEMONIC(PSHUFLW, MF_NONE, D_U_U)
1857+BEGIN_OPCODES()
1858+ {OpcodeInfo::all, {0xF2, 0x0F, 0x70, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U },
1859+END_OPCODES()
1860+END_MNEMONIC()
1861+
1862+BEGIN_MNEMONIC(PSHUFHW, MF_NONE, D_U_U)
1863+BEGIN_OPCODES()
1864+ {OpcodeInfo::all, {0xF3, 0x0F, 0x70, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U },
1865+END_OPCODES()
1866+END_MNEMONIC()
1867+
1868+BEGIN_MNEMONIC(PHADDSW, MF_NONE, DU_U)
1869+BEGIN_OPCODES()
1870+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x03, _r}, {xmm64, xmm_m64}, DU_U },
1871+END_OPCODES()
1872+END_MNEMONIC()
1873+
1874+BEGIN_MNEMONIC(PHADDW, MF_NONE, DU_U)
1875+BEGIN_OPCODES()
1876+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x01, _r}, {xmm64, xmm_m64}, DU_U },
1877+END_OPCODES()
1878+END_MNEMONIC()
1879+
1880+BEGIN_MNEMONIC(PHADDD, MF_NONE, DU_U)
1881+BEGIN_OPCODES()
1882+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x02, _r}, {xmm64, xmm_m64}, DU_U },
1883+END_OPCODES()
1884+END_MNEMONIC()
1885+
1886+BEGIN_MNEMONIC(PHSUBSW, MF_NONE, DU_U)
1887+BEGIN_OPCODES()
1888+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x07, _r}, {xmm64, xmm_m64}, DU_U },
1889+END_OPCODES()
1890+END_MNEMONIC()
1891+
1892+BEGIN_MNEMONIC(PHSUBW, MF_NONE, DU_U)
1893+BEGIN_OPCODES()
1894+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x05, _r}, {xmm64, xmm_m64}, DU_U },
1895+END_OPCODES()
1896+END_MNEMONIC()
1897+
1898+BEGIN_MNEMONIC(PHSUBD, MF_NONE, DU_U)
1899+BEGIN_OPCODES()
1900+ {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x06, _r}, {xmm64, xmm_m64}, DU_U },
1901+END_OPCODES()
1902+END_MNEMONIC()
1903+
1904+BEGIN_MNEMONIC(PEXTRB, MF_NONE, D_U_U)
1905+BEGIN_OPCODES()
1906+ {OpcodeInfo::all, {0x66, 0x0F, 0x3A, 0x14, _r, ib}, {r32, xmm64, imm8}, D_U_U },
1907+END_OPCODES()
1908+END_MNEMONIC()
1909+
1910+BEGIN_MNEMONIC(PEXTRW, MF_NONE, D_U_U)
1911+BEGIN_OPCODES()
1912+ {OpcodeInfo::all, {0x66, 0x0F, 0xC5, _r, ib}, {r32, xmm64, imm8}, D_U_U },
1913+END_OPCODES()
1914+END_MNEMONIC()
1915+
1916+BEGIN_MNEMONIC(PEXTRD, MF_NONE, D_U_U)
1917+BEGIN_OPCODES()
1918+ {OpcodeInfo::all, {0x66, 0x0F, 0x3A, 0x16, _r, ib}, {r_m32, xmm64, imm8}, D_U_U },
1919+END_OPCODES()
1920+END_MNEMONIC()
1921+
1922+BEGIN_MNEMONIC(MOVDQA, MF_NONE|MF_SYMMETRIC, D_U)
1923+BEGIN_OPCODES()
1924+ {OpcodeInfo::all, {0x66, 0x0F, 0x6F, _r}, {xmm64, xmm_m64}, D_U },
1925+ //The encoder cannot do the lookup properly when the operands are symmetric but the opcode is not:
1926+ //{OpcodeInfo::all, {0x66, 0x0F, 0x7F, _r}, {xmm_m128, xmm128}, D_U },
1927+END_OPCODES()
1928+END_MNEMONIC()
1929+
1930+}; // ~masterEncodingTable[]
1931+
1932+ENCODER_NAMESPACE_END
1933+
1934+ENCODER_NAMESPACE_START
1935+
1936+static int compareMnemonicInfo(const void* info1, const void* info2)
1937+{
1938+ Mnemonic id1, id2;
1939+
1940+ id1 = ((const MnemonicInfo*) info1)->mn;
1941+ id2 = ((const MnemonicInfo*) info2)->mn;
1942+ if (id1 < id2)
1943+ return -1;
1944+ if (id1 > id2)
1945+ return 1;
1946+ return 0;
1947+}
1948+
1949+int EncoderBase::buildTable(void)
1950+{
1951+ // A check: all mnemonics must be covered
1952+ assert(COUNTOF(masterEncodingTable) == Mnemonic_Count);
1953+
1954+ // sort the mnemonics so the list becomes ordered
1955+ qsort(masterEncodingTable, Mnemonic_Count, sizeof(MnemonicInfo), compareMnemonicInfo);
1956+
1957+ //
1958+ // clear the tables
1959+ //
1960+ memset(opcodesHashMap, NOHASH, sizeof(opcodesHashMap));
1961+ memset(opcodes, 0, sizeof(opcodes));
1962+ //
1963+ // and, finally, build it
1964+ for (unsigned i=0; i<Mnemonic_Count; i++) {
1965+ assert((Mnemonic)i == (masterEncodingTable + i)->mn);
1966+ buildMnemonicDesc(masterEncodingTable+i);
1967+ }
1968+ return 0;
1969+}
1970+
1971+void EncoderBase::buildMnemonicDesc(const MnemonicInfo * minfo)
1972+{
1973+ MnemonicDesc& mdesc = mnemonics[minfo->mn];
1974+ mdesc.mn = minfo->mn;
1975+ mdesc.flags = minfo->flags;
1976+ mdesc.roles = minfo->roles;
1977+ mdesc.name = minfo->name;
1978+
1979+ //
1980+ // fill the used opcodes
1981+ //
1982+ for (unsigned i=0, oindex=0; i<COUNTOF(minfo->opcodes); i++) {
1983+
1984+ const OpcodeInfo& oinfo = minfo->opcodes[i];
1985+ OpcodeDesc& odesc = opcodes[minfo->mn][oindex];
1986+ // last opcode ?
1987+ if (oinfo.opcode[0] == OpcodeByteKind_LAST) {
1988+ // mark the opcode 'last', exit
1989+ odesc.opcode_len = 0;
1990+ odesc.last = 1;
1991+ break;
1992+ }
1993+ odesc.last = 0;
1994+#ifdef _EM64T_
1995+ if (oinfo.platf == OpcodeInfo::ia32) { continue; }
1996+ if (oinfo.platf == OpcodeInfo::decoder32) { continue; }
1997+#else
1998+ if (oinfo.platf == OpcodeInfo::em64t) { continue; }
1999+ if (oinfo.platf == OpcodeInfo::decoder64) { continue; }
2000+#endif
2001+ if (oinfo.platf == OpcodeInfo::decoder64 ||
2002+ oinfo.platf == OpcodeInfo::decoder32) {
2003+ odesc.platf = OpcodeInfo::decoder;
2004+ }
2005+ else {
2006+ odesc.platf = (char)oinfo.platf;
2007+ }
2008+ //
2009+ // fill out opcodes
2010+ //
2011+ unsigned j = 0;
2012+ odesc.opcode_len = 0;
2013+ for(; oinfo.opcode[j]; j++) {
2014+ unsigned opcod = oinfo.opcode[j];
2015+ unsigned kind = opcod&OpcodeByteKind_KindMask;
2016+ if (kind == OpcodeByteKind_REX_W) {
2017+ odesc.opcode[odesc.opcode_len++] = (unsigned char)0x48;
2018+ continue;
2019+ }
2020+ else if(kind != 0 && kind != OpcodeByteKind_ZeroOpcodeByte) {
2021+ break;
2022+ }
2023+ unsigned lowByte = (opcod & OpcodeByteKind_OpcodeMask);
2024+ odesc.opcode[odesc.opcode_len++] = (unsigned char)lowByte;
2025+ }
2026+ assert(odesc.opcode_len<5);
2027+ odesc.aux0 = odesc.aux1 = 0;
2028+ if (oinfo.opcode[j] != 0) {
2029+ odesc.aux0 = oinfo.opcode[j];
2030+ assert((odesc.aux0 & OpcodeByteKind_KindMask) != 0);
2031+ ++j;
2032+ if(oinfo.opcode[j] != 0) {
2033+ odesc.aux1 = oinfo.opcode[j];
2034+ assert((odesc.aux1 & OpcodeByteKind_KindMask) != 0);
2035+ }
2036+ }
2037+ else if (oinfo.roles.count>=2) {
2038+ if (((oinfo.opnds[0].kind&OpndKind_Mem) &&
2039+ (isRegKind(oinfo.opnds[1].kind))) ||
2040+ ((oinfo.opnds[1].kind&OpndKind_Mem) &&
2041+ (isRegKind(oinfo.opnds[0].kind)))) {
2042+ // Example: MOVQ xmm1, xmm/m64 lists opcode bytes only (no /r);
2043+ // same with SHRD.
2044+ // Add a fake /r so the ModRM byte still gets emitted.
2045+ odesc.aux0 = _r;
2046+ }
2047+ }
2048+ else if (oinfo.roles.count==1) {
2049+ if (oinfo.opnds[0].kind&OpndKind_Mem) {
2050+ // Example: SETcc r/m8, adding fake /0
2051+ odesc.aux0 = _0;
2052+ }
2053+ }
2054+ // check imm
2055+ if (oinfo.roles.count > 0 &&
2056+ (oinfo.opnds[0].kind == OpndKind_Imm ||
2057+ oinfo.opnds[oinfo.roles.count-1].kind == OpndKind_Imm)) {
2058+ // Example: CALL cd, PUSH imm32 - they fit both opnds[0] and
2059+ // opnds[oinfo.roles.count-1].
2060+ // The A3 opcode fits only opnds[0] - it currently has
2061+ // MOV imm32, EAX. Looks ridiculous, but this is how the
2062+ // moffset is currently implemented. This will need to be fixed
2063+ // together with the other usages of moff.
2064+ // adding fake /cd or fake /id
2065+ unsigned imm_opnd_index =
2066+ oinfo.opnds[0].kind == OpndKind_Imm ? 0 : oinfo.roles.count-1;
2067+ OpndSize sz = oinfo.opnds[imm_opnd_index].size;
2068+ unsigned imm_encode, coff_encode;
2069+ if (sz==OpndSize_8) {imm_encode = ib; coff_encode=cb; }
2070+ else if (sz==OpndSize_16) {imm_encode = iw; coff_encode=cw;}
2071+ else if (sz==OpndSize_32) {imm_encode = id; coff_encode=cd; }
2072+ else if (sz==OpndSize_64) {imm_encode = io; coff_encode=0xCC; }
2073+ else { assert(false); imm_encode=0xCC; coff_encode=0xCC; }
2074+ if (odesc.aux1 == 0) {
2075+ if (odesc.aux0==0) {
2076+ odesc.aux0 = imm_encode;
2077+ }
2078+ else {
2079+ if (odesc.aux0 != imm_encode && odesc.aux0 != coff_encode) {
2080+ odesc.aux1 = imm_encode;
2081+ }
2082+ }
2083+ }
2084+ else {
2085+ assert(odesc.aux1==imm_encode);
2086+ }
2087+
2088+ }
2089+
2090+ assert(sizeof(odesc.opnds) == sizeof(oinfo.opnds));
2091+ memcpy(odesc.opnds, oinfo.opnds,
2092+ sizeof(EncoderBase::OpndDesc)
2093+ * EncoderBase::MAX_NUM_OPCODE_OPERANDS);
2094+ odesc.roles = oinfo.roles;
2095+ odesc.first_opnd = 0;
2096+ if (odesc.opnds[0].reg != RegName_Null) {
2097+ ++odesc.first_opnd;
2098+ if (odesc.opnds[1].reg != RegName_Null) {
2099+ ++odesc.first_opnd;
2100+ }
2101+ }
2102+
2103+ if (odesc.platf == OpcodeInfo::decoder) {
2104+ // if the opcode is only for decoding info, then do not hash it.
2105+ ++oindex;
2106+ continue;
2107+ }
2108+
2109+ //
2110+ // check whether the operand info is a mask (i.e. r_m*).
2111+ // in this case, split the info to have separate entries for 'r'
2112+ // and for 'm'.
2113+ // the good news is that there can be only one such operand.
2114+ //
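+ // For illustration (hypothetical walk-through): in a row such as
+ // {0x8B, _r}, {r32, r_m32} the r_m32 operand is a mask, so the row is
+ // hashed twice - once as {r32, r32} and once as {r32, m32} - with both
+ // hash slots pointing at the same OpcodeDesc index.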
2115+ int opnd2split = -1;
2116+ for (unsigned k=0; k<oinfo.roles.count; k++) {
2117+ if ((oinfo.opnds[k].kind & OpndKind_Mem) &&
2118+ (OpndKind_Mem != oinfo.opnds[k].kind)) {
2119+ opnd2split = k;
2120+ break;
2121+ }
2122+ };
2123+
2124+ if (opnd2split == -1) {
2125+ // not a mask, hash it, store it, continue.
2126+ unsigned short hash = getHash(&oinfo);
2127+ opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex;
2128+ ++oindex;
2129+ continue;
2130+ };
2131+
2132+ OpcodeInfo storeItem = oinfo;
2133+ unsigned short hash;
2134+
2135+ // remove the memory part of the mask, and store only 'r' part
2136+ storeItem.opnds[opnd2split].kind = (OpndKind)(storeItem.opnds[opnd2split].kind & ~OpndKind_Mem);
2137+ hash = getHash(&storeItem);
2138+ if (opcodesHashMap[minfo->mn][hash] == NOHASH) {
2139+ opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex;
2140+ }
2141+ // else {
2142+ // Do not overwrite if there is something there; just check that the operands match.
2143+ // The reason is that for some instructions there are several possibilities:
2144+ // say, 'DEC r' may be encoded as either '48+r' or 'FF /1', and I believe
2145+ // the first one is better for 'dec r'.
2146+ // As we're currently processing an opcode with a memory part in its operand,
2147+ // leave already-filled items intact, so if there is 'OP reg' there, this
2148+ // better choice will be left in the table instead of 'OP r_m'.
2149+ // }
2150+
2151+ // compute hash of memory-based operand, 'm' part in 'r_m'
2152+ storeItem.opnds[opnd2split].kind = OpndKind_Mem;
2153+ hash = getHash(&storeItem);
2154+ // This should not happen: for the r_m opcodes the hash value of the
2155+ // 'r' part may collide with an 'OP r' value, but that is impossible
2156+ // for the 'm' part.
2157+ assert(opcodesHashMap[minfo->mn][hash] == NOHASH);
2158+ opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex;
2159+
2160+ ++oindex;
2161+ }
2162+}
2163+
2164+ENCODER_NAMESPACE_END
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_wrapper.cpp
@@ -0,0 +1,836 @@
1+/*
2+ * Copyright (C) 2012 The Android Open Source Project
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ * See the License for the specific language governing permissions and
14+ * limitations under the License.
15+ */
16+
17+#include <stdio.h>
18+#include <assert.h>
19+#include <limits.h>
20+#include "enc_base.h"
21+#include "enc_wrapper.h"
22+#include "dec_base.h"
23+#include "utils/Log.h"
24+
25+//#define PRINT_ENCODER_STREAM
26+bool dump_x86_inst = false;
27+
28+/**
29+ * @brief Provides the mapping between PhysicalReg and the RegName used by the encoder
30+ * @param physicalReg The physical register
31+ * @return The encoder's register name
32+ */
33+static RegName mapFromPhysicalReg (int physicalReg)
34+{
35+ RegName reg = RegName_Null;
36+
37+ //Get mapping between PhysicalReg and RegName
38+ switch (physicalReg)
39+ {
40+ case PhysicalReg_EAX:
41+ reg = RegName_EAX;
42+ break;
43+ case PhysicalReg_EBX:
44+ reg = RegName_EBX;
45+ break;
46+ case PhysicalReg_ECX:
47+ reg = RegName_ECX;
48+ break;
49+ case PhysicalReg_EDX:
50+ reg = RegName_EDX;
51+ break;
52+ case PhysicalReg_EDI:
53+ reg = RegName_EDI;
54+ break;
55+ case PhysicalReg_ESI:
56+ reg = RegName_ESI;
57+ break;
58+ case PhysicalReg_ESP:
59+ reg = RegName_ESP;
60+ break;
61+ case PhysicalReg_EBP:
62+ reg = RegName_EBP;
63+ break;
64+ case PhysicalReg_XMM0:
65+ reg = RegName_XMM0;
66+ break;
67+ case PhysicalReg_XMM1:
68+ reg = RegName_XMM1;
69+ break;
70+ case PhysicalReg_XMM2:
71+ reg = RegName_XMM2;
72+ break;
73+ case PhysicalReg_XMM3:
74+ reg = RegName_XMM3;
75+ break;
76+ case PhysicalReg_XMM4:
77+ reg = RegName_XMM4;
78+ break;
79+ case PhysicalReg_XMM5:
80+ reg = RegName_XMM5;
81+ break;
82+ case PhysicalReg_XMM6:
83+ reg = RegName_XMM6;
84+ break;
85+ case PhysicalReg_XMM7:
86+ reg = RegName_XMM7;
87+ break;
88+ default:
89+ //We have no mapping
90+ reg = RegName_Null;
91+ break;
92+ }
93+
94+ return reg;
95+}
96+
97+//getRegSize, getAliasReg: helpers used below;
98+//OpndSize, RegName, OpndExt: enums from enc_defs.h
99+inline void add_r(EncoderBase::Operands & args, int physicalReg, OpndSize sz, OpndExt ext = OpndExt_None) {
100+ if (sz == OpndSize_128)
101+ {
102+ //The encoder table lists xmm registers as 64-bit operands. Since the semantics are determined
103+ //by the mnemonic's encoding, we change the size to 64-bit to keep the encoder happy. It still
104+ //generates the 128-bit form, because the true 64-bit variants use a different (MMX) encoding.
105+ sz = OpndSize_64;
106+ }
107+
108+ RegName reg = mapFromPhysicalReg (physicalReg);
109+ if (sz != getRegSize(reg)) {
110+ reg = getAliasReg(reg, sz);
111+ }
112+ args.add(EncoderBase::Operand(reg, ext));
113+}
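+// For illustration: a 128-bit request such as add_r(args, PhysicalReg_XMM0,
+// OpndSize_128) is therefore recorded as a 64-bit XMM0 operand, and the
+// mnemonic (e.g. MOVDQA) still selects the 128-bit encoding.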
114+inline void add_m(EncoderBase::Operands & args, int baseReg, int disp, OpndSize sz, OpndExt ext = OpndExt_None) {
115+ if (sz == OpndSize_128)
116+ {
117+ //See the comment in add_r: 128-bit xmm operands are recorded as 64-bit.
120+ sz = OpndSize_64;
121+ }
122+
123+ args.add(EncoderBase::Operand(sz,
124+ mapFromPhysicalReg (baseReg),
125+ RegName_Null, 0,
126+ disp, ext));
127+}
128+inline void add_m_scale(EncoderBase::Operands & args, int baseReg, int indexReg, int scale,
129+ OpndSize sz, OpndExt ext = OpndExt_None) {
130+ if (sz == OpndSize_128)
131+ {
132+ //For xmm registers, the encoder table lists them as 64-bit operands. Since the semantics
133+ //are determined by the encoding of the mnemonic, we change the size to 64-bit to satisfy the
134+ //encoder. The generated code still operates on 128 bits, because the true 64-bit forms all use distinct MMX encodings.
135+ sz = OpndSize_64;
136+ }
137+
138+ args.add(EncoderBase::Operand(sz,
139+ mapFromPhysicalReg (baseReg),
140+ mapFromPhysicalReg (indexReg), scale,
141+ 0, ext));
142+}
143+inline void add_m_disp_scale(EncoderBase::Operands & args, int baseReg, int disp, int indexReg, int scale,
144+ OpndSize sz, OpndExt ext = OpndExt_None) {
145+ if (sz == OpndSize_128)
146+ {
147+ //For xmm registers, the encoder table lists them as 64-bit operands. Since the semantics
148+ //are determined by the encoding of the mnemonic, we change the size to 64-bit to satisfy the
149+ //encoder. The generated code still operates on 128 bits, because the true 64-bit forms all use distinct MMX encodings.
150+ sz = OpndSize_64;
151+ }
152+
153+ args.add(EncoderBase::Operand(sz,
154+ mapFromPhysicalReg (baseReg),
155+ mapFromPhysicalReg (indexReg), scale,
156+ disp, ext));
157+}
158+
159+inline void add_fp(EncoderBase::Operands & args, unsigned i, bool dbl) {
160+ return args.add((RegName)( (dbl ? RegName_FP0D : RegName_FP0S) + i));
161+}
162+inline void add_imm(EncoderBase::Operands & args, OpndSize sz, int value, bool is_signed) {
163+ //assert(n_size != imm.get_size());
164+ args.add(EncoderBase::Operand(sz, value,
165+ is_signed ? OpndExt_Signed : OpndExt_Zero));
166+}
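// A minimal usage sketch (illustrative only, not part of this patch): the
// helpers above build an operand list destination-first, which EncoderBase
// then encodes. Assuming "stream" points into a writable code buffer,
// "mov eax, 42" could be produced like this:
static char * sketch_mov_eax_imm(char * stream) {
    EncoderBase::Operands args;
    add_r(args, PhysicalReg_EAX, OpndSize_32);           // destination register
    add_imm(args, OpndSize_32, 42, true /*is_signed*/);  // source immediate
    return (char *)EncoderBase::encode(stream, Mnemonic_MOV, args);
}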
167+
168+#define MAX_DECODED_STRING_LEN 1024
169+char tmpBuffer[MAX_DECODED_STRING_LEN];
170+
171+void printOperand(const EncoderBase::Operand & opnd) {
172+ unsigned int sz;
173+ if(!dump_x86_inst) return;
174+ sz = strlen(tmpBuffer);
175+ if(opnd.size() != OpndSize_32) {
176+ const char * opndSizeString = getOpndSizeString(opnd.size());
177+
178+ if (opndSizeString == NULL) {
179+ // If the string that represents the operand size is null, the
180+ // operand size is an invalid value. Although this could be a
181+ // problem if the instruction is corrupted, failing to disassemble
182+ // is technically not fatal. Thus, let's warn but proceed with
183+ // an empty string.
184+ ALOGW("JIT-WARNING: Cannot decode instruction operand size.");
185+ opndSizeString = "";
186+ }
187+
188+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN - sz, "%s ",
189+ opndSizeString);
190+ }
191+ if(opnd.is_mem()) {
192+ if(opnd.scale() != 0) {
193+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz,
194+ "%d(%s,%s,%d)", opnd.disp(),
195+ getRegNameString(opnd.base()),
196+ getRegNameString(opnd.index()), opnd.scale());
197+ } else {
198+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "%d(%s)",
199+ opnd.disp(), getRegNameString(opnd.base()));
200+ }
201+ }
202+ if(opnd.is_imm()) {
203+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "#%x",
204+ (int)opnd.imm());
205+ }
206+ if(opnd.is_reg()) {
207+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "%s",
208+ getRegNameString(opnd.reg()));
209+ }
210+}
211+//TODO: settle the operand order.
212+//To make the printout match the operand order of the assembly in the .S files,
213+//the order is reversed here.
214+void printDecoderInst(Inst & decInst) {
215+ unsigned int sz;
216+ if(!dump_x86_inst) return;
217+ sz = strlen(tmpBuffer);
218+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "%s ",
219+ EncoderBase::toStr(decInst.mn));
220+ for(unsigned int k = 0; k < decInst.argc; k++) {
221+ if(k > 0) {
222+ sz = strlen(tmpBuffer);
223+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, ", ");
224+ }
225+ printOperand(decInst.operands[decInst.argc-1-k]);
226+ }
227+ ALOGE("%s", tmpBuffer);
228+}
229+void printOperands(EncoderBase::Operands& opnds) {
230+ unsigned int sz;
231+ if(!dump_x86_inst) return;
232+ for(unsigned int k = 0; k < opnds.count(); k++) {
233+ if(k > 0) {
234+ sz = strlen(tmpBuffer);
235+ sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, ", ");
236+ }
237+ printOperand(opnds[opnds.count()-1-k]);
238+ }
239+}
240+void printEncoderInst(Mnemonic m, EncoderBase::Operands& opnds) {
241+ if(!dump_x86_inst) return;
242+ snprintf(tmpBuffer, MAX_DECODED_STRING_LEN, "--- ENC %s ",
243+ EncoderBase::toStr(m));
244+ printOperands(opnds);
245+ ALOGE("%s", tmpBuffer);
246+}
247+int decodeThenPrint(char* stream_start) {
248+ if(!dump_x86_inst) return 0;
249+ snprintf(tmpBuffer, MAX_DECODED_STRING_LEN, "--- INST @ %p: ",
250+ stream_start);
251+ Inst decInst;
252+ unsigned numBytes = DecoderBase::decode(stream_start, &decInst);
253+ printDecoderInst(decInst);
254+ return numBytes;
255+}
256+
257+extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm(Mnemonic m, OpndSize size, int imm, char * stream) {
258+ EncoderBase::Operands args;
259+ //assert(imm.get_size() == size_32);
260+ add_imm(args, size, imm, true/*is_signed*/);
261+#ifdef PRINT_ENCODER_STREAM
262+ char* stream_start = stream;
263+#endif
264+ stream = (char *)EncoderBase::encode(stream, m, args);
265+#ifdef PRINT_ENCODER_STREAM
266+ printEncoderInst(m, args);
267+ decodeThenPrint(stream_start);
268+#endif
269+ return stream;
270+}
271+extern "C" ENCODER_DECLARE_EXPORT unsigned encoder_get_inst_size(char * stream) {
272+ Inst decInst;
273+ unsigned numBytes = DecoderBase::decode(stream, &decInst);
274+ return numBytes;
275+}
276+
277+extern "C" ENCODER_DECLARE_EXPORT uintptr_t encoder_get_cur_operand_offset(int opnd_id)
278+{
279+ return (uintptr_t)EncoderBase::getOpndLocation(opnd_id);
280+}
281+
282+extern "C" ENCODER_DECLARE_EXPORT char * encoder_update_imm(int imm, char * stream) {
283+ Inst decInst;
284+ EncoderBase::Operands args;
285+
286+ //Decode the instruction
287+ DecoderBase::decode(stream, &decInst);
288+
289+ add_imm(args, decInst.operands[0].size(), imm, true/*is_signed*/);
290+ char* stream_next = (char *)EncoderBase::encode(stream, decInst.mn, args);
291+#ifdef PRINT_ENCODER_STREAM
292+ printEncoderInst(decInst.mn, args);
293+ decodeThenPrint(stream);
294+#endif
295+ return stream_next;
296+}
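// Usage sketch (illustrative only): encoder_update_imm lets a caller emit an
// instruction with a placeholder immediate and patch it once the real value
// is known. Assuming Mnemonic_JMP is present in the mnemonic table (as in the
// Dalvik libenc this code derives from), a forward jump could be fixed up so:
static char * sketch_patch_forward_jump(char * stream, int real_disp) {
    char * jump_at = stream;
    stream = encoder_imm(Mnemonic_JMP, OpndSize_32, 0 /*placeholder*/, stream);
    encoder_update_imm(real_disp, jump_at);  // rewrite the immediate in place
    return stream;
}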
297+extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem(Mnemonic m, OpndSize size,
298+ int disp, int base_reg, bool isBasePhysical, char * stream) {
299+ EncoderBase::Operands args;
300+ add_m(args, base_reg, disp, size);
301+#ifdef PRINT_ENCODER_STREAM
302+ char* stream_start = stream;
303+#endif
304+ stream = (char *)EncoderBase::encode(stream, m, args);
305+#ifdef PRINT_ENCODER_STREAM
306+ printEncoderInst(m, args);
307+ decodeThenPrint(stream_start);
308+#endif
309+ return stream;
310+}
311+extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg(Mnemonic m, OpndSize size,
312+ int reg, bool isPhysical, LowOpndRegType type, char * stream) {
313+ EncoderBase::Operands args;
314+ if(m == Mnemonic_DIV || m == Mnemonic_IDIV || m == Mnemonic_MUL || m == Mnemonic_IMUL) {
315+ add_r(args, 0/*eax*/, size);
316+ add_r(args, 3/*edx*/, size);
317+ }
318+ add_r(args, reg, size);
319+#ifdef PRINT_ENCODER_STREAM
320+ char* stream_start = stream;
321+#endif
322+ stream = (char *)EncoderBase::encode(stream, m, args);
323+#ifdef PRINT_ENCODER_STREAM
324+ printEncoderInst(m, args);
325+ decodeThenPrint(stream_start);
326+#endif
327+ return stream;
328+}
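// Usage sketch (illustrative only): because the implicit EAX/EDX pair is
// added above, a caller of the one-operand multiply/divide forms names only
// the explicit operand, e.g. "imul ecx" (EDX:EAX = EAX * ECX):
//   stream = encoder_reg(Mnemonic_IMUL, OpndSize_32,
//                        PhysicalReg_ECX, true, LowOpndRegType_gp, stream);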
329+//! \brief Allows for different operand sizes
330+extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_reg(Mnemonic m, OpndSize size,
331+ int imm, int reg, bool isPhysical, LowOpndRegType type, char * stream) {
332+ return encoder_imm_reg_diff_sizes(m, size, imm, size, reg, isPhysical, type, stream);
333+}
334+extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_reg_diff_sizes(Mnemonic m, OpndSize srcOpndSize,
335+ int reg, bool isPhysical, OpndSize destOpndSize,
336+ int reg2, bool isPhysical2, LowOpndRegType type, char * stream) {
337+ if((m == Mnemonic_MOV || m == Mnemonic_MOVQ || m == Mnemonic_MOVD) && reg == reg2) return stream;
338+ EncoderBase::Operands args;
339+ add_r(args, reg2, destOpndSize); //destination
340+ if(m == Mnemonic_SAL || m == Mnemonic_SHR || m == Mnemonic_SHL || m == Mnemonic_SAR)
341+ add_r(args, reg, OpndSize_8);
342+ else
343+ add_r(args, reg, srcOpndSize);
344+#ifdef PRINT_ENCODER_STREAM
345+ char* stream_start = stream;
346+#endif
347+ stream = (char *)EncoderBase::encode(stream, m, args);
348+#ifdef PRINT_ENCODER_STREAM
349+ printEncoderInst(m, args);
350+ decodeThenPrint(stream_start);
351+#endif
352+ return stream;
353+}
354+//both operands have same size
355+extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_reg(Mnemonic m, OpndSize size,
356+ int reg, bool isPhysical,
357+ int reg2, bool isPhysical2, LowOpndRegType type, char * stream) {
358+ return encoder_reg_reg_diff_sizes(m, size, reg, isPhysical, size, reg2, isPhysical2, type, stream);
359+}
360+//! \brief Allows for different operand sizes
361+extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize,
362+ int disp, int base_reg, bool isBasePhysical, OpndSize regOpndSize,
363+ int reg, bool isPhysical, LowOpndRegType type, char * stream) {
364+ EncoderBase::Operands args;
365+ add_r(args, reg, regOpndSize);
366+ add_m(args, base_reg, disp, memOpndSize);
367+#ifdef PRINT_ENCODER_STREAM
368+ char* stream_start = stream;
369+#endif
370+ stream = (char *)EncoderBase::encode(stream, m, args);
371+#ifdef PRINT_ENCODER_STREAM
372+ printEncoderInst(m, args);
373+ decodeThenPrint(stream_start);
374+#endif
375+ return stream;
376+}
377+extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_reg(Mnemonic m, OpndSize size,
378+ int disp, int base_reg, bool isBasePhysical,
379+ int reg, bool isPhysical, LowOpndRegType type, char * stream) {
380+ return encoder_mem_to_reg_diff_sizes(m, size, disp, base_reg, isBasePhysical, size, reg, isPhysical, type, stream);
381+}
382+extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_scale_reg(Mnemonic m, OpndSize size,
383+ int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale,
384+ int reg, bool isPhysical, LowOpndRegType type, char * stream) {
385+ EncoderBase::Operands args;
386+ add_r(args, reg, size);
387+ add_m_scale(args, base_reg, index_reg, scale, size);
388+#ifdef PRINT_ENCODER_STREAM
389+ char* stream_start = stream;
390+#endif
391+ stream = (char *)EncoderBase::encode(stream, m, args);
392+#ifdef PRINT_ENCODER_STREAM
393+ printEncoderInst(m, args);
394+ decodeThenPrint(stream_start);
395+#endif
396+ return stream;
397+}
398+extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_mem_scale(Mnemonic m, OpndSize size,
399+ int reg, bool isPhysical,
400+ int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale,
401+ LowOpndRegType type, char * stream) {
402+ EncoderBase::Operands args;
403+ add_m_scale(args, base_reg, index_reg, scale, size);
404+ add_r(args, reg, size);
405+#ifdef PRINT_ENCODER_STREAM
406+ char* stream_start = stream;
407+#endif
408+ stream = (char *)EncoderBase::encode(stream, m, args);
409+#ifdef PRINT_ENCODER_STREAM
410+ printEncoderInst(m, args);
411+ decodeThenPrint(stream_start);
412+#endif
413+ return stream;
414+}
415+//! \brief Allows for different operand sizes
416+extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize,
417+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
418+ OpndSize regOpndSize, int reg, bool isPhysical, LowOpndRegType type, char * stream) {
419+ EncoderBase::Operands args;
420+ add_r(args, reg, regOpndSize);
421+ add_m_disp_scale(args, base_reg, disp, index_reg, scale, memOpndSize);
422+#ifdef PRINT_ENCODER_STREAM
423+ char* stream_start = stream;
424+#endif
425+ stream = (char *)EncoderBase::encode(stream, m, args);
426+#ifdef PRINT_ENCODER_STREAM
427+ printEncoderInst(m, args);
428+ decodeThenPrint(stream_start);
429+#endif
430+ return stream;
431+}
432+extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_reg(Mnemonic m, OpndSize size,
433+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
434+ int reg, bool isPhysical, LowOpndRegType type, char * stream) {
435+ return encoder_mem_disp_scale_to_reg_diff_sizes(m, size, base_reg, isBasePhysical,
436+ disp, index_reg, isIndexPhysical, scale, size, reg, isPhysical,
437+ type, stream);
438+}
439+extern "C" ENCODER_DECLARE_EXPORT char * encoder_movzs_mem_disp_scale_reg(Mnemonic m, OpndSize size,
440+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
441+ int reg, bool isPhysical, LowOpndRegType type, char * stream) {
442+ EncoderBase::Operands args;
443+ add_r(args, reg, OpndSize_32);
444+ add_m_disp_scale(args, base_reg, disp, index_reg, scale, size);
445+#ifdef PRINT_ENCODER_STREAM
446+ char* stream_start = stream;
447+#endif
448+ stream = (char *)EncoderBase::encode(stream, m, args);
449+#ifdef PRINT_ENCODER_STREAM
450+ printEncoderInst(m, args);
451+ decodeThenPrint(stream_start);
452+#endif
453+ return stream;
454+}
455+extern "C" ENCODER_DECLARE_EXPORT char* encoder_reg_mem_disp_scale(Mnemonic m, OpndSize size,
456+ int reg, bool isPhysical,
457+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
458+ LowOpndRegType type, char* stream) {
459+ EncoderBase::Operands args;
460+ add_m_disp_scale(args, base_reg, disp, index_reg, scale, size);
461+ add_r(args, reg, size);
462+#ifdef PRINT_ENCODER_STREAM
463+ char* stream_start = stream;
464+#endif
465+ stream = (char *)EncoderBase::encode(stream, m, args);
466+#ifdef PRINT_ENCODER_STREAM
467+ printEncoderInst(m, args);
468+ decodeThenPrint(stream_start);
469+#endif
470+ return stream;
471+}
472+
473+extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_mem(Mnemonic m, OpndSize size,
474+ int reg, bool isPhysical,
475+ int disp, int base_reg, bool isBasePhysical, LowOpndRegType type, char * stream) {
476+ EncoderBase::Operands args;
477+ add_m(args, base_reg, disp, size);
478+ add_r(args, reg, size);
479+#ifdef PRINT_ENCODER_STREAM
480+ char* stream_start = stream;
481+#endif
482+ if (m == Mnemonic_CMPXCHG) {
483+ //CMPXCHG requires EAX as an implicit operand
484+ add_r(args,PhysicalReg_EAX,size);
485+ //Add the lock prefix so that CMPXCHG stays atomic on multi-core platforms
486+ stream = (char *)EncoderBase::prefix(stream, InstPrefix_LOCK);
487+ }
488+ stream = (char *)EncoderBase::encode(stream, m, args);
489+#ifdef PRINT_ENCODER_STREAM
490+ printEncoderInst(m, args);
491+ decodeThenPrint(stream_start);
492+#endif
493+ return stream;
494+}
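// Usage sketch (illustrative only, placeholder names): a 32-bit
// compare-and-swap through this helper. EAX holds the expected old value
// (added implicitly above) and the lock prefix is emitted automatically:
//   stream = encoder_reg_mem(Mnemonic_CMPXCHG, OpndSize_32,
//                            newValueReg, true /*isPhysical*/,
//                            disp, baseReg, true /*isBasePhysical*/,
//                            LowOpndRegType_gp, stream);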
495+extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_reg_diff_sizes (Mnemonic m, OpndSize sizeImm, int imm,
496+ OpndSize sizeReg, int reg, bool isPhysical, LowOpndRegType type, char * stream)
497+{
498+ //Create the operands
499+ EncoderBase::Operands args;
500+ //Add destination register
501+ add_r (args, reg, sizeReg);
502+ //For imul, we need to add implicit register explicitly
503+ if (m == Mnemonic_IMUL)
504+ {
505+ add_r (args, reg, sizeReg);
506+ }
507+ //Finally add the immediate
508+ add_imm (args, sizeImm, imm, true/*is_signed*/);
509+
510+#ifdef PRINT_ENCODER_STREAM
511+ char* stream_start = stream;
512+#endif
513+
514+ //Now do the encoding
515+ stream = EncoderBase::encode (stream, m, args);
516+
517+#ifdef PRINT_ENCODER_STREAM
518+ printEncoderInst(m, args);
519+ decodeThenPrint(stream_start);
520+#endif
521+
522+ return stream;
523+}
524+extern "C" ENCODER_DECLARE_EXPORT char * encoder_update_imm_rm(int imm, char * stream) {
525+ Inst decInst;
526+ EncoderBase::Operands args;
527+
528+ //Decode the instruction
529+ DecoderBase::decode(stream, &decInst);
530+
531+ args.add(decInst.operands[0]);
532+ add_imm(args, decInst.operands[1].size(), imm, true/*is_signed*/);
533+ char* stream_next = (char *)EncoderBase::encode(stream, decInst.mn, args);
534+#ifdef PRINT_ENCODER_STREAM
535+ printEncoderInst(decInst.mn, args);
536+ decodeThenPrint(stream);
537+#endif
538+ return stream_next;
539+}
540+
541+extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_mem(Mnemonic m, OpndSize size,
542+ int imm,
543+ int disp, int base_reg, bool isBasePhysical, char * stream) {
544+ return encoder_imm_mem_diff_sizes(m, size, imm, size, disp, base_reg, isBasePhysical, stream);
545+}
546+
547+extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_mem_diff_sizes (Mnemonic m, OpndSize immOpndSize, int imm,
548+ OpndSize memOpndSize, int disp, int baseRegister, bool isBasePhysical, char * stream)
549+{
550+ //Add operands
551+ EncoderBase::Operands args;
552+ add_m (args, baseRegister, disp, memOpndSize);
553+ add_imm (args, immOpndSize, imm, true);
554+
555+#ifdef PRINT_ENCODER_STREAM
556+ char* stream_start = stream;
557+#endif
558+
559+ //Do the encoding
560+ stream = EncoderBase::encode (stream, m, args);
561+
562+#ifdef PRINT_ENCODER_STREAM
563+ printEncoderInst(m, args);
564+ decodeThenPrint(stream_start);
565+#endif
566+
567+ return stream;
568+}
569+
570+extern "C" ENCODER_DECLARE_EXPORT char * encoder_fp_mem(Mnemonic m, OpndSize size, int reg,
571+ int disp, int base_reg, bool isBasePhysical, char * stream) {
572+ EncoderBase::Operands args;
573+ add_m(args, base_reg, disp, size);
574+ // a fake FP register as operand
575+ add_fp(args, reg, size == OpndSize_64/*is_double*/);
576+#ifdef PRINT_ENCODER_STREAM
577+ char* stream_start = stream;
578+#endif
579+ stream = (char *)EncoderBase::encode(stream, m, args);
580+#ifdef PRINT_ENCODER_STREAM
581+ printEncoderInst(m, args);
582+ decodeThenPrint(stream_start);
583+#endif
584+ return stream;
585+}
586+extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_fp(Mnemonic m, OpndSize size,
587+ int disp, int base_reg, bool isBasePhysical,
588+ int reg, char * stream) {
589+ EncoderBase::Operands args;
590+ // a fake FP register as operand
591+ add_fp(args, reg, size == OpndSize_64/*is_double*/);
592+ add_m(args, base_reg, disp, size);
593+#ifdef PRINT_ENCODER_STREAM
594+ char* stream_start = stream;
595+#endif
596+ stream = (char *)EncoderBase::encode(stream, m, args);
597+#ifdef PRINT_ENCODER_STREAM
598+ printEncoderInst(m, args);
599+ decodeThenPrint(stream_start);
600+#endif
601+ return stream;
602+}
603+
604+extern "C" ENCODER_DECLARE_EXPORT char * encoder_return(char * stream) {
605+ EncoderBase::Operands args;
606+#ifdef PRINT_ENCODER_STREAM
607+ char* stream_start = stream;
608+#endif
609+ stream = (char *)EncoderBase::encode(stream, Mnemonic_RET, args);
610+#ifdef PRINT_ENCODER_STREAM
611+ printEncoderInst(Mnemonic_RET, args);
612+ decodeThenPrint(stream_start);
613+#endif
614+ return stream;
615+}
616+extern "C" ENCODER_DECLARE_EXPORT char * encoder_compare_fp_stack(bool pop, int reg, bool isDouble, char * stream) {
617+ Mnemonic m = pop ? Mnemonic_FUCOMIP : Mnemonic_FUCOMI;
618+ //one operand or two operands?
619+ //FST ST(i) appears to take a single operand in encoder.inl
620+ EncoderBase::Operands args;
621+ add_fp(args, reg, isDouble);
622+#ifdef PRINT_ENCODER_STREAM
623+ char* stream_start = stream;
624+#endif
625+ stream = (char *)EncoderBase::encode(stream, m, args);
626+#ifdef PRINT_ENCODER_STREAM
627+ printEncoderInst(m, args);
628+ decodeThenPrint(stream_start);
629+#endif
630+ return stream;
631+}
632+extern "C" ENCODER_DECLARE_EXPORT char * encoder_movez_mem_to_reg(OpndSize size,
633+ int disp, int base_reg, bool isBasePhysical,
634+ int reg, bool isPhysical, char * stream) {
635+ EncoderBase::Operands args;
636+ add_r(args, reg, OpndSize_32);
637+ add_m(args, base_reg, disp, size);
638+#ifdef PRINT_ENCODER_STREAM
639+ char* stream_start = stream;
640+#endif
641+ stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVZX, args);
642+#ifdef PRINT_ENCODER_STREAM
643+ printEncoderInst(Mnemonic_MOVZX, args);
644+ decodeThenPrint(stream_start);
645+#endif
646+ return stream;
647+}
648+extern "C" ENCODER_DECLARE_EXPORT char * encoder_moves_mem_to_reg(OpndSize size,
649+ int disp, int base_reg, bool isBasePhysical,
650+ int reg, bool isPhysical, char * stream) {
651+ EncoderBase::Operands args;
652+ add_r(args, reg, OpndSize_32);
653+ add_m(args, base_reg, disp, size);
654+#ifdef PRINT_ENCODER_STREAM
655+ char* stream_start = stream;
656+#endif
657+ stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVSX, args);
658+#ifdef PRINT_ENCODER_STREAM
659+ printEncoderInst(Mnemonic_MOVSX, args);
660+ decodeThenPrint(stream_start);
661+#endif
662+ return stream;
663+}
664+extern "C" ENCODER_DECLARE_EXPORT char * encoder_movez_reg_to_reg(OpndSize size,
665+ int reg, bool isPhysical, int reg2,
666+ bool isPhysical2, LowOpndRegType type, char * stream) {
667+ EncoderBase::Operands args;
668+ add_r(args, reg2, OpndSize_32); //destination
669+ add_r(args, reg, size);
670+#ifdef PRINT_ENCODER_STREAM
671+ char* stream_start = stream;
672+#endif
673+ stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVZX, args);
674+#ifdef PRINT_ENCODER_STREAM
675+ printEncoderInst(Mnemonic_MOVZX, args);
676+ decodeThenPrint(stream_start);
677+#endif
678+ return stream;
679+}
680+extern "C" ENCODER_DECLARE_EXPORT char * encoder_moves_reg_to_reg(OpndSize size,
681+ int reg, bool isPhysical,int reg2,
682+ bool isPhysical2, LowOpndRegType type, char * stream) {
683+ EncoderBase::Operands args;
684+ add_r(args, reg2, OpndSize_32); //destination
685+ add_r(args, reg, size);
686+#ifdef PRINT_ENCODER_STREAM
687+ char* stream_start = stream;
688+#endif
689+ stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVSX, args);
690+#ifdef PRINT_ENCODER_STREAM
691+ printEncoderInst(Mnemonic_MOVSX, args);
692+ decodeThenPrint(stream_start);
693+#endif
694+ return stream;
695+}
696+
697+extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_reg_reg (Mnemonic m, int imm, OpndSize immediateSize,
698+ int sourceReg, OpndSize sourceRegSize, int destReg, OpndSize destRegSize, char * stream)
699+{
700+ EncoderBase::Operands args;
701+
702+ //Add the source and destination registers
703+ add_r (args, destReg, destRegSize);
704+ add_r (args, sourceReg, sourceRegSize);
705+
706+ //Now add the immediate; in the three-operand form it is expected to be the last argument
707+ add_imm (args, immediateSize, imm, true/*is_signed*/);
708+
709+#ifdef PRINT_ENCODER_STREAM
710+ char* stream_start = stream;
711+#endif
712+
713+ //Do the actual encoding
714+ stream = EncoderBase::encode (stream, m, args);
715+
716+#ifdef PRINT_ENCODER_STREAM
717+ printEncoderInst (m, args);
718+ decodeThenPrint (stream_start);
719+#endif
720+
721+ //Return the updated stream pointer
722+ return stream;
723+}
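// Usage sketch (illustrative only): the classic use of the three-operand
// form is IMUL, e.g. "imul edx, ecx, 100" (EDX = ECX * 100):
//   stream = encoder_imm_reg_reg(Mnemonic_IMUL, 100, OpndSize_32,
//                                PhysicalReg_ECX, OpndSize_32,
//                                PhysicalReg_EDX, OpndSize_32, stream);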
724+
725+/**
726+ * @brief Generates variable sized nop instructions.
727+ * @param numBytes Number of bytes for the nop instruction. If this value is
728+ * larger than 9 bytes, more than one nop instruction will be generated.
729+ * @param stream Instruction stream where to place the nops
730+ * @return Updated instruction stream pointer after generating the nops
731+ */
732+extern "C" ENCODER_DECLARE_EXPORT char * encoder_nops(unsigned numBytes, char * stream) {
733+ return EncoderBase::nops(stream, numBytes);
734+}
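// Usage sketch (illustrative only): pad the code stream to a 16-byte
// boundary, e.g. before a branch target; per the comment above, padding
// larger than 9 bytes is split into multiple nop instructions:
static char * sketch_align16(char * stream) {
    unsigned pad = (16 - ((uintptr_t)stream & 15)) & 15;
    return encoder_nops(pad, stream);
}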
735+
736+// Disassemble the operand "opnd" and put the readable format in "strbuf"
737+// up to a string length of "len".
738+unsigned int DisassembleOperandToBuf(const EncoderBase::Operand& opnd, char* strbuf, unsigned int len)
739+{
740+ unsigned int sz = 0;
741+ if(opnd.size() != OpndSize_32) {
742+ const char * opndSizeString = getOpndSizeString(opnd.size());
743+
744+ if (opndSizeString == NULL) {
745+ // If the string that represents the operand size is null, the
746+ // operand size is an invalid value. Although this could be a
747+ // problem if the instruction is corrupted, failing to disassemble
748+ // is technically not fatal. Thus, let's warn but proceed with
749+ // an empty string.
750+ ALOGW("JIT-WARNING: Cannot decode instruction operand size.");
751+ opndSizeString = "";
752+ }
753+
754+ sz += snprintf(&strbuf[sz], len-sz, "%s ", opndSizeString);
755+ }
756+ if(opnd.is_mem()) {
757+ if(opnd.scale() != 0) {
758+ sz += snprintf(&strbuf[sz], len-sz, "%d(%s,%s,%d)", opnd.disp(),
759+ getRegNameString(opnd.base()),
760+ getRegNameString(opnd.index()), opnd.scale());
761+ } else {
762+ sz += snprintf(&strbuf[sz], len-sz, "%d(%s)",
763+ opnd.disp(), getRegNameString(opnd.base()));
764+ }
765+ } else if(opnd.is_imm()) {
766+ sz += snprintf(&strbuf[sz], len-sz, "#%x", (int)opnd.imm());
767+ } else if(opnd.is_reg()) {
768+ sz += snprintf(&strbuf[sz], len-sz, "%s",
769+ getRegNameString(opnd.reg()));
770+ }
771+ return sz;
772+}
773+
774+// Disassemble the instruction "decInst" and put the readable format
775+// in "strbuf" up to a string length of "len".
776+void DisassembleInstToBuf(Inst& decInst, char* strbuf, unsigned int len)
777+{
778+ unsigned int sz = 0;
779+ int k;
780+ sz += snprintf(&strbuf[sz], len-sz, "%s ", EncoderBase::toStr(decInst.mn));
781+ if (decInst.argc > 0) {
782+ sz += DisassembleOperandToBuf(decInst.operands[decInst.argc-1],
783+ &strbuf[sz], len-sz);
784+ for(k = decInst.argc-2; k >= 0; k--) {
785+ sz += snprintf(&strbuf[sz], len-sz, ", ");
786+ sz += DisassembleOperandToBuf(decInst.operands[k], &strbuf[sz], len-sz);
787+ }
788+ }
789+}
790+
791+// Disassemble the x86 instruction pointed to by the code pointer "stream".
792+// Put the disassembled text in "strbuf", up to a string length of "len".
793+// Return the code pointer just past the disassembled x86 instruction.
794+extern "C" ENCODER_DECLARE_EXPORT
795+char* decoder_disassemble_instr(char* stream, char* strbuf, unsigned int len)
796+{
797+ Inst decInst;
798+ unsigned numBytes = DecoderBase::decode(stream, &decInst);
799+ DisassembleInstToBuf(decInst, strbuf, len);
800+ return (stream + numBytes);
801+}
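// Usage sketch (illustrative only): walk a freshly generated fragment and
// log it one instruction per line, reusing this file's buffer size and
// logging style:
static void sketch_dump_fragment(char * start, char * end) {
    char line[MAX_DECODED_STRING_LEN];
    while (start < end) {
        start = decoder_disassemble_instr(start, line, sizeof(line));
        ALOGE("%s", line);
    }
}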
802+
803+/**
804+ * @brief Physical register char* counterparts
805+ */
806+static const char * PhysicalRegString[] = { "eax", "ebx", "ecx", "edx", "edi",
807+ "esi", "esp", "ebp", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
808+ "xmm6", "xmm7", "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7",
809+ "null"
810+ };
811+
812+/**
813+ * @brief Scratch register char* counterparts
814+ */
815+static const char * ScratchRegString[] = { "scratch1", "scratch2", "scratch3",
816+ "scratch4", "scratch5", "scratch6", "scratch7", "scratch8", "scratch9",
817+ "scratch10" };
818+
819+extern "C" ENCODER_DECLARE_EXPORT
820+/**
821+ * @brief Transform a physical register into its char* counterpart
822+ * @param reg the PhysicalReg we want to have a char* equivalent
823+ * @return the register reg in char* form
824+ */
825+const char * physicalRegToString(PhysicalReg reg)
826+{
827+ if (reg < PhysicalReg_Null) {
828+ return PhysicalRegString[reg];
829+ } else if (reg >= PhysicalReg_SCRATCH_1 && reg <= PhysicalReg_SCRATCH_10) {
830+ return ScratchRegString[reg - PhysicalReg_SCRATCH_1];
831+ } else if (reg == PhysicalReg_Null) {
832+ return "null";
833+ } else {
834+ return "corrupted-data";
835+ }
836+}
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/enc_wrapper.h
@@ -0,0 +1,283 @@
1+/*
2+ * Copyright (C) 2012 The Android Open Source Project
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ * See the License for the specific language governing permissions and
14+ * limitations under the License.
15+ */
16+
17+#ifndef _VM_ENC_WRAPPER_H_
18+#define _VM_ENC_WRAPPER_H_
19+
20+#include "enc_defs_ext.h"
21+
22+extern bool dump_x86_inst;
23+typedef enum PhysicalReg {
24+ // StartOfGPMarker is currently initialized to 0 so that it matches the
25+ // register indices in Reg_No. Ideally, however, PhysicalReg_Null would
26+ // be 0 and the rest moved over.
27+ PhysicalReg_StartOfGPMarker = 0,
28+ PhysicalReg_EAX = PhysicalReg_StartOfGPMarker,
29+ PhysicalReg_EBX, PhysicalReg_ECX, PhysicalReg_EDX,
30+ PhysicalReg_EDI, PhysicalReg_ESI, PhysicalReg_ESP, PhysicalReg_EBP,
31+ PhysicalReg_EndOfGPMarker = PhysicalReg_EBP,
32+
33+ PhysicalReg_StartOfXmmMarker,
34+ PhysicalReg_XMM0 = PhysicalReg_StartOfXmmMarker,
35+ PhysicalReg_XMM1, PhysicalReg_XMM2, PhysicalReg_XMM3,
36+ PhysicalReg_XMM4, PhysicalReg_XMM5, PhysicalReg_XMM6, PhysicalReg_XMM7,
37+ PhysicalReg_EndOfXmmMarker = PhysicalReg_XMM7,
38+
39+ PhysicalReg_StartOfX87Marker,
40+ PhysicalReg_ST0 = PhysicalReg_StartOfX87Marker, PhysicalReg_ST1,
41+ PhysicalReg_ST2, PhysicalReg_ST3, PhysicalReg_ST4, PhysicalReg_ST5,
42+ PhysicalReg_ST6, PhysicalReg_ST7,
43+ PhysicalReg_EndOfX87Marker = PhysicalReg_ST7,
44+
45+ PhysicalReg_Null,
46+ //used as scratch logical registers in NCG O1;
47+ //they must not overlap the regular logical registers, so they start at 100
48+ PhysicalReg_SCRATCH_1 = 100, PhysicalReg_SCRATCH_2, PhysicalReg_SCRATCH_3, PhysicalReg_SCRATCH_4,
49+ PhysicalReg_SCRATCH_5, PhysicalReg_SCRATCH_6, PhysicalReg_SCRATCH_7, PhysicalReg_SCRATCH_8,
50+ PhysicalReg_SCRATCH_9, PhysicalReg_SCRATCH_10,
51+
52+ //This should be the last entry
53+ PhysicalReg_Last = PhysicalReg_SCRATCH_10
54+} PhysicalReg;
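/* Usage sketch (illustrative only): the scratch entries are purely logical
 * and deliberately disjoint from the physical range, e.g. via
 * physicalRegToString (declared at the bottom of this header):
 *   physicalRegToString(PhysicalReg_EAX)       -> "eax"
 *   physicalRegToString(PhysicalReg_SCRATCH_1) -> "scratch1"
 */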
55+
56+typedef enum Reg_No {
57+#ifdef _EM64T_
58+ rax_reg = 0,rbx_reg, rcx_reg, rdx_reg,
59+ rdi_reg, rsi_reg, rsp_reg, rbp_reg,
60+ r8_reg, r9_reg, r10_reg, r11_reg,
61+ r12_reg, r13_reg, r14_reg, r15_reg,
62+ xmm0_reg, xmm1_reg, xmm2_reg, xmm3_reg,
63+ xmm4_reg, xmm5_reg, xmm6_reg, xmm7_reg,
64+ xmm8_reg, xmm9_reg, xmm10_reg, xmm11_reg,
65+ xmm12_reg, xmm13_reg, xmm14_reg, xmm15_reg,
66+
67+#else // !defined(_EM64T_)
68+
69+ eax_reg = 0,ebx_reg, ecx_reg, edx_reg,
70+ edi_reg, esi_reg, esp_reg, ebp_reg,
71+ xmm0_reg, xmm1_reg, xmm2_reg, xmm3_reg,
72+ xmm4_reg, xmm5_reg, xmm6_reg, xmm7_reg,
73+ fs_reg,
74+#endif
75+ /** @brief Total number of registers.*/
76+ n_reg
77+} Reg_No;
78+//
79+// instruction operand sizes: 8,16,32,64 bits
80+//
81+typedef enum Opnd_Size {
82+ size_8 = 0,
83+ size_16,
84+ size_32,
85+ size_64,
86+ n_size,
87+#ifdef _EM64T_
88+ size_platf = size_64
89+#else
90+ size_platf = size_32
91+#endif
92+} Opnd_Size;
93+
94+//
95+// opcodes for alu instructions
96+//
97+typedef enum ALU_Opcode {
98+ add_opc = 0,or_opc, adc_opc, sbb_opc,
99+ and_opc, sub_opc, xor_opc, cmp_opc,
100+ mul_opc, imul_opc, div_opc, idiv_opc,
101+ sll_opc, srl_opc, sra_opc, //shift right arithmetic
102+ shl_opc, shr_opc,
103+ sal_opc, sar_opc,
104+ neg_opc, not_opc, andn_opc,
105+ n_alu
106+} ALU_Opcode;
107+
108+typedef enum ConditionCode {
109+ Condition_O = 0,
110+ Condition_NO = 1,
111+ Condition_B = 2,
112+ Condition_NAE = Condition_B,
113+ Condition_C = Condition_B,
114+ Condition_NB = 3,
115+ Condition_AE = Condition_NB,
116+ Condition_NC = Condition_NB,
117+ Condition_Z = 4,
118+ Condition_E = Condition_Z,
119+ Condition_NZ = 5,
120+ Condition_NE = Condition_NZ,
121+ Condition_BE = 6,
122+ Condition_NA = Condition_BE,
123+ Condition_NBE = 7,
124+ Condition_A = Condition_NBE,
125+
126+ Condition_S = 8,
127+ Condition_NS = 9,
128+ Condition_P = 10,
129+ Condition_PE = Condition_P,
130+ Condition_NP = 11,
131+ Condition_PO = Condition_NP,
132+ Condition_L = 12,
133+ Condition_NGE = Condition_L,
134+ Condition_NL = 13,
135+ Condition_GE = Condition_NL,
136+ Condition_LE = 14,
137+ Condition_NG = Condition_LE,
138+ Condition_NLE = 15,
139+ Condition_G = Condition_NLE,
140+ Condition_Count = 16
141+} ConditionCode;
142+
143+//
144+// prefix code
145+//
146+typedef enum InstrPrefix {
147+ no_prefix,
148+ lock_prefix = 0xF0,
149+ hint_branch_taken_prefix = 0x2E,
150+ hint_branch_not_taken_prefix = 0x3E,
151+ prefix_repne = 0xF2,
152+ prefix_repnz = prefix_repne,
153+ prefix_repe = 0xF3,
154+ prefix_repz = prefix_repe,
155+ prefix_rep = 0xF3,
156+ prefix_cs = 0x2E,
157+ prefix_ss = 0x36,
158+ prefix_ds = 0x3E,
159+ prefix_es = 0x26,
160+ prefix_fs = 0x64,
161+ prefix_gs = 0x65
162+} InstrPrefix;
163+
164+enum LowOpndRegType
165+{
166+ LowOpndRegType_gp = 0,
167+ LowOpndRegType_fs = 1,
168+ LowOpndRegType_xmm = 2,
169+ LowOpndRegType_fs_s = 3,
170+ LowOpndRegType_ss = 4,
171+ LowOpndRegType_invalid = 256,
172+};
173+
174+enum LogicalRegType
175+{
176+ LogicalType_invalid = 0,
177+ LowOpndRegType_scratch = 8,
178+ LowOpndRegType_temp = 16,
179+ LowOpndRegType_hard = 32,
180+ LowOpndRegType_virtual = 64,
181+};
182+
183+//If inlining is enabled, enc_wrapper.cpp is separated into two files, one of them the .inl;
184+//enc_wrapper.cpp needs to handle both cases
185+#ifdef ENCODER_INLINE
186+ #define ENCODER_DECLARE_EXPORT inline
187+ #include "enc_wrapper.inl"
188+#else
189+ #define ENCODER_DECLARE_EXPORT
190+#endif
191+
192+#ifdef __cplusplus
193+extern "C"
194+{
195+#endif
196+ENCODER_DECLARE_EXPORT char* encoder_imm(Mnemonic m, OpndSize size,
197+ int imm, char* stream);
198+ENCODER_DECLARE_EXPORT unsigned encoder_get_inst_size(char * stream);
199+ENCODER_DECLARE_EXPORT char* encoder_update_imm(int imm, char * stream);
200+ENCODER_DECLARE_EXPORT char* encoder_mem(Mnemonic m, OpndSize size,
201+ int disp, int base_reg, bool isBasePhysical, char* stream);
202+ENCODER_DECLARE_EXPORT char* encoder_reg(Mnemonic m, OpndSize size,
203+ int reg, bool isPhysical, LowOpndRegType type, char* stream);
204+ENCODER_DECLARE_EXPORT char* encoder_reg_reg(Mnemonic m, OpndSize size,
205+ int reg, bool isPhysical,
206+ int reg2, bool isPhysical2, LowOpndRegType type, char* stream);
207+ENCODER_DECLARE_EXPORT char* encoder_reg_reg_diff_sizes(Mnemonic m, OpndSize srcOpndSize,
208+ int reg, bool isPhysical, OpndSize destOpndSize,
209+ int reg2, bool isPhysical2, LowOpndRegType type, char* stream);
210+ENCODER_DECLARE_EXPORT char* encoder_mem_reg(Mnemonic m, OpndSize size,
211+ int disp, int base_reg, bool isBasePhysical,
212+ int reg, bool isPhysical, LowOpndRegType type, char* stream);
213+ENCODER_DECLARE_EXPORT char* encoder_mem_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize,
214+ int disp, int base_reg, bool isBasePhysical, OpndSize regOpndSize,
215+ int reg, bool isPhysical, LowOpndRegType type, char* stream);
216+ENCODER_DECLARE_EXPORT char* encoder_mem_scale_reg(Mnemonic m, OpndSize size,
217+ int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale,
218+ int reg, bool isPhysical, LowOpndRegType type, char* stream);
219+ENCODER_DECLARE_EXPORT char* encoder_reg_mem_scale(Mnemonic m, OpndSize size,
220+ int reg, bool isPhysical,
221+ int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale,
222+ LowOpndRegType type, char* stream);
223+ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_reg(Mnemonic m, OpndSize size,
224+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
225+ int reg, bool isPhysical, LowOpndRegType type, char * stream);
226+ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize,
227+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
228+ OpndSize regOpndSize, int reg, bool isPhysical, LowOpndRegType type, char * stream);
229+ENCODER_DECLARE_EXPORT char * encoder_movzs_mem_disp_scale_reg(Mnemonic m, OpndSize size,
230+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
231+ int reg, bool isPhysical, LowOpndRegType type, char * stream);
232+ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_to_reg_2(Mnemonic m, OpndSize memOpndSize,
233+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
234+ OpndSize regOpndSize, int reg, bool isPhysical, LowOpndRegType type, char * stream);
235+ENCODER_DECLARE_EXPORT char* encoder_reg_mem_disp_scale(Mnemonic m, OpndSize size,
236+ int reg, bool isPhysical,
237+ int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale,
238+ LowOpndRegType type, char* stream);
239+ENCODER_DECLARE_EXPORT char* encoder_reg_mem(Mnemonic m, OpndSize size,
240+ int reg, bool isPhysical,
241+ int disp, int base_reg, bool isBasePhysical, LowOpndRegType type, char* stream);
242+ENCODER_DECLARE_EXPORT char* encoder_imm_reg(Mnemonic m, OpndSize size,
243+ int imm, int reg, bool isPhysical, LowOpndRegType type, char* stream);
244+ENCODER_DECLARE_EXPORT char * encoder_imm_reg_diff_sizes(Mnemonic m, OpndSize sizeImm,
245+ int imm, OpndSize sizeReg, int reg, bool isPhysical, LowOpndRegType type, char * stream);
246+ENCODER_DECLARE_EXPORT char * encoder_update_imm_rm(int imm, char * stream);
247+ENCODER_DECLARE_EXPORT char* encoder_imm_mem(Mnemonic m, OpndSize size,
248+ int imm,
249+ int disp, int base_reg, bool isBasePhysical, char* stream);
250+ENCODER_DECLARE_EXPORT char * encoder_imm_mem_diff_sizes (Mnemonic m, OpndSize immOpndSize, int imm,
251+ OpndSize memOpndSize, int disp, int baseRegister, bool isBasePhysical, char * stream);
252+ENCODER_DECLARE_EXPORT char* encoder_fp_mem(Mnemonic m, OpndSize size, int reg,
253+ int disp, int base_reg, bool isBasePhysical, char* stream);
254+ENCODER_DECLARE_EXPORT char* encoder_mem_fp(Mnemonic m, OpndSize size,
255+ int disp, int base_reg, bool isBasePhysical,
256+ int reg, char* stream);
257+ENCODER_DECLARE_EXPORT char* encoder_return(char* stream);
258+ENCODER_DECLARE_EXPORT char* encoder_compare_fp_stack(bool pop, int reg, bool isDouble, char* stream);
259+ENCODER_DECLARE_EXPORT char* encoder_movez_mem_to_reg(OpndSize size,
260+ int disp, int base_reg, bool isBasePhysical,
261+ int reg, bool isPhysical, char* stream);
262+ENCODER_DECLARE_EXPORT char* encoder_moves_mem_to_reg(OpndSize size,
263+ int disp, int base_reg, bool isBasePhysical,
264+ int reg, bool isPhysical, char* stream);
265+ENCODER_DECLARE_EXPORT char * encoder_movez_reg_to_reg(OpndSize size,
266+ int reg, bool isPhysical, int reg2,
267+ bool isPhysical2, LowOpndRegType type, char * stream);
268+ENCODER_DECLARE_EXPORT char * encoder_moves_reg_to_reg(OpndSize size,
269+ int reg, bool isPhysical, int reg2,
270+ bool isPhysical2, LowOpndRegType type, char * stream);
271+ENCODER_DECLARE_EXPORT char * encoder_imm_reg_reg (Mnemonic m, int imm, OpndSize immediateSize,
272+ int sourceReg, OpndSize sourceRegSize, int destReg,
273+ OpndSize destRegSize, char * stream);
274+ENCODER_DECLARE_EXPORT char * encoder_nops(unsigned numBytes, char * stream);
275+ENCODER_DECLARE_EXPORT int decodeThenPrint(char* stream_start);
276+ENCODER_DECLARE_EXPORT char* decoder_disassemble_instr(char* stream, char* strbuf, unsigned int len);
277+
278+//Provide a char* equivalent to a PhysicalReg type
279+ENCODER_DECLARE_EXPORT const char * physicalRegToString(PhysicalReg reg);
280+#ifdef __cplusplus
281+}
282+#endif
283+#endif // _VM_ENC_WRAPPER_H_
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/encoder.h
@@ -0,0 +1,717 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+/**
21+ * @file
22+ * @brief Simple interface for generating processor instructions.
23+ *
24+ * The interface works for both IA32 and EM64T. By default, only IA32
25+ * capabilities are exposed. To enable the EM64T features, the _EM64T_ macro
26+ * must be defined (and, of course, a proper library version must be used).
27+ *
28+ * The interface is based on the original ia32.h encoder interface,
29+ * with some simplifications and add-ons - EM64T-specific, SSE and SSE2.
30+ *
31+ * The interface is mostly intended for existing legacy code such as the LIL
32+ * code generator. From the implementation point of view, it is just a wrapper
33+ * around the EncoderBase functionality.
34+ */
35+
36+#ifndef _VM_ENCODER_H_
37+#define _VM_ENCODER_H_
38+
39+#include <limits.h>
40+#include "enc_base.h"
41+//#include "open/types.h"
42+
43+#ifdef _EM64T_
44+// size of general-purpose value on the stack in bytes
45+#define GR_STACK_SIZE 8
46+// size of floating-point value on the stack in bytes
47+#define FR_STACK_SIZE 8
48+
49+#if defined(WIN32) || defined(_WIN64)
50+ // maximum number of GP registers for inputs
51+ const int MAX_GR = 4;
52+ // maximum number of FP registers for inputs
53+ const int MAX_FR = 4;
54+ // WIN64 reserves 4 words for shadow space
55+ const int SHADOW = 4 * GR_STACK_SIZE;
56+#else
57+ // maximum number of GP registers for inputs
58+ const int MAX_GR = 6;
59+ // maximum number of FP registers for inputs
60+ const int MAX_FR = 8;
61+ // Linux x64 doesn't reserve shadow space
62+ const int SHADOW = 0;
63+#endif
64+
65+#else
66+// size of general-purpose value on the stack in bytes
67+#define GR_STACK_SIZE 4
68+// size of floating-point value on the stack in bytes
69+#define FR_STACK_SIZE 8
70+
71+// maximum number of GP registers for inputs
72+const int MAX_GR = 0;
73+// maximum number of FP registers for inputs
74+const int MAX_FR = 0;
75+#endif
76+
77+typedef enum Reg_No {
78+#ifdef _EM64T_
79+ rax_reg = 0,rbx_reg, rcx_reg, rdx_reg,
80+ rdi_reg, rsi_reg, rsp_reg, rbp_reg,
81+ r8_reg, r9_reg, r10_reg, r11_reg,
82+ r12_reg, r13_reg, r14_reg, r15_reg,
83+ xmm0_reg, xmm1_reg, xmm2_reg, xmm3_reg,
84+ xmm4_reg, xmm5_reg, xmm6_reg, xmm7_reg,
85+ xmm8_reg, xmm9_reg, xmm10_reg, xmm11_reg,
86+ xmm12_reg, xmm13_reg, xmm14_reg, xmm15_reg,
87+
88+#else // !defined(_EM64T_)
89+
90+ eax_reg = 0,ebx_reg, ecx_reg, edx_reg,
91+ edi_reg, esi_reg, esp_reg, ebp_reg,
92+ xmm0_reg, xmm1_reg, xmm2_reg, xmm3_reg,
93+ xmm4_reg, xmm5_reg, xmm6_reg, xmm7_reg,
94+ fs_reg,
95+#endif
96+ /** @brief Total number of registers.*/
97+ n_reg
98+} Reg_No;
99+//
100+// instruction operand sizes: 8,16,32,64 bits
101+//
102+typedef enum Opnd_Size {
103+ size_8 = 0,
104+ size_16,
105+ size_32,
106+ size_64,
107+ n_size,
108+#ifdef _EM64T_
109+ size_platf = size_64
110+#else
111+ size_platf = size_32
112+#endif
113+} Opnd_Size;
114+
115+//
116+// opcodes for alu instructions
117+//
118+typedef enum ALU_Opcode {
119+ add_opc = 0,or_opc, adc_opc, sbb_opc,
120+ and_opc, sub_opc, xor_opc, cmp_opc,
121+ n_alu
122+} ALU_Opcode;
123+
124+//
125+// opcodes for shift instructions
126+//
127+typedef enum Shift_Opcode {
128+ shld_opc, shrd_opc, shl_opc, shr_opc,
129+ sar_opc, ror_opc, max_shift_opcode=6, n_shift = 6
130+} Shift_Opcode;
131+
132+typedef enum ConditionCode {
133+ Condition_O = 0,
134+ Condition_NO = 1,
135+ Condition_B = 2,
136+ Condition_NAE = Condition_B,
137+ Condition_C = Condition_B,
138+ Condition_NB = 3,
139+ Condition_AE = Condition_NB,
140+ Condition_NC = Condition_NB,
141+ Condition_Z = 4,
142+ Condition_E = Condition_Z,
143+ Condition_NZ = 5,
144+ Condition_NE = Condition_NZ,
145+ Condition_BE = 6,
146+ Condition_NA = Condition_BE,
147+ Condition_NBE = 7,
148+ Condition_A = Condition_NBE,
149+
150+ Condition_S = 8,
151+ Condition_NS = 9,
152+ Condition_P = 10,
153+ Condition_PE = Condition_P,
154+ Condition_NP = 11,
155+ Condition_PO = Condition_NP,
156+ Condition_L = 12,
157+ Condition_NGE = Condition_L,
158+ Condition_NL = 13,
159+ Condition_GE = Condition_NL,
160+ Condition_LE = 14,
161+ Condition_NG = Condition_LE,
162+ Condition_NLE = 15,
163+ Condition_G = Condition_NLE,
164+ Condition_Count = 16
165+} ConditionCode;
166+
167+//
168+// prefix code
169+//
170+typedef enum InstrPrefix {
171+ no_prefix,
172+ lock_prefix = 0xF0,
173+ hint_branch_taken_prefix = 0x2E,
174+ hint_branch_not_taken_prefix = 0x3E,
175+ prefix_repne = 0xF2,
176+ prefix_repnz = prefix_repne,
177+ prefix_repe = 0xF3,
178+ prefix_repz = prefix_repe,
179+ prefix_rep = 0xF3,
180+ prefix_cs = 0x2E,
181+ prefix_ss = 0x36,
182+ prefix_ds = 0x3E,
183+ prefix_es = 0x26,
184+ prefix_fs = 0x64,
185+ prefix_gs = 0x65
186+} InstrPrefix;
187+
188+
189+//
190+// an instruction operand
191+//
192+class Opnd {
193+
194+protected:
195+ enum Tag { SignedImm, UnsignedImm, Reg, Mem, FP, XMM };
196+
197+ const Tag tag;
198+
199+ Opnd(Tag t): tag(t) {}
200+
201+public:
202+ void * operator new(size_t, void * mem) {
203+ return mem;
204+ }
205+
206+ void operator delete(void *) {}
207+
208+ void operator delete(void *, void *) {}
209+
210+private:
211+ // disallow copying
212+ Opnd(const Opnd &): tag(Mem) { assert(false); }
213+ Opnd& operator=(const Opnd &) { assert(false); return *this; }
214+};
215+typedef int I_32;
216+class Imm_Opnd: public Opnd {
217+
218+protected:
219+ union {
220+#ifdef _EM64T_
221+ int64 value;
222+ unsigned char bytes[8];
223+#else
224+ I_32 value;
225+ unsigned char bytes[4];
226+#endif
227+ };
228+ Opnd_Size size;
229+
230+public:
231+ Imm_Opnd(I_32 val, bool isSigned = true):
232+ Opnd(isSigned ? SignedImm : UnsignedImm), value(val), size(size_32) {
233+ if (isSigned) {
234+ if (CHAR_MIN <= val && val <= CHAR_MAX) {
235+ size = size_8;
236+ } else if (SHRT_MIN <= val && val <= SHRT_MAX) {
237+ size = size_16;
238+ }
239+ } else {
240+ assert(val >= 0);
241+ if (val <= UCHAR_MAX) {
242+ size = size_8;
243+ } else if (val <= USHRT_MAX) {
244+ size = size_16;
245+ }
246+ }
247+ }
248+ Imm_Opnd(const Imm_Opnd& that): Opnd(that.tag), value(that.value), size(that.size) {};
249+
250+#ifdef _EM64T_
251+ Imm_Opnd(Opnd_Size sz, int64 val, bool isSigned = true):
252+ Opnd(isSigned ? SignedImm : UnsignedImm), value(val), size(sz) {
253+#ifndef NDEBUG
254+ switch (size) {
255+ case size_8:
256+ assert(val == (int64)(I_8)val);
257+ break;
258+ case size_16:
259+ assert(val == (int64)(int16)val);
260+ break;
261+ case size_32:
262+ assert(val == (int64)(I_32)val);
263+ break;
264+ case size_64:
265+ break;
266+ case n_size:
267+ assert(false);
268+ break;
269+ }
270+#endif // NDEBUG
271+ }
272+
273+ int64 get_value() const { return value; }
274+
275+#else
276+
277+ Imm_Opnd(Opnd_Size sz, I_32 val, int isSigned = true):
278+ Opnd(isSigned ? SignedImm : UnsignedImm), value(val), size(sz) {
279+#ifndef NDEBUG
280+ switch (size) {
281+ case size_8:
282+ assert((I_32)val == (I_32)(I_8)val);
283+ break;
284+ case size_16:
285+ assert((I_32)val == (I_32)(int16)val);
286+ break;
287+ case size_32:
288+ break;
289+ case size_64:
290+ case n_size:
291+ assert(false);
292+ break;
293+ }
294+#endif // NDEBUG
295+ }
296+
297+ I_32 get_value() const { return value; }
298+
299+#endif
300+ Opnd_Size get_size() const { return size; }
301+ bool is_signed() const { return tag == SignedImm; }
302+};
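// Usage sketch (illustrative only): the one-argument constructor narrows the
// operand size automatically, which lets the encoder pick the short
// instruction forms:
//   Imm_Opnd(5);       // size_8
//   Imm_Opnd(1000);    // size_16
//   Imm_Opnd(100000);  // size_32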
303+
304+class RM_Opnd: public Opnd {
305+
306+public:
307+ bool is_reg() const { return tag != SignedImm && tag != UnsignedImm && tag != Mem; }
308+
309+protected:
310+ RM_Opnd(Tag t): Opnd(t) {}
311+
312+private:
313+ // disallow copying
314+ RM_Opnd(const RM_Opnd &): Opnd(Reg) { assert(false); }
315+};
316+
317+class R_Opnd: public RM_Opnd {
318+
319+protected:
320+ Reg_No _reg_no;
321+
322+public:
323+ R_Opnd(Reg_No r): RM_Opnd(Reg), _reg_no(r) {}
324+ Reg_No reg_no() const { return _reg_no; }
325+
326+private:
327+ // disallow copying
328+ R_Opnd(const R_Opnd &): RM_Opnd(Reg) { assert(false); }
329+};
330+
331+//
332+// a memory operand with displacement
333+// Can also serve as a full memory operand with base, index, displacement and scale.
334+// Use n_reg to specify 'no register', say, for index.
335+class M_Opnd: public RM_Opnd {
336+
337+protected:
338+ Imm_Opnd m_disp;
339+ Imm_Opnd m_scale;
340+ R_Opnd m_index;
341+ R_Opnd m_base;
342+
343+public:
344+ //M_Opnd(Opnd_Size sz): RM_Opnd(Mem, K_M, sz), m_disp(0), m_scale(0), m_index(n_reg), m_base(n_reg) {}
345+ M_Opnd(I_32 disp):
346+ RM_Opnd(Mem), m_disp(disp), m_scale(0), m_index(n_reg), m_base(n_reg) {}
347+ M_Opnd(Reg_No rbase, I_32 rdisp):
348+ RM_Opnd(Mem), m_disp(rdisp), m_scale(0), m_index(n_reg), m_base(rbase) {}
349+ M_Opnd(I_32 disp, Reg_No rbase, Reg_No rindex, unsigned scale):
350+ RM_Opnd(Mem), m_disp(disp), m_scale(scale), m_index(rindex), m_base(rbase) {}
351+ M_Opnd(const M_Opnd & that) : RM_Opnd(Mem),
352+ m_disp((int)that.m_disp.get_value()), m_scale((int)that.m_scale.get_value()),
353+ m_index(that.m_index.reg_no()), m_base(that.m_base.reg_no())
354+ {}
355+ //
356+ inline const R_Opnd & base(void) const { return m_base; }
357+ inline const R_Opnd & index(void) const { return m_index; }
358+ inline const Imm_Opnd & scale(void) const { return m_scale; }
359+ inline const Imm_Opnd & disp(void) const { return m_disp; }
360+};
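// Usage sketch (illustrative only, IA32 register names): the general form
// addresses base + index*scale + disp, with n_reg standing for "no register":
//   M_Opnd(eax_reg, 8);               // 8(%eax)
//   M_Opnd(16, esi_reg, ecx_reg, 4);  // 16(%esi,%ecx,4)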
361+
362+//
363+// a memory operand with base register and displacement
364+//
365+class M_Base_Opnd: public M_Opnd {
366+
367+public:
368+ M_Base_Opnd(Reg_No base, I_32 disp) : M_Opnd(disp, base, n_reg, 0) {}
369+
370+private:
371+ // disallow copying - but it leads to ICC errors #734 in encoder.inl
372+ // M_Base_Opnd(const M_Base_Opnd &): M_Opnd(0) { assert(false); }
373+};
374+
375+//
376+// a memory operand with base register, scaled index register
377+// and displacement.
378+//
379+class M_Index_Opnd : public M_Opnd {
380+
381+public:
382+ M_Index_Opnd(Reg_No base, Reg_No index, I_32 disp, unsigned scale):
383+ M_Opnd(disp, base, index, scale) {}
384+
385+private:
386+ // disallow copying - but it leads to ICC errors #734 in encoder.inl
387+ // M_Index_Opnd(const M_Index_Opnd &): M_Opnd(0) { assert(false); }
388+};
389+
390+class XMM_Opnd : public Opnd {
391+
392+protected:
393+ unsigned m_idx;
394+
395+public:
396+ XMM_Opnd(unsigned _idx): Opnd(XMM), m_idx(_idx) {};
397+ unsigned get_idx( void ) const { return m_idx; };
398+
399+private:
400+ // disallow copying
401+ XMM_Opnd(const XMM_Opnd &): Opnd(XMM) { assert(false); }
402+};
403+
404+//
405+// operand structures for ia32 registers
406+//
407+#ifdef _EM64T_
408+
409+extern R_Opnd rax_opnd;
410+extern R_Opnd rcx_opnd;
411+extern R_Opnd rdx_opnd;
412+extern R_Opnd rbx_opnd;
413+extern R_Opnd rdi_opnd;
414+extern R_Opnd rsi_opnd;
415+extern R_Opnd rsp_opnd;
416+extern R_Opnd rbp_opnd;
417+
418+extern R_Opnd r8_opnd;
419+extern R_Opnd r9_opnd;
420+extern R_Opnd r10_opnd;
421+extern R_Opnd r11_opnd;
422+extern R_Opnd r12_opnd;
423+extern R_Opnd r13_opnd;
424+extern R_Opnd r14_opnd;
425+extern R_Opnd r15_opnd;
426+
427+extern XMM_Opnd xmm8_opnd;
428+extern XMM_Opnd xmm9_opnd;
429+extern XMM_Opnd xmm10_opnd;
430+extern XMM_Opnd xmm11_opnd;
431+extern XMM_Opnd xmm12_opnd;
432+extern XMM_Opnd xmm13_opnd;
433+extern XMM_Opnd xmm14_opnd;
434+extern XMM_Opnd xmm15_opnd;
435+#else
436+
437+extern R_Opnd eax_opnd;
438+extern R_Opnd ecx_opnd;
439+extern R_Opnd edx_opnd;
440+extern R_Opnd ebx_opnd;
441+extern R_Opnd esp_opnd;
442+extern R_Opnd ebp_opnd;
443+extern R_Opnd esi_opnd;
444+extern R_Opnd edi_opnd;
445+
446+#endif // _EM64T_
447+
448+extern XMM_Opnd xmm0_opnd;
449+extern XMM_Opnd xmm1_opnd;
450+extern XMM_Opnd xmm2_opnd;
451+extern XMM_Opnd xmm3_opnd;
452+extern XMM_Opnd xmm4_opnd;
453+extern XMM_Opnd xmm5_opnd;
454+extern XMM_Opnd xmm6_opnd;
455+extern XMM_Opnd xmm7_opnd;
456+
457+#ifdef NO_ENCODER_INLINE
458+ #define ENCODER_DECLARE_EXPORT
459+#else
460+ #define ENCODER_DECLARE_EXPORT inline
461+ #include "encoder.inl"
462+#endif
463+
464+// prefix
465+ENCODER_DECLARE_EXPORT char * prefix(char * stream, InstrPrefix p);
466+
467+// stack push and pop instructions
468+ENCODER_DECLARE_EXPORT char * push(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
469+ENCODER_DECLARE_EXPORT char * push(char * stream, const Imm_Opnd & imm);
470+ENCODER_DECLARE_EXPORT char * pop(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
471+
472+// cmpxchg or xchg
473+ENCODER_DECLARE_EXPORT char * cmpxchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf);
474+ENCODER_DECLARE_EXPORT char * xchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf);
475+
476+// inc(rement), dec(rement), not, neg(ate) instructions
477+ENCODER_DECLARE_EXPORT char * inc(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
478+ENCODER_DECLARE_EXPORT char * dec(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
479+ENCODER_DECLARE_EXPORT char * _not(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
480+ENCODER_DECLARE_EXPORT char * neg(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
481+ENCODER_DECLARE_EXPORT char * nop(char * stream);
482+ENCODER_DECLARE_EXPORT char * int3(char * stream);
483+
484+// alu instructions: add, or, adc, sbb, and, sub, xor, cmp
485+ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz = size_platf);
486+ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz = size_platf);
487+ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf);
488+
489+// test instruction
490+ENCODER_DECLARE_EXPORT char * test(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz = size_platf);
491+ENCODER_DECLARE_EXPORT char * test(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf);
492+
493+// shift instructions: shl, shr, sar, shld, shrd, ror
494+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz = size_platf);
495+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, Opnd_Size sz = size_platf);
496+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz = size_platf);
497+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf);
498+
499+// multiply instructions: mul, imul
500+ENCODER_DECLARE_EXPORT char * mul(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
501+ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf);
502+ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz = size_platf);
503+ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm, const Imm_Opnd& imm, Opnd_Size sz = size_platf);
504+
505+// divide instructions: div, idiv
506+ENCODER_DECLARE_EXPORT char * idiv(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
507+ENCODER_DECLARE_EXPORT char * div(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
508+
509+// data movement: mov
510+ENCODER_DECLARE_EXPORT char * mov(char * stream, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz = size_platf);
511+ENCODER_DECLARE_EXPORT char * mov(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf);
512+ENCODER_DECLARE_EXPORT char * mov(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz = size_platf);
513+
514+ENCODER_DECLARE_EXPORT char * movsx( char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf);
515+ENCODER_DECLARE_EXPORT char * movzx( char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf);
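// Usage sketch of this legacy typed interface (illustrative only, using the
// IA32 operand objects declared above): load, modify and store a stack slot,
// assuming "s" points into a writable code buffer:
//   s = mov(s, eax_opnd, M_Base_Opnd(esp_reg, 4));  // mov eax, 4(%esp)
//   s = alu(s, add_opc, eax_opnd, Imm_Opnd(1));     // add eax, 1
//   s = mov(s, M_Base_Opnd(esp_reg, 4), eax_opnd);  // mov 4(%esp), eax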
516+
517+ENCODER_DECLARE_EXPORT char * movd(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm);
518+ENCODER_DECLARE_EXPORT char * movd(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm);
519+ENCODER_DECLARE_EXPORT char * movq(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm);
520+ENCODER_DECLARE_EXPORT char * movq(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm);
521+
522+// sse mov
523+ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl);
524+ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const M_Opnd & mem, const XMM_Opnd & xmm, bool dbl);
525+ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
526+
527+// sse add, sub, mul, div
528+ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl);
529+ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
530+
531+ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl);
532+ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
533+
534+ENCODER_DECLARE_EXPORT char * sse_mul(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl);
535+ENCODER_DECLARE_EXPORT char * sse_mul(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
536+
537+ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl);
538+ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
539+
540+// xor, compare
541+ENCODER_DECLARE_EXPORT char * sse_xor(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1);
542+
543+ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
544+ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem, bool dbl);
545+
546+// sse conversions
547+ENCODER_DECLARE_EXPORT char * sse_cvt_si(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl);
548+ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const M_Opnd & mem, bool dbl);
549+ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const XMM_Opnd & xmm, bool dbl);
550+ENCODER_DECLARE_EXPORT char * sse_cvt_fp2dq(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
551+ENCODER_DECLARE_EXPORT char * sse_cvt_dq2fp(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl);
552+ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem64);
553+ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1);
554+ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem32);
555+ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1);
556+
557+// conditional operations: cmov, setcc
558+ENCODER_DECLARE_EXPORT char * cmov(char * stream, ConditionCode cc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf);
559+ENCODER_DECLARE_EXPORT char * setcc(char * stream, ConditionCode cc, const RM_Opnd & rm8);
560+
561+// load effective address: lea
562+ENCODER_DECLARE_EXPORT char * lea(char * stream, const R_Opnd & r, const M_Opnd & m, Opnd_Size sz = size_platf);
563+ENCODER_DECLARE_EXPORT char * cdq(char * stream);
564+ENCODER_DECLARE_EXPORT char * wait(char * stream);
565+
566+// control-flow instructions
567+ENCODER_DECLARE_EXPORT char * loop(char * stream, const Imm_Opnd & imm);
568+
569+// jump with 8-bit relative
570+ENCODER_DECLARE_EXPORT char * jump8(char * stream, const Imm_Opnd & imm);
571+
572+// jump with 32-bit relative
573+ENCODER_DECLARE_EXPORT char * jump32(char * stream, const Imm_Opnd & imm);
574+
575+// register indirect jump
576+ENCODER_DECLARE_EXPORT char * jump(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
577+
578+// jump to target address
579+ENCODER_DECLARE_EXPORT char *jump(char * stream, char *target);
580+
581+// jump with displacement
582+//char * jump(char * stream, I_32 disp);
583+
584+// conditional branch with 8-bit branch offset
585+ENCODER_DECLARE_EXPORT char * branch8(char * stream, ConditionCode cc, const Imm_Opnd & imm, InstrPrefix prefix = no_prefix);
586+
587+// conditional branch with 32-bit branch offset
588+ENCODER_DECLARE_EXPORT char * branch32(char * stream, ConditionCode cc, const Imm_Opnd & imm, InstrPrefix prefix = no_prefix);
589+
590+// conditional branch with target label address
591+//char * branch(char * stream, ConditionCode cc, const char * target, InstrPrefix prefix = no_prefix);
592+
593+// conditional branch with displacement immediate
594+ENCODER_DECLARE_EXPORT char * branch(char * stream, ConditionCode cc, I_32 disp, InstrPrefix prefix = no_prefix);
595+
596+// call with displacement
597+ENCODER_DECLARE_EXPORT char * call(char * stream, const Imm_Opnd & imm);
598+
599+// indirect call through register or memory location
600+ENCODER_DECLARE_EXPORT char * call(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf);
601+
602+// call target address
603+ENCODER_DECLARE_EXPORT char * call(char * stream, const char * target);
604+
605+// return instruction
606+ENCODER_DECLARE_EXPORT char * ret(char * stream);
607+ENCODER_DECLARE_EXPORT char * ret(char * stream, unsigned short pop);
608+ENCODER_DECLARE_EXPORT char * ret(char * stream, const Imm_Opnd & imm);
609+
610+// string operations
611+ENCODER_DECLARE_EXPORT char * set_d(char * stream, bool set);
612+ENCODER_DECLARE_EXPORT char * scas(char * stream, unsigned char prefix);
613+ENCODER_DECLARE_EXPORT char * stos(char * stream, unsigned char prefix);
614+
615+// floating-point instructions
616+
617+// st(0) = st(0) fp_op m{32,64}real
618+//!char * fp_op_mem(char * stream, FP_Opcode opc,const M_Opnd& mem,int is_double);
619+
620+// st(0) = st(0) fp_op st(i)
621+//!char *fp_op(char * stream, FP_Opcode opc,unsigned i);
622+
623+// st(i) = st(i) fp_op st(0) ; optionally pop stack
624+//!char * fp_op(char * stream, FP_Opcode opc,unsigned i,unsigned pop_stk);
625+
626+// compare st(0),st(1) and pop stack twice
627+//!char * fcompp(char * stream);
628+ENCODER_DECLARE_EXPORT char * fldcw(char * stream, const M_Opnd & mem);
629+ENCODER_DECLARE_EXPORT char * fnstcw(char * stream, const M_Opnd & mem);
630+ENCODER_DECLARE_EXPORT char * fnstsw(char * stream);
631+//!char * fchs(char * stream);
632+//!char * frem(char * stream);
633+//!char * fxch(char * stream,unsigned i);
634+//!char * fcomip(char * stream, unsigned i);
635+
636+// load from memory (as fp) into fp register stack
637+ENCODER_DECLARE_EXPORT char * fld(char * stream, const M_Opnd & m, bool is_double);
638+//!char *fld80(char * stream,const M_Opnd& mem);
639+
640+// load from memory (as int) into fp register stack
641+//!char * fild(char * stream,const M_Opnd& mem,int is_long);
642+
643+// push st(i) onto fp register stack
644+//!char * fld(char * stream,unsigned i);
645+
646+// push the constants 0.0 and 1.0 onto the fp register stack
647+//!char * fldz(char * stream);
648+//!char * fld1(char * stream);
649+
650+// store stack to memory (as int), optionally popping the stack
651+ENCODER_DECLARE_EXPORT char * fist(char * stream, const M_Opnd & mem, bool is_long, bool pop_stk);
652+// store stack to memory (as fp), optionally popping the stack
653+ENCODER_DECLARE_EXPORT char * fst(char * stream, const M_Opnd & m, bool is_double, bool pop_stk);
654+// store ST(0) to ST(i), optionally popping the stack. Takes 1 clock
655+ENCODER_DECLARE_EXPORT char * fst(char * stream, unsigned i, bool pop_stk);
656+
657+//!char * pushad(char * stream);
658+//!char * pushfd(char * stream);
659+//!char * popad(char * stream);
660+//!char * popfd(char * stream);
661+
662+// stack frame allocation instructions: enter & leave
663+//
664+// enter frame_size
665+//
666+// is equivalent to:
667+//
668+// push ebp
669+// mov ebp,esp
670+// sub esp,frame_size
671+//
672+//!char *enter(char * stream,const Imm_Opnd& imm);
673+
674+// leave
675+// is equivalent to:
676+//
677+// mov esp,ebp
678+// pop ebp
679+//!char *leave(char * stream);
680+
681+// sahf loads the SF, ZF, AF, PF, and CF flags from AH
682+//!char *sahf(char * stream);
683+
684+// Intrinsic FP math functions
685+
686+//!char *math_fsin(char * stream);
687+//!char *math_fcos(char * stream);
688+//!char *math_fabs(char * stream);
689+//!char *math_fpatan(char * stream);
690+ENCODER_DECLARE_EXPORT char * fprem(char * stream);
691+ENCODER_DECLARE_EXPORT char * fprem1(char * stream);
692+//!char *math_frndint(char * stream);
693+//!char *math_fptan(char * stream);
694+
695+//
696+// Add 1-7 bytes padding, with as few instructions as possible,
697+// with no effect on the processor state (e.g., registers, flags)
698+//
699+//!char *padding(char * stream, unsigned num);
700+
701+// prolog and epilog code generation
702+//- char *prolog(char * stream,unsigned frame_size,unsigned reg_save_mask);
703+//- char *epilog(char * stream,unsigned reg_save_mask);
704+
705+//!extern R_Opnd reg_operand_array[];
706+
707+// fsave and frstor
708+//!char *fsave(char * stream);
709+//!char *frstor(char * stream);
710+
711+// lahf : Load Status Flags into AH Register
712+//!char *lahf(char * stream);
713+
714+// mfence : Memory Fence
715+//!char *mfence(char * stream);
716+
717+#endif // _VM_ENCODER_H_
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/libenc/encoder.inl
@@ -0,0 +1,863 @@
1+/*
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+/**
18+ * @author Alexander V. Astapchuk
19+ */
20+#include <stdio.h>
21+#include <assert.h>
22+#include <limits.h>
23+
24+extern const RegName map_of_regno_2_regname[];
25+extern const OpndSize map_of_EncoderOpndSize_2_RealOpndSize[];
26+extern const Mnemonic map_of_alu_opcode_2_mnemonic[];
27+extern const Mnemonic map_of_shift_opcode_2_mnemonic[];
28+
29+// S_ stands for 'Signed'
30+extern const Mnemonic S_map_of_condition_code_2_branch_mnemonic[];
31+// U_ stands for 'Unsigned'
32+extern const Mnemonic U_map_of_condition_code_2_branch_mnemonic[];
33+
34+inline static RegName map_reg(Reg_No r) {
35+ assert(r >= 0 && r <= n_reg);
36+ return map_of_regno_2_regname[r];
37+}
38+
39+inline static OpndSize map_size(Opnd_Size o_size) {
40+ assert(o_size >= 0 && o_size <= n_size);
41+ return map_of_EncoderOpndSize_2_RealOpndSize[o_size];
42+}
43+
44+inline static Mnemonic map_alu(ALU_Opcode alu) {
45+ assert(alu >= 0 && alu < n_alu);
46+ return map_of_alu_opcode_2_mnemonic[alu];
47+}
48+
49+inline static Mnemonic map_shift(Shift_Opcode shc) {
50+ assert(shc >= 0 && shc < n_shift);
51+ return map_of_shift_opcode_2_mnemonic[shc];
52+}
53+
54+inline bool fit8(int64 val) {
55+ return (CHAR_MIN <= val) && (val <= CHAR_MAX);
56+}
57+
58+inline bool fit32(int64 val) {
59+ return (INT_MIN <= val) && (val <= INT_MAX);
60+}
61+
62+inline static void add_r(EncoderBase::Operands & args, const R_Opnd & r, Opnd_Size sz, OpndExt ext = OpndExt_None) {
63+ RegName reg = map_reg(r.reg_no());
64+ if (sz != n_size) {
65+ OpndSize size = map_size(sz);
66+ if (size != getRegSize(reg)) {
67+ reg = getAliasReg(reg, size);
68+ }
69+ }
70+ args.add(EncoderBase::Operand(reg, ext));
71+}
72+
73+inline static void add_m(EncoderBase::Operands & args, const M_Opnd & m, Opnd_Size sz, OpndExt ext = OpndExt_None) {
74+ assert(n_size != sz);
75+ args.add(EncoderBase::Operand(map_size(sz),
76+ map_reg(m.base().reg_no()), map_reg(m.index().reg_no()),
77+ (unsigned)m.scale().get_value(), (int)m.disp().get_value(), ext));
78+}
79+
80+inline static void add_rm(EncoderBase::Operands & args, const RM_Opnd & rm, Opnd_Size sz, OpndExt ext = OpndExt_None) {
81+ rm.is_reg() ? add_r(args, (R_Opnd &)rm, sz, ext) : add_m(args, (M_Opnd &)rm, sz, ext);
82+}
83+
84+inline static void add_xmm(EncoderBase::Operands & args, const XMM_Opnd & xmm, bool dbl) {
85+    // Gregory -
86+    // XMM register indexes in the Reg_No enum are shifted by xmm0_reg (they
87+    // do not start at 0), so the xmm0_reg index must be subtracted from the
88+    // xmm.get_idx() value
89+ assert(xmm.get_idx() >= xmm0_reg);
90+ return args.add((RegName)( (dbl ? RegName_XMM0D : RegName_XMM0S) + xmm.get_idx() -
91+ xmm0_reg));
92+}
93+
94+inline static void add_fp(EncoderBase::Operands & args, unsigned i, bool dbl) {
95+ return args.add((RegName)( (dbl ? RegName_FP0D : RegName_FP0S) + i));
96+}
97+
98+inline static void add_imm(EncoderBase::Operands & args, const Imm_Opnd & imm) {
99+ assert(n_size != imm.get_size());
100+ args.add(EncoderBase::Operand(map_size(imm.get_size()), imm.get_value(),
101+ imm.is_signed() ? OpndExt_Signed : OpndExt_Zero));
102+}
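// Usage sketch for the operand helpers above: each add_* call appends one
// operand and a single EncoderBase::encode() consumes the list. A minimal,
// hedged example, assuming the add_opc ALU_Opcode value and the eax_opnd
// register operand this library defines:
//
//   char buf[16];
//   char* next = alu(buf, add_opc, eax_opnd, Imm_Opnd(size_32, 1)); // add eax, 1
//   size_t len = next - buf; // every encoder returns the advanced stream pointer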
103+
104+ENCODER_DECLARE_EXPORT char * prefix(char * stream, InstrPrefix p) {
105+ *stream = (char)p;
106+ return stream + 1;
107+}
108+
109+// stack push and pop instructions
110+ENCODER_DECLARE_EXPORT char * push(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
111+ EncoderBase::Operands args;
112+ add_rm(args, rm, sz);
113+ return (char*)EncoderBase::encode(stream, Mnemonic_PUSH, args);
114+}
115+
116+ENCODER_DECLARE_EXPORT char * push(char * stream, const Imm_Opnd & imm) {
117+ EncoderBase::Operands args;
118+#ifdef _EM64T_
119+ add_imm(args, imm);
120+#else
121+ // we need this workaround to be compatible with the former ia32 encoder implementation
122+ add_imm(args, Imm_Opnd(size_32, imm.get_value()));
123+#endif
124+ return EncoderBase::encode(stream, Mnemonic_PUSH, args);
125+}
126+
127+ENCODER_DECLARE_EXPORT char * pop(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
128+ EncoderBase::Operands args;
129+ add_rm(args, rm, sz);
130+ return (char*)EncoderBase::encode(stream, Mnemonic_POP, args);
131+}
132+
133+// cmpxchg or xchg
134+ENCODER_DECLARE_EXPORT char * cmpxchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz) {
135+ EncoderBase::Operands args;
136+ add_rm(args, rm, sz);
137+ add_r(args, r, sz);
138+ RegName implicitReg = getAliasReg(RegName_EAX, map_size(sz));
139+ args.add(implicitReg);
140+ return (char*)EncoderBase::encode(stream, Mnemonic_CMPXCHG, args);
141+}
142+
143+ENCODER_DECLARE_EXPORT char * xchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz) {
144+ EncoderBase::Operands args;
145+ add_rm(args, rm, sz);
146+ add_r(args, r, sz);
147+ return (char*)EncoderBase::encode(stream, Mnemonic_XCHG, args);
148+}
149+
150+// inc(rement), dec(rement), not, neg(ate) instructions
151+ENCODER_DECLARE_EXPORT char * inc(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
152+ EncoderBase::Operands args;
153+ add_rm(args, rm, sz);
154+ return (char*)EncoderBase::encode(stream, Mnemonic_INC, args);
155+}
156+
157+ENCODER_DECLARE_EXPORT char * dec(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
158+ EncoderBase::Operands args;
159+ add_rm(args, rm, sz);
160+ return (char*)EncoderBase::encode(stream, Mnemonic_DEC, args);
161+}
162+
163+ENCODER_DECLARE_EXPORT char * _not(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
164+ EncoderBase::Operands args;
165+ add_rm(args, rm, sz);
166+ return (char*)EncoderBase::encode(stream, Mnemonic_NOT, args);
167+}
168+
169+ENCODER_DECLARE_EXPORT char * neg(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
170+ EncoderBase::Operands args;
171+ add_rm(args, rm, sz);
172+ return (char*)EncoderBase::encode(stream, Mnemonic_NEG, args);
173+}
174+
175+ENCODER_DECLARE_EXPORT char * nop(char * stream) {
176+ EncoderBase::Operands args;
177+ return (char*)EncoderBase::encode(stream, Mnemonic_NOP, args);
178+}
179+
180+ENCODER_DECLARE_EXPORT char * int3(char * stream) {
181+ EncoderBase::Operands args;
182+ return (char*)EncoderBase::encode(stream, Mnemonic_INT3, args);
183+}
184+
185+// alu instructions: add, or, adc, sbb, and, sub, xor, cmp
186+ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) {
187+ EncoderBase::Operands args;
188+ add_rm(args, rm, sz);
189+ add_imm(args, imm);
190+ return (char*)EncoderBase::encode(stream, map_alu(opc), args);
191+};
192+
193+ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz) {
194+ EncoderBase::Operands args;
195+ add_rm(args, m, sz);
196+ add_rm(args, r, sz);
197+ return (char*)EncoderBase::encode(stream, map_alu(opc), args);
198+}
199+
200+ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) {
201+ EncoderBase::Operands args;
202+ add_rm(args, r, sz);
203+ add_rm(args, rm, sz);
204+ return (char*)EncoderBase::encode(stream, map_alu(opc), args);
205+}
206+
207+// test instruction
208+ENCODER_DECLARE_EXPORT char * test(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) {
209+ EncoderBase::Operands args;
210+ add_rm(args, rm, sz);
211+ assert(imm.get_size() <= sz);
212+ add_imm(args, imm);
213+ return (char*)EncoderBase::encode(stream, Mnemonic_TEST, args);
214+}
215+
216+ENCODER_DECLARE_EXPORT char * test(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz) {
217+ EncoderBase::Operands args;
218+ add_rm(args, rm, sz);
219+ add_r(args, r, sz);
220+ return (char*)EncoderBase::encode(stream, Mnemonic_TEST, args);
221+}
222+
223+// shift instructions: shl, shr, sar, shld, shrd
224+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) {
225+ EncoderBase::Operands args;
226+ add_rm(args, rm, sz);
227+ add_imm(args, imm);
228+ return (char*)EncoderBase::encode(stream, map_shift(shc), args);
229+}
230+
231+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm, Opnd_Size sz) {
232+ EncoderBase::Operands args;
233+ add_rm(args, rm, sz);
234+ args.add(RegName_CL);
235+ return (char*)EncoderBase::encode(stream, map_shift(shc), args);
236+}
237+
238+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm,
239+ const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz) {
240+ EncoderBase::Operands args;
241+ assert(shc == shld_opc || shc == shrd_opc);
242+ add_rm(args, rm, sz);
243+ add_r(args, r, sz);
244+ add_imm(args, imm);
245+ return (char*)EncoderBase::encode(stream, map_shift(shc), args);
246+}
247+
248+ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm,
249+ const R_Opnd & r, Opnd_Size sz) {
250+ EncoderBase::Operands args;
251+ assert(shc == shld_opc || shc == shrd_opc);
252+ add_rm(args, rm, sz);
253+ add_r(args, r, sz);
254+ args.add(RegName_CL);
255+ return (char*)EncoderBase::encode(stream, map_shift(shc), args);
256+}
257+
258+// multiply instructions: mul, imul
259+ENCODER_DECLARE_EXPORT char * mul(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
260+ EncoderBase::Operands args;
261+ args.add(RegName_EDX);
262+ args.add(RegName_EAX);
263+ add_rm(args, rm, sz);
264+ return (char*)EncoderBase::encode(stream, Mnemonic_MUL, args);
265+}
266+
267+ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) {
268+ EncoderBase::Operands args;
269+ add_r(args, r, sz);
270+ add_rm(args, rm, sz);
271+ return (char*)EncoderBase::encode(stream, Mnemonic_IMUL, args);
272+}
273+
274+ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz) {
275+ EncoderBase::Operands args;
276+ add_r(args, r, sz);
277+ add_imm(args, imm);
278+ return (char*)EncoderBase::encode(stream, Mnemonic_IMUL, args);
279+}
280+
281+ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm,
282+ const Imm_Opnd & imm, Opnd_Size sz) {
283+ EncoderBase::Operands args;
284+ add_r(args, r, sz);
285+ add_rm(args, rm, sz);
286+ add_imm(args, imm);
287+ return (char*)EncoderBase::encode(stream, Mnemonic_IMUL, args);
288+}
289+
290+// divide instructions: div, idiv
291+ENCODER_DECLARE_EXPORT char * idiv(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
292+ EncoderBase::Operands args;
293+#ifdef _EM64T_
294+ add_r(args, rdx_opnd, sz);
295+ add_r(args, rax_opnd, sz);
296+#else
297+ add_r(args, edx_opnd, sz);
298+ add_r(args, eax_opnd, sz);
299+#endif
300+ add_rm(args, rm, sz);
301+ return (char*)EncoderBase::encode(stream, Mnemonic_IDIV, args);
302+}
303+
304+ENCODER_DECLARE_EXPORT char * div(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
305+ EncoderBase::Operands args;
306+#ifdef _EM64T_
307+ add_r(args, rdx_opnd, sz);
308+ add_r(args, rax_opnd, sz);
309+#else
310+ add_r(args, edx_opnd, sz);
311+ add_r(args, eax_opnd, sz);
312+#endif
313+ add_rm(args, rm, sz);
314+ return (char*)EncoderBase::encode(stream, Mnemonic_DIV, args);
315+}
316+
317+// data movement: mov
318+ENCODER_DECLARE_EXPORT char * mov(char * stream, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz) {
319+ EncoderBase::Operands args;
320+ add_m(args, m, sz);
321+ add_r(args, r, sz);
322+ return (char*)EncoderBase::encode(stream, Mnemonic_MOV, args);
323+}
324+
325+ENCODER_DECLARE_EXPORT char * mov(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) {
326+ EncoderBase::Operands args;
327+ add_r(args, r, sz);
328+ add_rm(args, rm, sz);
329+ return (char*)EncoderBase::encode(stream, Mnemonic_MOV, args);
330+}
331+
332+ENCODER_DECLARE_EXPORT char * mov(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) {
333+ EncoderBase::Operands args;
334+ add_rm(args, rm, sz);
335+ add_imm(args, imm);
336+ return (char*)EncoderBase::encode(stream, Mnemonic_MOV, args);
337+}
338+
339+ENCODER_DECLARE_EXPORT char * movd(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm) {
340+ EncoderBase::Operands args;
341+ add_rm(args, rm, size_32);
342+ add_xmm(args, xmm, false);
343+ return (char*)EncoderBase::encode(stream, Mnemonic_MOVD, args);
344+}
345+
346+ENCODER_DECLARE_EXPORT char * movd(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm) {
347+ EncoderBase::Operands args;
348+ add_xmm(args, xmm, false);
349+ add_rm(args, rm, size_32);
350+ return (char*)EncoderBase::encode(stream, Mnemonic_MOVD, args);
351+}
352+
353+ENCODER_DECLARE_EXPORT char * movq(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm) {
354+ EncoderBase::Operands args;
355+ add_rm(args, rm, size_64);
356+ add_xmm(args, xmm, true);
357+ return (char*)EncoderBase::encode(stream, Mnemonic_MOVQ, args);
358+}
359+
360+ENCODER_DECLARE_EXPORT char * movq(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm) {
361+ EncoderBase::Operands args;
362+ add_xmm(args, xmm, true);
363+ add_rm(args, rm, size_64);
364+ return (char*)EncoderBase::encode(stream, Mnemonic_MOVQ, args);
365+}
366+
367+ENCODER_DECLARE_EXPORT char * movsx(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) {
368+ EncoderBase::Operands args;
369+ add_r(args, r, n_size);
370+ add_rm(args, rm, sz, OpndExt_Signed);
371+ return (char*)EncoderBase::encode(stream, Mnemonic_MOVSX, args);
372+}
373+
374+ENCODER_DECLARE_EXPORT char * movzx(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) {
375+ EncoderBase::Operands args;
376+ add_r(args, r, n_size);
377+ // movzx r64, r/m32 is not available on em64t
378+ // mov r32, r/m32 should zero out upper bytes
379+ assert(sz <= size_16);
380+ add_rm(args, rm, sz, OpndExt_Zero);
381+ return (char*)EncoderBase::encode(stream, Mnemonic_MOVZX, args);
382+}
383+
384+// sse mov
385+ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) {
386+ EncoderBase::Operands args;
387+ add_xmm(args, xmm, dbl);
388+ add_m(args, mem, dbl ? size_64 : size_32);
389+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MOVSD : Mnemonic_MOVSS, args);
390+}
391+
392+ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const M_Opnd & mem, const XMM_Opnd & xmm, bool dbl) {
393+ EncoderBase::Operands args;
394+ add_m(args, mem, dbl ? size_64 : size_32);
395+ add_xmm(args, xmm, dbl);
396+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MOVSD : Mnemonic_MOVSS, args);
397+}
398+
399+ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) {
400+ EncoderBase::Operands args;
401+ add_xmm(args, xmm0, dbl);
402+ add_xmm(args, xmm1, dbl);
403+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MOVSD : Mnemonic_MOVSS, args );
404+}
405+
406+// sse add, sub, mul, div
407+ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) {
408+ EncoderBase::Operands args;
409+ add_xmm(args, xmm, dbl);
410+ add_m(args, mem, dbl ? size_64 : size_32);
411+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_ADDSD : Mnemonic_ADDSS, args);
412+}
413+
414+ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) {
415+ EncoderBase::Operands args;
416+ add_xmm(args, xmm0, dbl);
417+ add_xmm(args, xmm1, dbl);
418+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_ADDSD : Mnemonic_ADDSS, args);
419+}
420+
421+ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) {
422+ EncoderBase::Operands args;
423+ add_xmm(args, xmm, dbl);
424+ add_m(args, mem, dbl ? size_64 : size_32);
425+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_SUBSD : Mnemonic_SUBSS, args);
426+}
427+
428+ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) {
429+ EncoderBase::Operands args;
430+ add_xmm(args, xmm0, dbl);
431+ add_xmm(args, xmm1, dbl);
432+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_SUBSD : Mnemonic_SUBSS, args);
433+}
434+
435+ENCODER_DECLARE_EXPORT char * sse_mul( char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) {
436+ EncoderBase::Operands args;
437+ add_xmm(args, xmm, dbl);
438+ add_m(args, mem, dbl ? size_64 : size_32);
439+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MULSD : Mnemonic_MULSS, args);
440+}
441+
442+ENCODER_DECLARE_EXPORT char * sse_mul(char * stream, const XMM_Opnd& xmm0, const XMM_Opnd& xmm1, bool dbl) {
443+ EncoderBase::Operands args;
444+ add_xmm(args, xmm0, dbl);
445+ add_xmm(args, xmm1, dbl);
446+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MULSD : Mnemonic_MULSS, args);
447+}
448+
449+ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) {
450+ EncoderBase::Operands args;
451+ add_xmm(args, xmm, dbl);
452+ add_m(args, mem, dbl ? size_64 : size_32);
453+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_DIVSD : Mnemonic_DIVSS, args);
454+}
455+
456+ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) {
457+ EncoderBase::Operands args;
458+ add_xmm(args, xmm0, dbl);
459+ add_xmm(args, xmm1, dbl);
460+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_DIVSD : Mnemonic_DIVSS, args);
461+}
462+
463+ENCODER_DECLARE_EXPORT char * sse_xor(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1) {
464+ EncoderBase::Operands args;
465+ add_xmm(args, xmm0, true);
466+ add_xmm(args, xmm1, true);
467+ return (char*)EncoderBase::encode(stream, Mnemonic_PXOR, args);
468+}
469+
470+ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) {
471+ EncoderBase::Operands args;
472+ add_xmm(args, xmm0, true);
473+ add_xmm(args, xmm1, true);
474+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_COMISD : Mnemonic_COMISS, args);
475+}
476+
477+ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem, bool dbl) {
478+ EncoderBase::Operands args;
479+ add_xmm(args, xmm0, dbl);
480+ add_m(args, mem, dbl ? size_64 : size_32);
481+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_COMISD : Mnemonic_COMISS, args);
482+}
483+
484+// sse conversions
485+ENCODER_DECLARE_EXPORT char * sse_cvt_si(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) {
486+ EncoderBase::Operands args;
487+ add_xmm(args, xmm, dbl);
488+ add_m(args, mem, size_32);
489+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTSI2SD : Mnemonic_CVTSI2SS, args);
490+}
491+
492+ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const M_Opnd & mem, bool dbl) {
493+ EncoderBase::Operands args;
494+ add_rm(args, reg, size_32);
495+ add_m(args, mem, dbl ? size_64 : size_32);
496+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTTSD2SI : Mnemonic_CVTTSS2SI, args);
497+}
498+
499+ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const XMM_Opnd & xmm, bool dbl) {
500+ EncoderBase::Operands args;
501+ add_rm(args, reg, size_32);
502+ add_xmm(args, xmm, dbl);
503+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTTSD2SI : Mnemonic_CVTTSS2SI, args);
504+}
505+
506+ENCODER_DECLARE_EXPORT char * sse_cvt_fp2dq(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) {
507+ EncoderBase::Operands args;
508+ add_xmm(args, xmm0, dbl);
509+ add_xmm(args, xmm1, dbl);
510+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTTPD2DQ : Mnemonic_CVTTPS2DQ, args);
511+}
512+
513+ENCODER_DECLARE_EXPORT char * sse_cvt_dq2fp(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) {
514+ EncoderBase::Operands args;
515+ add_xmm(args, xmm0, dbl);
516+ add_xmm(args, xmm1, dbl);
517+ return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTDQ2PD : Mnemonic_CVTDQ2PS, args);
518+}
519+
520+ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem64) {
521+ EncoderBase::Operands args;
522+ add_xmm(args, xmm0, false);
523+ add_m(args, mem64, size_64);
524+ return (char*)EncoderBase::encode(stream, Mnemonic_CVTSD2SS, args);
525+}
526+
527+ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1) {
528+ EncoderBase::Operands args;
529+ add_xmm(args, xmm0, false);
530+ add_xmm(args, xmm1, true);
531+ return (char*)EncoderBase::encode(stream, Mnemonic_CVTSD2SS, args);
532+}
533+
534+ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem32) {
535+ EncoderBase::Operands args;
536+ add_xmm(args, xmm0, true);
537+ add_m(args, mem32, size_32);
538+ return (char*)EncoderBase::encode(stream, Mnemonic_CVTSS2SD, args);
539+}
540+
541+ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1) {
542+ EncoderBase::Operands args;
543+ add_xmm(args, xmm0, true);
544+ add_xmm(args, xmm1, false);
545+ return (char*)EncoderBase::encode(stream, Mnemonic_CVTSS2SD, args);
546+}
547+
548+// conditional operations: cmov, setcc
549+ENCODER_DECLARE_EXPORT char *cmov(char * stream, ConditionCode cc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) {
550+ EncoderBase::Operands args;
551+ add_r(args, r, sz);
552+ add_rm(args, rm, sz);
553+ return (char*)EncoderBase::encode(stream, (Mnemonic)(Mnemonic_CMOVcc + cc), args);
554+}
555+
556+ENCODER_DECLARE_EXPORT char * setcc(char * stream, ConditionCode cc, const RM_Opnd & rm8) {
557+ EncoderBase::Operands args;
558+ add_rm(args, rm8, size_8);
559+ return (char*)EncoderBase::encode(stream, (Mnemonic)(Mnemonic_SETcc + cc), args);
560+}
561+
562+// load effective address: lea
563+ENCODER_DECLARE_EXPORT char * lea(char * stream, const R_Opnd & r, const M_Opnd & m, Opnd_Size sz) {
564+ EncoderBase::Operands args;
565+ add_r(args, r, sz);
566+ add_m(args, m, sz);
567+ return (char*)EncoderBase::encode(stream, Mnemonic_LEA, args);
568+}
569+
570+ENCODER_DECLARE_EXPORT char * cdq(char * stream) {
571+ EncoderBase::Operands args;
572+ args.add(RegName_EDX);
573+ args.add(RegName_EAX);
574+ return (char*)EncoderBase::encode(stream, Mnemonic_CDQ, args);
575+}
576+
577+ENCODER_DECLARE_EXPORT char * wait(char * stream) {
578+ return (char*)EncoderBase::encode(stream, Mnemonic_WAIT, EncoderBase::Operands());
579+}
580+
581+// control-flow instructions
582+
583+// loop
584+ENCODER_DECLARE_EXPORT char * loop(char * stream, const Imm_Opnd & imm) {
585+ EncoderBase::Operands args;
586+ assert(imm.get_size() == size_8);
587+ args.add(RegName_ECX);
588+ add_imm(args, imm);
589+ return (char*)EncoderBase::encode(stream, Mnemonic_LOOP, args);
590+}
591+
592+// jump
593+ENCODER_DECLARE_EXPORT char * jump8(char * stream, const Imm_Opnd & imm) {
594+ EncoderBase::Operands args;
595+ assert(imm.get_size() == size_8);
596+ add_imm(args, imm);
597+ return (char*)EncoderBase::encode(stream, Mnemonic_JMP, args);
598+}
599+
600+ENCODER_DECLARE_EXPORT char * jump32(char * stream, const Imm_Opnd & imm) {
601+ EncoderBase::Operands args;
602+ assert(imm.get_size() == size_32);
603+ add_imm(args, imm);
604+ return (char*)EncoderBase::encode(stream, Mnemonic_JMP, args);
605+}
606+
607+ENCODER_DECLARE_EXPORT char * jump(char * stream, const RM_Opnd & rm, Opnd_Size sz) {
608+ EncoderBase::Operands args;
609+ add_rm(args, rm, sz);
610+ return (char*)EncoderBase::encode(stream, Mnemonic_JMP, args);
611+}
612+
613+/**
614+ * @note On EM64T: if target lies beyond 2G (does not fit into 32 bit
615+ * offset) then generates indirect jump using RAX (whose content is
616+ * destroyed).
617+ */
618+ENCODER_DECLARE_EXPORT char * jump(char * stream, char * target) {
619+#ifdef _EM64T_
620+ int64 offset = target - stream;
621+ // sub 2 bytes for the short version
622+ offset -= 2;
623+ if (fit8(offset)) {
624+ // use 8-bit signed relative form
625+ return jump8(stream, Imm_Opnd(size_8, offset));
626+ } else if (fit32(offset)) {
627+        // sub 5 (3 + 2) bytes for the long version
628+ offset -= 3;
629+ // use 32-bit signed relative form
630+ return jump32(stream, Imm_Opnd(size_32, offset));
631+ }
632+ // need to use absolute indirect jump
633+ stream = mov(stream, rax_opnd, Imm_Opnd(size_64, (int64)target), size_64);
634+ return jump(stream, rax_opnd, size_64);
635+#else
636+ I_32 offset = target - stream;
637+ // sub 2 bytes for the short version
638+ offset -= 2;
639+ if (fit8(offset)) {
640+ // use 8-bit signed relative form
641+ return jump8(stream, Imm_Opnd(size_8, offset));
642+ }
643+ // sub 5 (3 + 2) bytes for the long version
644+ offset -= 3;
645+ // use 32-bit signed relative form
646+ return jump32(stream, Imm_Opnd(size_32, offset));
647+#endif
648+}
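// Note on the offset arithmetic above: relative jumps are counted from the
// end of the instruction. The short form (EB rel8) is 2 bytes, hence the
// initial "offset -= 2"; the near form (E9 rel32) is 5 bytes, 3 more than
// the short form, hence the extra "offset -= 3" when falling back to the
// 32-bit encoding. A jump to the byte immediately following a short jmp
// therefore encodes rel8 == 0.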
649+
650+// branch
651+ENCODER_DECLARE_EXPORT char * branch8(char * stream, ConditionCode cond,
652+ const Imm_Opnd & imm,
653+ InstrPrefix pref)
654+{
655+ if (pref != no_prefix) {
656+        assert(pref == hint_branch_taken_prefix || pref == hint_branch_not_taken_prefix);
657+ stream = prefix(stream, pref);
658+ }
659+ Mnemonic m = (Mnemonic)(Mnemonic_Jcc + cond);
660+ EncoderBase::Operands args;
661+ assert(imm.get_size() == size_8);
662+ add_imm(args, imm);
663+ return (char*)EncoderBase::encode(stream, m, args);
664+}
665+
666+ENCODER_DECLARE_EXPORT char * branch32(char * stream, ConditionCode cond,
667+ const Imm_Opnd & imm,
668+ InstrPrefix pref)
669+{
670+ if (pref != no_prefix) {
671+        assert(pref == hint_branch_taken_prefix || pref == hint_branch_not_taken_prefix);
672+ stream = prefix(stream, pref);
673+ }
674+ Mnemonic m = (Mnemonic)(Mnemonic_Jcc + cond);
675+ EncoderBase::Operands args;
676+ assert(imm.get_size() == size_32);
677+ add_imm(args, imm);
678+ return (char*)EncoderBase::encode(stream, m, args);
679+}
680+
681+/*
682+ENCODER_DECLARE_EXPORT char * branch(char * stream, ConditionCode cc, const char * target, InstrPrefix prefix) {
683+    // sub 2 bytes for the short version
684+    int64 offset = stream - target - 2;
685+    if (fit8(offset)) {
686+        return branch8(stream, cc, Imm_Opnd(size_8, (char)offset), is_signed);
687+    }
688+    return branch32(stream, cc, Imm_Opnd(size_32, (int)offset), is_signed);
689+}
690+*/
691+
692+// call
693+ENCODER_DECLARE_EXPORT char * call(char * stream, const Imm_Opnd & imm)
694+{
695+ EncoderBase::Operands args;
696+ add_imm(args, imm);
697+ return (char*)EncoderBase::encode(stream, Mnemonic_CALL, args);
698+}
699+
700+ENCODER_DECLARE_EXPORT char * call(char * stream, const RM_Opnd & rm,
701+ Opnd_Size sz)
702+{
703+ EncoderBase::Operands args;
704+ add_rm(args, rm, sz);
705+ return (char*)EncoderBase::encode(stream, Mnemonic_CALL, args);
706+}
707+
708+/**
709+* @note On EM64T: if target lies beyond 2G (does not fit into 32 bit
710+* offset) then generates indirect jump using RAX (whose content is
711+* destroyed).
712+*/
713+ENCODER_DECLARE_EXPORT char * call(char * stream, const char * target)
714+{
715+#ifdef _EM64T_
716+ int64 offset = target - stream;
717+ if (fit32(offset)) {
718+ offset -= 5; // sub 5 bytes for this instruction
719+ Imm_Opnd imm(size_32, offset);
720+ return call(stream, imm);
721+ }
722+ // need to use absolute indirect call
723+ stream = mov(stream, rax_opnd, Imm_Opnd(size_64, (int64)target), size_64);
724+ return call(stream, rax_opnd, size_64);
725+#else
726+ I_32 offset = target - stream;
727+ offset -= 5; // sub 5 bytes for this instruction
728+ Imm_Opnd imm(size_32, offset);
729+ return call(stream, imm);
730+#endif
731+}
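// Same displacement scheme as jump(): call rel32 (opcode E8) is 5 bytes,
// so the offset is measured from the end of the instruction; on EM64T a
// target beyond +/-2GB is loaded into RAX and called indirectly instead.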
732+
733+// return instruction
734+ENCODER_DECLARE_EXPORT char * ret(char * stream)
735+{
736+ EncoderBase::Operands args;
737+ return (char*)EncoderBase::encode(stream, Mnemonic_RET, args);
738+}
739+
740+ENCODER_DECLARE_EXPORT char * ret(char * stream, const Imm_Opnd & imm)
741+{
742+ EncoderBase::Operands args;
743+    // The manual says imm can be 16-bit only
744+ //assert(imm.get_size() <= size_16);
745+ args.add(EncoderBase::Operand(map_size(size_16), imm.get_value()));
746+ return (char*)EncoderBase::encode(stream, Mnemonic_RET, args);
747+}
748+
749+ENCODER_DECLARE_EXPORT char * ret(char * stream, unsigned short pop)
750+{
751+    // The manual says it can only be imm16
752+ EncoderBase::Operands args(EncoderBase::Operand(OpndSize_16, pop, OpndExt_Zero));
753+ return (char*)EncoderBase::encode(stream, Mnemonic_RET, args);
754+}
755+
756+// floating-point instructions
757+ENCODER_DECLARE_EXPORT char * fld(char * stream, const M_Opnd & m,
758+ bool is_double) {
759+ EncoderBase::Operands args;
760+ // a fake FP register as operand
761+ add_fp(args, 0, is_double);
762+ add_m(args, m, is_double ? size_64 : size_32);
763+ return (char*)EncoderBase::encode(stream, Mnemonic_FLD, args);
764+}
765+
766+ENCODER_DECLARE_EXPORT char * fist(char * stream, const M_Opnd & mem,
767+ bool is_long, bool pop_stk)
768+{
769+ EncoderBase::Operands args;
770+ if (pop_stk) {
771+ add_m(args, mem, is_long ? size_64 : size_32);
772+ // a fake FP register as operand
773+ add_fp(args, 0, is_long);
774+ return (char*)EncoderBase::encode(stream, Mnemonic_FISTP, args);
775+ }
776+ // only 32-bit operands are supported
777+ assert(is_long == false);
778+ add_m(args, mem, size_32);
779+ add_fp(args, 0, false);
780+ return (char*)EncoderBase::encode(stream, Mnemonic_FIST, args);
781+}
782+
783+ENCODER_DECLARE_EXPORT char * fst(char * stream, const M_Opnd & m,
784+ bool is_double, bool pop_stk)
785+{
786+ EncoderBase::Operands args;
787+ add_m(args, m, is_double ? size_64 : size_32);
788+ // a fake FP register as operand
789+ add_fp(args, 0, is_double);
790+ return (char*)EncoderBase::encode(stream,
791+ pop_stk ? Mnemonic_FSTP : Mnemonic_FST,
792+ args);
793+}
794+
795+ENCODER_DECLARE_EXPORT char * fst(char * stream, unsigned i, bool pop_stk)
796+{
797+ EncoderBase::Operands args;
798+ add_fp(args, i, true);
799+ return (char*)EncoderBase::encode(stream,
800+ pop_stk ? Mnemonic_FSTP : Mnemonic_FST,
801+ args);
802+}
803+
804+ENCODER_DECLARE_EXPORT char * fldcw(char * stream, const M_Opnd & mem) {
805+ EncoderBase::Operands args;
806+ add_m(args, mem, size_16);
807+ return (char*)EncoderBase::encode(stream, Mnemonic_FLDCW, args);
808+}
809+
810+ENCODER_DECLARE_EXPORT char * fnstcw(char * stream, const M_Opnd & mem) {
811+ EncoderBase::Operands args;
812+ add_m(args, mem, size_16);
813+ return (char*)EncoderBase::encode(stream, Mnemonic_FNSTCW, args);
814+}
815+
816+ENCODER_DECLARE_EXPORT char * fnstsw(char * stream)
817+{
818+    return (char*)EncoderBase::encode(stream, Mnemonic_FNSTSW,
819+ EncoderBase::Operands());
820+}
821+
822+// string operations
823+ENCODER_DECLARE_EXPORT char * set_d(char * stream, bool set) {
824+ EncoderBase::Operands args;
825+ return (char*)EncoderBase::encode(stream,
826+ set ? Mnemonic_STD : Mnemonic_CLD,
827+ args);
828+}
829+
830+ENCODER_DECLARE_EXPORT char * scas(char * stream, unsigned char prefix)
831+{
832+ EncoderBase::Operands args;
833+ if (prefix != no_prefix) {
834+ assert(prefix == prefix_repnz || prefix == prefix_repz);
835+ *stream = prefix;
836+ ++stream;
837+ }
838+ return (char*)EncoderBase::encode(stream, Mnemonic_SCAS, args);
839+}
840+
841+ENCODER_DECLARE_EXPORT char * stos(char * stream, unsigned char prefix)
842+{
843+ if (prefix != no_prefix) {
844+ assert(prefix == prefix_rep);
845+ *stream = prefix;
846+ ++stream;
847+ }
848+
849+ EncoderBase::Operands args;
850+ return (char*)EncoderBase::encode(stream, Mnemonic_STOS, args);
851+}
852+
853+// Intrinsic FP math functions
854+
855+ENCODER_DECLARE_EXPORT char * fprem(char * stream) {
856+ return (char*)EncoderBase::encode(stream, Mnemonic_FPREM,
857+ EncoderBase::Operands());
858+}
859+
860+ENCODER_DECLARE_EXPORT char * fprem1(char * stream) {
861+ return (char*)EncoderBase::encode(stream, Mnemonic_FPREM1,
862+ EncoderBase::Operands());
863+}
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/load_store.cpp
@@ -0,0 +1,458 @@
1+/* libs/pixelflinger/codeflinger/x86/load_store.cpp
2+**
3+** Copyright 2006, The Android Open Source Project
4+**
5+** Licensed under the Apache License, Version 2.0 (the "License");
6+** you may not use this file except in compliance with the License.
7+** You may obtain a copy of the License at
8+**
9+** http://www.apache.org/licenses/LICENSE-2.0
10+**
11+** Unless required by applicable law or agreed to in writing, software
12+** distributed under the License is distributed on an "AS IS" BASIS,
13+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+** See the License for the specific language governing permissions and
15+** limitations under the License.
16+*/
17+
18+#include <assert.h>
19+#include <stdio.h>
20+#include <cutils/log.h>
21+
22+#include "codeflinger/x86/GGLX86Assembler.h"
23+
24+namespace android {
25+
26+// ----------------------------------------------------------------------------
27+
28+void GGLX86Assembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
29+{
30+ const int bits = addr.size;
31+ const int inc = (flags & WRITE_BACK)?1:0;
32+ switch (bits) {
33+ case 32:
34+ if (inc) {
35+ MOV_REG_TO_MEM(s.reg, 0, addr.reg);
36+ ADD_IMM_TO_REG(4, addr.reg);
37+ } else {
38+ MOV_REG_TO_MEM(s.reg, 0, addr.reg);
39+ }
40+ break;
41+ case 24:
42+        // 24-bit formats are a little special and are used only for RGB;
43+        // 0x00BBGGRR is unpacked as the bytes R, G, B
44+ MOV_REG_TO_MEM(s.reg, 0, addr.reg, OpndSize_8);
45+ ROR(8, s.reg);
46+ MOV_REG_TO_MEM(s.reg, 1, addr.reg, OpndSize_8);
47+ ROR(8, s.reg);
48+ MOV_REG_TO_MEM(s.reg, 2, addr.reg, OpndSize_8);
49+ if (!(s.flags & CORRUPTIBLE)) {
50+ ROR(16, s.reg);
51+ }
52+ if (inc) {
53+ ADD_IMM_TO_REG(3, addr.reg);
54+ }
55+ break;
56+ case 16:
57+ if (inc) {
58+ MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_16);
59+ ADD_IMM_TO_REG(2, addr.reg);
60+ } else {
61+ MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_16);
62+ }
63+ break;
64+ case 8:
65+ if (inc) {
66+ MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_8);
67+ ADD_IMM_TO_REG(1, addr.reg);
68+ } else {
69+ MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_8);
70+ }
71+ break;
72+ }
73+}
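// Worked example of the 24-bit store path above, with s.reg holding
// 0x00BBGGRR: the first byte store writes RR at addr+0; ROR(8) rotates the
// register to 0xRR00BBGG so GG lands at addr+1; another ROR(8) gives
// 0xGGRR00BB so BB lands at addr+2; the closing ROR(16) restores the
// original value when the caller still needs s.reg intact.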
74+
75+void GGLX86Assembler::load(pointer_t& addr, const pixel_t& s, uint32_t flags)
76+{
77+ Scratch scratches(registerFile());
78+ int s0;
79+
80+ const int bits = addr.size;
81+ // WRITE_BACK indicates that the base register will also be updated after loading the data
82+ const int inc = (flags & WRITE_BACK)?1:0;
83+ switch (bits) {
84+ case 32:
85+ if (inc) {
86+ MOV_MEM_TO_REG(0, addr.reg, s.reg);
87+            ADD_IMM_TO_REG(4, addr.reg);
88+        } else
89+            MOV_MEM_TO_REG(0, addr.reg, s.reg);
90+ break;
91+ case 24:
92+        // 24-bit formats are a little special and are used only for RGB;
93+        // the bytes R, G, B are packed as 0x00BBGGRR
94+ s0 = scratches.obtain();
95+ if (s.reg != addr.reg) {
96+ MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s.reg); //R
97+ MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 1, s0); //G
98+ SHL(8, s0);
99+ OR_REG_TO_REG(s0, s.reg);
100+ MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 2, s0); //B
101+ SHL(16, s0);
102+ OR_REG_TO_REG(s0, s.reg);
103+ } else {
104+ int s1 = scratches.obtain();
105+ MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s1); //R
106+ MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 1, s0); //G
107+ SHL(8, s0);
108+ OR_REG_TO_REG(s0, s1);
109+ MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 2, s0); //B
110+ SHL(16, s0);
111+ OR_REG_TO_REG(s0, s1);
112+ MOV_REG_TO_REG(s1, s.reg);
113+ scratches.recycle(s1);
114+
115+ }
116+ scratches.recycle(s0);
117+ if (inc)
118+ ADD_IMM_TO_REG(3, addr.reg);
119+ break;
120+ case 16:
121+ if (inc) {
122+ MOVZX_MEM_TO_REG(OpndSize_16, addr.reg, 0, s.reg);
123+ ADD_IMM_TO_REG(2, addr.reg);
124+ }
125+ else MOVZX_MEM_TO_REG(OpndSize_16, addr.reg, 0, s.reg);
126+ break;
127+ case 8:
128+ if (inc) {
129+ MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s.reg);
130+ ADD_IMM_TO_REG(1, addr.reg);
131+ }
132+ else MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s.reg);
133+ break;
134+ }
135+ if (inc) MOV_REG_TO_MEM(addr.reg, addr.offset_ebp, PhysicalReg_EBP);
136+}
137+
138+void GGLX86Assembler::extract(integer_t& d, int s, int h, int l, int bits)
139+{
140+ const int maskLen = h-l;
141+
142+ assert(maskLen<=8);
143+ assert(h);
144+
145+
146+ if (h != bits) {
147+ const int mask = ((1<<maskLen)-1) << l;
148+ MOV_REG_TO_REG(s, d.reg);
149+ AND_IMM_TO_REG(mask, d.reg);// component = packed & mask;
150+ s = d.reg;
151+ }
152+
153+ if (l) {
154+ MOV_REG_TO_REG(s, d.reg);
155+ SHR(l, d.reg);// component = packed >> l;
156+ s = d.reg;
157+ }
158+
159+ if (s != d.reg) {
160+ MOV_REG_TO_REG(s, d.reg);
161+ }
162+
163+ d.s = maskLen;
164+}
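// Worked example: for a 16-bit 565 pixel with green in bits [10:5],
// extract(d, s, 11, 5, 16) computes maskLen = 6, applies the immediate
// mask 0x07E0, shifts right by 5, and records d.s = 6, i.e.
// component = (packed & 0x07E0) >> 5.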
165+
166+void GGLX86Assembler::extract(integer_t& d, const pixel_t& s, int component)
167+{
168+ extract(d, s.reg,
169+ s.format.c[component].h,
170+ s.format.c[component].l,
171+ s.size());
172+}
173+
174+void GGLX86Assembler::extract(component_t& d, const pixel_t& s, int component)
175+{
176+ integer_t r(d.reg, 32, d.flags, d.offset_ebp);
177+ extract(r, s.reg,
178+ s.format.c[component].h,
179+ s.format.c[component].l,
180+ s.size());
181+ d = component_t(r);
182+}
183+
184+
185+void GGLX86Assembler::expand(integer_t& d, const component_t& s, int dbits)
186+{
187+ if (s.l || (s.flags & CLEAR_HI)) {
188+ extract(d, s.reg, s.h, s.l, 32);
189+ expand(d, d, dbits);
190+ } else {
191+ expand(d, integer_t(s.reg, s.size(), s.flags, s.offset_ebp), dbits);
192+ }
193+}
194+
195+void GGLX86Assembler::expand(component_t& d, const component_t& s, int dbits)
196+{
197+ integer_t r(d.reg, 32, d.flags, d.offset_ebp);
198+ expand(r, s, dbits);
199+ d = component_t(r);
200+}
201+
202+void GGLX86Assembler::expand(integer_t& dst, const integer_t& src, int dbits)
203+{
204+ assert(src.size());
205+
206+ Scratch scratches(registerFile());
207+ int sbits = src.size();
208+ int s = src.reg;
209+ int d = dst.reg;
210+
211+ // be sure to set 'dst' after we read 'src' as they may be identical
212+ dst.s = dbits;
213+ dst.flags = 0;
214+
215+ if (dbits<=sbits) {
216+ if (s != d) {
217+ MOV_REG_TO_REG(s, d);
218+ }
219+ return;
220+ }
221+
222+ if (sbits == 1) {
223+ MOV_REG_TO_REG(s, d);
224+ SHL(dbits, d);
225+ SUB_REG_TO_REG(s, d);
226+ // d = (s<<dbits) - s;
227+ return;
228+ }
229+
230+ if (dbits % sbits) {
231+ MOV_REG_TO_REG(s, d);
232+ SHL(dbits-sbits, d);
233+ // d = s << (dbits-sbits);
234+ dbits -= sbits;
235+ int temp = scratches.obtain();
236+ do {
237+ MOV_REG_TO_REG(d, temp);
238+ SHR(sbits, temp);
239+ OR_REG_TO_REG(temp, d);
240+ // d |= d >> sbits;
241+ dbits -= sbits;
242+ sbits *= 2;
243+ } while(dbits>0);
244+ return;
245+ }
246+
247+ dbits -= sbits;
248+ do {
249+ MOV_REG_TO_REG(s, d);
250+ SHL(sbits, d);
251+ OR_REG_TO_REG(s, d);
252+ // d |= d<<sbits;
253+ s = d;
254+ dbits -= sbits;
255+ if (sbits*2 < dbits) {
256+ sbits *= 2;
257+ }
258+ } while(dbits>0);
259+}
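// Worked example: expand() from sbits = 5 to dbits = 8 takes the
// (dbits % sbits) branch, producing d = s << 3 followed by d |= d >> 5,
// which is the classic bit replication (s << 3) | (s >> 2); e.g. 0x1F
// expands to 0xFF and 0x00 stays 0x00, preserving the full output range.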
260+
261+void GGLX86Assembler::downshift(
262+ pixel_t& d, int component, component_t s, reg_t& dither)
263+{
264+ const needs_t& needs = mBuilderContext.needs;
265+ Scratch scratches(registerFile());
266+ // s(temp) is loaded in build_blending
267+ s.reg = scratches.obtain();
268+ MOV_MEM_TO_REG(s.offset_ebp, EBP, s.reg);
269+
270+ int sh = s.h;
271+ int sl = s.l;
272+ int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
273+ int maskLoBits = (sl!=0) ? ((s.flags & CLEAR_LO)?1:0) : 0;
274+ int sbits = sh - sl;
275+
276+ int dh = d.format.c[component].h;
277+ int dl = d.format.c[component].l;
278+ int dbits = dh - dl;
279+ int dithering = 0;
280+
281+ ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
282+
283+ if (sbits>dbits) {
284+ // see if we need to dither
285+ dithering = mDithering;
286+ }
287+
288+ int ireg = d.reg;
289+ if (!(d.flags & FIRST)) {
290+ if (s.flags & CORRUPTIBLE) {
291+ ireg = s.reg;
292+ } else {
293+ ireg = scratches.obtain();
294+ }
295+ }
296+ d.flags &= ~FIRST;
297+
298+ if (maskHiBits) {
299+ // we need to mask the high bits (and possibly the lowbits too)
300+ // and we might be able to use immediate mask.
301+ if (!dithering) {
302+ // we don't do this if we only have maskLoBits because we can
303+ // do it more efficiently below (in the case where dl=0)
304+ const int offset = sh - dbits;
305+ if (dbits<=8 && offset >= 0) {
306+ const uint32_t mask = ((1<<dbits)-1) << offset;
307+ build_and_immediate(ireg, s.reg, mask, 32);
308+ s.reg = ireg;
309+ sl = offset;
310+ sbits = dbits;
311+ maskLoBits = maskHiBits = 0;
312+ }
313+ } else {
314+ // in the dithering case though, we need to preserve the lower bits
315+ const uint32_t mask = ((1<<sbits)-1) << sl;
316+ build_and_immediate(ireg, s.reg, mask, 32);
317+ s.reg = ireg;
318+ maskLoBits = maskHiBits = 0;
319+ }
320+ }
321+
322+ // XXX: we could special case (maskHiBits & !maskLoBits)
323+ // like we do for maskLoBits below, but it happens very rarely
324+ // that we have maskHiBits only and the conditions necessary to lead
325+ // to better code (like doing d |= s << 24)
326+
327+ if (maskHiBits) {
328+ MOV_REG_TO_REG(s.reg, ireg);
329+ SHL(32-sh, ireg);
330+ sl += 32-sh;
331+ sh = 32;
332+ s.reg = ireg;
333+ maskHiBits = 0;
334+ }
335+
336+ // Downsampling should be performed as follows:
337+ // V * ((1<<dbits)-1) / ((1<<sbits)-1)
338+ // V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)]
339+ // V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
340+ // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
341+ // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
342+ //
343+ // By approximating (1>>dbits) and (1>>sbits) to 0:
344+ //
345+ // V>>(sbits-dbits) - V>>sbits
346+ //
347+ // A good approximation is V>>(sbits-dbits),
348+ // but better one (needed for dithering) is:
349+ //
350+ // (V>>(sbits-dbits)<<sbits - V)>>sbits
351+ // (V<<dbits - V)>>sbits
352+ // (V - V>>dbits)>>(sbits-dbits)
353+
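    // Concrete case, 8 bits down to 5 (sbits = 8, dbits = 5): the simple
    // approximation is V >> 3; the dithered path below instead computes
    // V - (V >> 5), adds the dither value shifted right by
    // GGL_DITHER_BITS - 3, and relies on the (maskLoBits|dithering) branch
    // further down to apply the final >> 3.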
354+ // Dithering is done here
355+ if (dithering) {
356+ comment("dithering");
357+ if (sl) {
358+ MOV_REG_TO_REG(s.reg, ireg);
359+ SHR(sl, ireg);
360+ sh -= sl;
361+ sl = 0;
362+ s.reg = ireg;
363+ }
364+ // scaling (V-V>>dbits)
365+ int temp_reg = scratches.obtain();
366+ MOV_REG_TO_REG(s.reg, temp_reg);
367+ SHR(dbits, temp_reg);
368+ MOV_REG_TO_REG(s.reg, ireg);
369+ SUB_REG_TO_REG(temp_reg, ireg);
370+ scratches.recycle(temp_reg);
371+ const int shift = (GGL_DITHER_BITS - (sbits-dbits));
372+ dither.reg = scratches.obtain();
373+ MOV_MEM_TO_REG(dither.offset_ebp, EBP, dither.reg);
374+ if (shift>0) {
375+ temp_reg = scratches.obtain();
376+ MOV_REG_TO_REG(dither.reg, temp_reg);
377+ SHR(shift, temp_reg);
378+ ADD_REG_TO_REG(temp_reg, ireg);
379+ scratches.recycle(temp_reg);
380+ }
381+ else if (shift<0) {
382+ temp_reg = scratches.obtain();
383+ MOV_REG_TO_REG(dither.reg, temp_reg);
384+ SHL(-shift, temp_reg);
385+ ADD_REG_TO_REG(temp_reg, ireg);
386+ scratches.recycle(temp_reg);
387+ }
388+ else {
389+ ADD_REG_TO_REG(dither.reg, ireg);
390+ }
391+ scratches.recycle(dither.reg);
392+ s.reg = ireg;
393+ }
394+
395+ if ((maskLoBits|dithering) && (sh > dbits)) {
396+ int shift = sh-dbits;
397+ if (dl) {
398+ MOV_REG_TO_REG(s.reg, ireg);
399+ SHR(shift, ireg);
400+ if (ireg == d.reg) {
401+ MOV_REG_TO_REG(ireg, d.reg);
402+ SHL(dl, d.reg);
403+ } else {
404+ int temp_reg = scratches.obtain();
405+ MOV_REG_TO_REG(ireg, temp_reg);
406+ SHL(dl, temp_reg);
407+ OR_REG_TO_REG(temp_reg, d.reg);
408+ scratches.recycle(temp_reg);
409+ }
410+ } else {
411+ if (ireg == d.reg) {
412+ MOV_REG_TO_REG(s.reg, d.reg);
413+ SHR(shift, d.reg);
414+ } else {
415+ int temp_reg = scratches.obtain();
416+ MOV_REG_TO_REG(s.reg, temp_reg);
417+ SHR(shift, temp_reg);
418+ OR_REG_TO_REG(temp_reg, d.reg);
419+ scratches.recycle(temp_reg);
420+ }
421+ }
422+ } else {
423+ int shift = sh-dh;
424+ if (shift>0) {
425+ if (ireg == d.reg) {
426+ MOV_REG_TO_REG(s.reg, d.reg);
427+ SHR(shift, d.reg);
428+ } else {
429+ int temp_reg = scratches.obtain();
430+ MOV_REG_TO_REG(s.reg, temp_reg);
431+ SHR(shift, temp_reg);
432+ OR_REG_TO_REG(temp_reg, d.reg);
433+ scratches.recycle(temp_reg);
434+ }
435+ } else if (shift<0) {
436+ if (ireg == d.reg) {
437+ MOV_REG_TO_REG(s.reg, d.reg);
438+ SHL(-shift, d.reg);
439+ } else {
440+ int temp_reg = scratches.obtain();
441+ MOV_REG_TO_REG(s.reg, temp_reg);
442+ SHL(-shift, temp_reg);
443+ OR_REG_TO_REG(temp_reg, d.reg);
444+ scratches.recycle(temp_reg);
445+ }
446+ } else {
447+ if (ireg == d.reg) {
448+ if (s.reg != d.reg) {
449+ MOV_REG_TO_REG(s.reg, d.reg);
450+ }
451+ } else {
452+ OR_REG_TO_REG(s.reg, d.reg);
453+ }
454+ }
455+ }
456+}
457+
458+}; // namespace android
--- /dev/null
+++ b/libpixelflinger/codeflinger/x86/texturing.cpp
@@ -0,0 +1,1799 @@
1+/* libs/pixelflinger/codeflinger/x86/texturing.cpp
2+**
3+** Copyright 2006, The Android Open Source Project
4+**
5+** Licensed under the Apache License, Version 2.0 (the "License");
6+** you may not use this file except in compliance with the License.
7+** You may obtain a copy of the License at
8+**
9+** http://www.apache.org/licenses/LICENSE-2.0
10+**
11+** Unless required by applicable law or agreed to in writing, software
12+** distributed under the License is distributed on an "AS IS" BASIS,
13+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+** See the License for the specific language governing permissions and
15+** limitations under the License.
16+*/
17+
18+#include <assert.h>
19+#include <stdint.h>
20+#include <stdlib.h>
21+#include <stdio.h>
22+#include <sys/types.h>
23+
24+#include <cutils/log.h>
25+
26+#include "codeflinger/x86/GGLX86Assembler.h"
27+
28+
29+namespace android {
30+
31+// ---------------------------------------------------------------------------
32+
33+// iterators are initialized like this:
34+// (intToFixedCenter(x) * dx)>>16 + x0
35+// ((x<<16 + 0x8000) * dx)>>16 + x0
36+// ((x<<16)*dx + (0x8000*dx))>>16 + x0
37+// ( (x*dx) + dx>>1 ) + x0
38+// (x*dx) + (dx>>1 + x0)
39+
40+void GGLX86Assembler::init_iterated_color(fragment_parts_t& parts, const reg_t& x)
41+{
42+ context_t const* c = mBuilderContext.c;
43+ const needs_t& needs = mBuilderContext.needs;
44+ int temp_reg;
45+
46+ if (mSmooth) {
47+ // NOTE: we could take this case in the mDithering + !mSmooth case,
48+ // but this would use up to 4 more registers for the color components
49+ // for only a little added quality.
50+ // Currently, this causes the system to run out of registers in
51+ // some case (see issue #719496)
52+
53+ comment("compute initial iterated color (smooth and/or dither case)");
54+
55+ parts.iterated_packed = 0;
56+ parts.packed = 0;
57+
58+ // 0x1: color component
59+ // 0x2: iterators
60+ //parts.reload = 3;
61+ const int optReload = mOptLevel >> 1;
62+ if (optReload >= 3) parts.reload = 0; // reload nothing
63+ else if (optReload == 2) parts.reload = 2; // reload iterators
64+ else if (optReload == 1) parts.reload = 1; // reload colors
65+ else if (optReload <= 0) parts.reload = 3; // reload both
66+
67+ if (!mSmooth) {
68+ // we're not smoothing (just dithering), we never have to
69+ // reload the iterators
70+ parts.reload &= ~2;
71+ }
72+
73+ Scratch scratches(registerFile());
74+ const int t0 = (parts.reload & 1) ? scratches.obtain() : 0;
75+ const int t1 = (parts.reload & 2) ? scratches.obtain() : 0;
76+ for (int i=0 ; i<4 ; i++) {
77+ if (!mInfo[i].iterated)
78+ continue;
79+ // this component exists in the destination and is not replaced
80+ // by a texture unit.
81+ const int c = (parts.reload & 1) ? t0 : obtainReg();
82+ if (i==0) CONTEXT_LOAD(c, iterators.ydady);
83+ if (i==1) CONTEXT_LOAD(c, iterators.ydrdy);
84+ if (i==2) CONTEXT_LOAD(c, iterators.ydgdy);
85+ if (i==3) CONTEXT_LOAD(c, iterators.ydbdy);
86+ parts.argb[i].reg = c;
87+
88+ if (mInfo[i].smooth) {
89+ parts.argb_dx[i].reg = (parts.reload & 2) ? t1 : obtainReg();
90+ const int dvdx = parts.argb_dx[i].reg;
91+ temp_reg = scratches.obtain();
92+ CONTEXT_LOAD(dvdx, generated_vars.argb[i].dx);
93+ MOV_REG_TO_REG(dvdx, temp_reg);
94+ IMUL(x.reg, temp_reg);
95+ ADD_REG_TO_REG(temp_reg, c);
96+ scratches.recycle(temp_reg);
97+
98+ // adjust the color iterator to make sure it won't overflow
99+ if (!mAA) {
100+ // this is not needed when we're using anti-aliasing
101+ // because we will (have to) clamp the components
102+ // anyway.
103+ int end = scratches.obtain();
104+ MOV_MEM_TO_REG(parts.count.offset_ebp, PhysicalReg_EBP, end);
105+ SHR(16, end);
106+ IMUL(end, dvdx);
107+ temp_reg = end;
108+ // c - (dvdx*end + c) = -(dvdx*end)
109+ MOV_REG_TO_REG(dvdx, temp_reg);
110+ NEG(temp_reg);
111+ ADD_REG_TO_REG(c, dvdx);
112+ CMOV_REG_TO_REG(Mnemonic_CMOVS, temp_reg, c);
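+                    // branchless fix-up: dvdx now holds the end-of-span value
+                    // (c + dvdx*count) and temp_reg holds -(dvdx*count); if the
+                    // ADD set the sign flag, CMOVS rewinds c to -(dvdx*count)
+                    // so the last pixel of the span lands exactly at zero.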
113+ /*
114+ SUB_REG_TO_REG(dvdx, temp_reg);
115+ switch(i) {
116+ case 0:
117+ JCC(Mnemonic_JNS, "1f_init_iterated_color");
118+ SUB_REG_TO_REG(dvdx, c);
119+ label("1f_init_iterated_color");
120+ break;
121+ case 1:
122+ JCC(Mnemonic_JNS, "2f_init_iterated_color");
123+ SUB_REG_TO_REG(dvdx, c);
124+ label("2f_init_iterated_color");
125+ break;
126+ case 2:
127+ JCC(Mnemonic_JNS, "3f_init_iterated_color");
128+ SUB_REG_TO_REG(dvdx, c);
129+ label("3f_init_iterated_color");
130+ break;
131+ case 3:
132+ JCC(Mnemonic_JNS, "4f_init_iterated_color");
133+ SUB_REG_TO_REG(dvdx, c);
134+ label("4f_init_iterated_color");
135+ break;
136+ }
137+ */
138+
139+ MOV_REG_TO_REG(c, temp_reg);
140+ SAR(31, temp_reg);
141+ NOT(temp_reg);
142+ AND_REG_TO_REG(temp_reg, c);
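+                    // the MOV/SAR/NOT/AND sequence above clamps c to zero
+                    // without a branch: temp_reg = ~(c >> 31) is all-ones when
+                    // c >= 0 and zero when c < 0, so the AND keeps c or zeroes it.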
143+ scratches.recycle(end);
144+ }
145+ if(parts.reload & 2)
146+ scratches.recycle(dvdx);
147+ else
148+ recycleReg(dvdx);
149+ }
150+ CONTEXT_STORE(c, generated_vars.argb[i].c);
151+ if(parts.reload & 1)
152+ scratches.recycle(parts.argb[i].reg);
153+ else
154+ recycleReg(parts.argb[i].reg);
155+
156+ parts.argb[i].reg = -1;
157+ //if (parts.reload & 1) {
158+ // //MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
159+ //}
160+ }
161+ } else {
162+        // We're not smooth-shading, so we can just use a packed
163+        // version of the color and extract the components as needed
164+        // (or not at all if we don't blend)
165+
166+ // figure out if we need the iterated color
167+ int load = 0;
168+ for (int i=0 ; i<4 ; i++) {
169+ component_info_t& info = mInfo[i];
170+ if ((info.inDest || info.needed) && !info.replaced)
171+ load |= 1;
172+ }
173+
174+ parts.iterated_packed = 1;
175+ parts.packed = (!mTextureMachine.mask && !mBlending
176+ && !mFog && !mDithering);
177+ parts.reload = 0;
178+ if (load || parts.packed) {
179+ if (mBlending || mDithering || mInfo[GGLFormat::ALPHA].needed) {
180+ comment("load initial iterated color (8888 packed)");
181+ parts.iterated.setTo(obtainReg(),
182+ &(c->formats[GGL_PIXEL_FORMAT_RGBA_8888]));
183+ CONTEXT_LOAD(parts.iterated.reg, packed8888);
184+ } else {
185+ comment("load initial iterated color (dest format packed)");
186+
187+ parts.iterated.setTo(obtainReg(), &mCbFormat);
188+
189+ // pre-mask the iterated color
190+ const int bits = parts.iterated.size();
191+ const uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
192+ uint32_t mask = 0;
193+ if (mMasking) {
194+ for (int i=0 ; i<4 ; i++) {
195+ const int component_mask = 1<<i;
196+ const int h = parts.iterated.format.c[i].h;
197+ const int l = parts.iterated.format.c[i].l;
198+ if (h && (!(mMasking & component_mask))) {
199+ mask |= ((1<<(h-l))-1) << l;
200+ }
201+ }
202+ }
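+                // 'mask' now covers the bits of every component that is NOT
+                // write-masked; if none of those bits exist in the destination
+                // format, the load below can be skipped entirely.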
203+
204+ if (mMasking && ((mask & size)==0)) {
205+ // none of the components are present in the mask
206+ } else {
207+ CONTEXT_LOAD(parts.iterated.reg, packed);
208+ if (mCbFormat.size == 1) {
209+ int imm = 0xFF;
210+ AND_IMM_TO_REG(imm, parts.iterated.reg);
211+ } else if (mCbFormat.size == 2) {
212+ SHR(16, parts.iterated.reg);
213+ }
214+ }
215+
216+ // pre-mask the iterated color
217+ if (mMasking) {
218+ //AND_IMM_TO_REG(mask, parts.iterated.reg);
219+ build_and_immediate(parts.iterated.reg, parts.iterated.reg,
220+ mask, bits);
221+ }
222+ }
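+            // spill the packed color into the scratch area of the stack frame
+            // (mCurSp tracks the next free slot below EBP) so the register can
+            // be recycled; build_iterated_color reloads it from offset_ebp.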
223+ mCurSp = mCurSp - 4;
224+ parts.iterated.offset_ebp = mCurSp;
225+ MOV_REG_TO_MEM(parts.iterated.reg, parts.iterated.offset_ebp, EBP);
226+ //PUSH(parts.iterated.reg);
227+ recycleReg(parts.iterated.reg);
228+ parts.iterated.reg=-1;
229+ }
230+ }
231+}
232+
233+void GGLX86Assembler::build_iterated_color(
234+ component_t& fragment,
235+ fragment_parts_t& parts,
236+ int component,
237+ Scratch& regs)
238+{
239+
240+ if (!mInfo[component].iterated)
241+ return;
242+
243+ if (parts.iterated_packed) {
244+ // iterated colors are packed, extract the one we need
245+ parts.iterated.reg = regs.obtain();
246+ MOV_MEM_TO_REG(parts.iterated.offset_ebp, EBP, parts.iterated.reg);
247+ extract(fragment, parts.iterated, component);
248+ regs.recycle(parts.iterated.reg);
249+ } else {
250+ fragment.h = GGL_COLOR_BITS;
251+ fragment.l = GGL_COLOR_BITS - 8;
252+ fragment.flags |= CLEAR_LO;
253+ // iterated colors are held in their own register,
254+ // (smooth and/or dithering case)
255+ Scratch scratches(registerFile());
256+ mBuilderContext.Rctx = scratches.obtain();
257+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
258+ if (parts.reload==3) {
259+ // this implies mSmooth
260+ int dx = scratches.obtain();
261+ CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
262+ CONTEXT_LOAD(dx, generated_vars.argb[component].dx);
263+ ADD_REG_TO_REG(fragment.reg, dx);
264+ CONTEXT_STORE(dx, generated_vars.argb[component].c);
265+ scratches.recycle(dx);
266+ } else if (parts.reload & 1) {
267+ //MOV_MEM_TO_REG(parts.argb[component].offset_ebp, EBP, fragment.reg);
268+ CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
269+ } else {
270+ // we don't reload, so simply rename the register and mark as
271+ // non CORRUPTIBLE so that the texture env or blending code
272+ // won't modify this (renamed) register
273+ //regs.recycle(fragment.reg);
274+ //MOV_MEM_TO_REG(parts.argb[component].offset_ebp, EBP, fragment.reg);
275+ // it will also be used in build_smooth_shade
276+ CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
277+ //fragment.reg = parts.argb[component].reg;
278+ //fragment.flags &= ~CORRUPTIBLE;
279+ }
280+ scratches.recycle(mBuilderContext.Rctx);
281+ if (mInfo[component].smooth && mAA) {
282+ // when using smooth shading AND anti-aliasing, we need to clamp
283+ // the iterators because there is always an extra pixel on the
284+ // edges, which most of the time will cause an overflow
285+            // (since technically it's outside of the domain).
286+ int temp = scratches.obtain();
287+ MOV_REG_TO_REG(fragment.reg, temp);
288+ SAR(31, temp);
289+ NOT(temp);
290+            AND_REG_TO_REG(temp, fragment.reg);
291+ component_sat(fragment, temp);
292+ scratches.recycle(temp);
293+ }
294+ }
295+}
296+
297+// ---------------------------------------------------------------------------
298+
299+void GGLX86Assembler::decodeLogicOpNeeds(const needs_t& needs)
300+{
301+    // gather some information about the components we need to process...
302+ const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
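+    // the needs field stores only the low bits of the logic op; OR-ing in
+    // GGL_CLEAR (presumably 0x1500, matching GL_CLEAR, with a zero low
+    // nibble) rebuilds the full GGLlogicop enum value.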
303+ switch(opcode) {
304+ case GGL_COPY:
305+ mLogicOp = 0;
306+ break;
307+ case GGL_CLEAR:
308+ case GGL_SET:
309+ mLogicOp = LOGIC_OP;
310+ break;
311+ case GGL_AND:
312+ case GGL_AND_REVERSE:
313+ case GGL_AND_INVERTED:
314+ case GGL_XOR:
315+ case GGL_OR:
316+ case GGL_NOR:
317+ case GGL_EQUIV:
318+ case GGL_OR_REVERSE:
319+ case GGL_OR_INVERTED:
320+ case GGL_NAND:
321+ mLogicOp = LOGIC_OP|LOGIC_OP_SRC|LOGIC_OP_DST;
322+ break;
323+ case GGL_NOOP:
324+ case GGL_INVERT:
325+ mLogicOp = LOGIC_OP|LOGIC_OP_DST;
326+ break;
327+ case GGL_COPY_INVERTED:
328+ mLogicOp = LOGIC_OP|LOGIC_OP_SRC;
329+ break;
330+ };
331+}
332+
333+void GGLX86Assembler::decodeTMUNeeds(const needs_t& needs, context_t const* c)
334+{
335+ uint8_t replaced=0;
336+ mTextureMachine.mask = 0;
337+ mTextureMachine.activeUnits = 0;
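+    // walk the TMUs from last to first: a later unit in GGL_REPLACE mode
+    // overwrites the components produced by earlier units, so once every
+    // component is replaced the remaining (earlier) units can be skipped.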
338+ for (int i=GGL_TEXTURE_UNIT_COUNT-1 ; i>=0 ; i--) {
339+ texture_unit_t& tmu = mTextureMachine.tmu[i];
340+ if (replaced == 0xF) {
341+ // all components are replaced, skip this TMU.
342+ tmu.format_idx = 0;
343+ tmu.mask = 0;
344+ tmu.replaced = replaced;
345+ continue;
346+ }
347+ tmu.format_idx = GGL_READ_NEEDS(T_FORMAT, needs.t[i]);
348+ tmu.format = c->formats[tmu.format_idx];
349+ tmu.bits = tmu.format.size*8;
350+ tmu.swrap = GGL_READ_NEEDS(T_S_WRAP, needs.t[i]);
351+ tmu.twrap = GGL_READ_NEEDS(T_T_WRAP, needs.t[i]);
352+ tmu.env = ggl_needs_to_env(GGL_READ_NEEDS(T_ENV, needs.t[i]));
353+ tmu.pot = GGL_READ_NEEDS(T_POT, needs.t[i]);
354+ tmu.linear = GGL_READ_NEEDS(T_LINEAR, needs.t[i])
355+ && tmu.format.size!=3; // XXX: only 8, 16 and 32 modes for now
356+
357+ // 5551 linear filtering is not supported
358+ if (tmu.format_idx == GGL_PIXEL_FORMAT_RGBA_5551)
359+ tmu.linear = 0;
360+
361+ tmu.mask = 0;
362+ tmu.replaced = replaced;
363+
364+ if (tmu.format_idx) {
365+ mTextureMachine.activeUnits++;
366+ if (tmu.format.c[0].h) tmu.mask |= 0x1;
367+ if (tmu.format.c[1].h) tmu.mask |= 0x2;
368+ if (tmu.format.c[2].h) tmu.mask |= 0x4;
369+ if (tmu.format.c[3].h) tmu.mask |= 0x8;
370+ if (tmu.env == GGL_REPLACE) {
371+ replaced |= tmu.mask;
372+ } else if (tmu.env == GGL_DECAL) {
373+ if (!tmu.format.c[GGLFormat::ALPHA].h) {
374+ // if we don't have alpha, decal does nothing
375+ tmu.mask = 0;
376+ } else {
377+ // decal always ignores At
378+ tmu.mask &= ~(1<<GGLFormat::ALPHA);
379+ }
380+ }
381+ }
382+ mTextureMachine.mask |= tmu.mask;
383+ ////printf("%d: mask=%08lx, replaced=%08lx\n",
384+ // i, int(tmu.mask), int(tmu.replaced));
385+ }
386+ mTextureMachine.replaced = replaced;
387+ mTextureMachine.directTexture = 0;
388+ ////printf("replaced=%08lx\n", mTextureMachine.replaced);
389+}
390+
391+
392+void GGLX86Assembler::init_textures(
393+ tex_coord_t* coords,
394+ const reg_t& x, const reg_t& y)
395+{
396+ context_t const* c = mBuilderContext.c;
397+ const needs_t& needs = mBuilderContext.needs;
398+ reg_t temp_reg_t;
399+ int Rx = x.reg;
400+ int Ry = y.reg;
401+
402+ if (mTextureMachine.mask) {
403+ comment("compute texture coordinates");
404+ }
405+
406+ // init texture coordinates for each tmu
407+ const int cb_format_idx = GGL_READ_NEEDS(CB_FORMAT, needs.n);
408+ const bool multiTexture = mTextureMachine.activeUnits > 1;
409+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
410+ const texture_unit_t& tmu = mTextureMachine.tmu[i];
411+ if (tmu.format_idx == 0)
412+ continue;
413+ if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
414+ (tmu.twrap == GGL_NEEDS_WRAP_11))
415+ {
416+ Scratch scratches(registerFile());
417+ // 1:1 texture
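+            // GGL_NEEDS_WRAP_11 means s/t advance exactly one texel per
+            // pixel, so instead of iterating s and t we fold base, offset
+            // and stride into a single pointer that walks the texture.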
418+ pointer_t& txPtr = coords[i].ptr;
419+ txPtr.setTo(obtainReg(), tmu.bits);
420+ CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydsdy);
421+ SAR(16, txPtr.reg);
422+ ADD_REG_TO_REG(txPtr.reg, Rx);
423+ CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydtdy);
424+ SAR(16, txPtr.reg);
425+ ADD_REG_TO_REG(txPtr.reg, Ry);
426+ // Rx and Ry are changed
427+ // Rx = Rx + ti.iterators.ydsdy>>16
428+ // Ry = Ry + ti.iterators.ydtdy>>16
429+            // Rx = Ry * ti.stride + Rx
430+
431+ // merge base & offset
432+ CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride);
433+ IMUL(Ry, txPtr.reg);
434+ ADD_REG_TO_REG(txPtr.reg, Rx);
435+
436+ CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);
437+ temp_reg_t.setTo(Rx);
438+ base_offset(txPtr, txPtr, temp_reg_t);
439+ //PUSH(txPtr.reg);
440+ mCurSp = mCurSp - 4;
441+ txPtr.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg, parts.cbPtr.reg, parts.z.reg
442+ MOV_REG_TO_MEM(txPtr.reg, txPtr.offset_ebp, EBP);
443+ recycleReg(txPtr.reg);
444+ txPtr.reg=-1;
445+ } else {
446+ Scratch scratches(registerFile());
447+ reg_t& s = coords[i].s;
448+ reg_t& t = coords[i].t;
449+ // s = (x * dsdx)>>16 + ydsdy
450+ // s = (x * dsdx)>>16 + (y*dsdy)>>16 + s0
451+ // t = (x * dtdx)>>16 + ydtdy
452+ // t = (x * dtdx)>>16 + (y*dtdy)>>16 + t0
453+ const int need_w = GGL_READ_NEEDS(W, needs.n);
454+ MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
455+ if (need_w) {
456+ s.setTo(obtainReg());
457+ t.setTo(obtainReg());
458+ CONTEXT_LOAD(s.reg, state.texture[i].iterators.ydsdy);
459+ CONTEXT_LOAD(t.reg, state.texture[i].iterators.ydtdy);
460+ CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
461+ CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
462+ recycleReg(s.reg);
463+ recycleReg(t.reg);
464+ } else {
465+ int ydsdy = scratches.obtain();
466+ int dsdx = scratches.obtain();
467+ CONTEXT_LOAD(ydsdy, state.texture[i].iterators.ydsdy);
468+ CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
469+ IMUL(Rx, dsdx);
470+ ADD_REG_TO_REG(dsdx, ydsdy);
471+ CONTEXT_STORE(ydsdy, generated_vars.texture[i].spill[0]);
472+ scratches.recycle(ydsdy);
473+ scratches.recycle(dsdx);
474+
475+ int ydtdy = scratches.obtain();
476+ int dtdx = scratches.obtain();
477+ CONTEXT_LOAD(ydtdy, state.texture[i].iterators.ydtdy);
478+ CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
479+ IMUL(Rx, dtdx);
480+ ADD_REG_TO_REG(dtdx, ydtdy);
481+ CONTEXT_STORE(ydtdy, generated_vars.texture[i].spill[1]);
482+ scratches.recycle(ydtdy);
483+ scratches.recycle(dtdx);
484+
485+ // s.reg = Rx * ti.dsdx + ydsdy
486+ // t.reg = Rx * ti.dtdx + ydtdy
487+ }
488+ }
489+
490+ // direct texture?
491+ if (!multiTexture && !mBlending && !mDithering && !mFog &&
492+ cb_format_idx == tmu.format_idx && !tmu.linear &&
493+ mTextureMachine.replaced == tmu.mask)
494+ {
495+ mTextureMachine.directTexture = i + 1;
496+ }
497+ }
498+}
499+
500+void GGLX86Assembler::build_textures( fragment_parts_t& parts,
501+ Scratch& regs)
502+{
503+ context_t const* c = mBuilderContext.c;
504+ const needs_t& needs = mBuilderContext.needs;
505+ reg_t temp_reg_t;
506+ //int Rctx = mBuilderContext.Rctx;
507+
508+
509+ const bool multiTexture = mTextureMachine.activeUnits > 1;
510+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
511+ const texture_unit_t& tmu = mTextureMachine.tmu[i];
512+ if (tmu.format_idx == 0)
513+ continue;
514+
515+ pointer_t& txPtr = parts.coords[i].ptr;
516+ pixel_t& texel = parts.texel[i];
517+
518+ // repeat...
519+ if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
520+ (tmu.twrap == GGL_NEEDS_WRAP_11))
521+ { // 1:1 textures
522+ comment("fetch texel");
523+ texel.setTo(regs.obtain(), &tmu.format);
524+ txPtr.reg = regs.obtain();
525+ MOV_MEM_TO_REG(txPtr.offset_ebp, EBP, txPtr.reg);
526+ mCurSp = mCurSp - 4;
527+ texel.offset_ebp = mCurSp;
528+ load(txPtr, texel, WRITE_BACK);
529+ MOV_REG_TO_MEM(texel.reg, texel.offset_ebp, EBP);
530+ regs.recycle(texel.reg);
531+ regs.recycle(txPtr.reg);
532+ } else {
533+ Scratch scratches(registerFile());
534+ reg_t& s = parts.coords[i].s;
535+ reg_t& t = parts.coords[i].t;
536+ comment("reload s/t (multitexture or linear filtering)");
537+ s.reg = scratches.obtain();
538+ t.reg = scratches.obtain();
539+ mBuilderContext.Rctx = scratches.obtain();
540+ MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
541+ CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]);
542+ CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]);
543+
544+ comment("compute repeat/clamp");
545+ int width = scratches.obtain();
546+ int height = scratches.obtain();
547+ int U = 0;
548+ int V = 0;
549+            // U and V will be spilled onto the stack since registers are scarce
550+ reg_t reg_U, reg_V;
551+
552+ CONTEXT_LOAD(width, generated_vars.texture[i].width);
553+ CONTEXT_LOAD(height, generated_vars.texture[i].height);
554+ scratches.recycle(mBuilderContext.Rctx);
555+
556+ int FRAC_BITS = 0;
557+ if (tmu.linear) {
558+ // linear interpolation
559+ if (tmu.format.size == 1) {
560+ // for 8-bits textures, we can afford
561+ // 7 bits of fractional precision at no
562+ // additional cost (we can't do 8 bits
563+ // because filter8 uses signed 16 bits muls)
564+ FRAC_BITS = 7;
565+ } else if (tmu.format.size == 2) {
566+ // filter16() is internally limited to 4 bits, so:
567+ // FRAC_BITS=2 generates less instructions,
568+ // FRAC_BITS=3,4,5 creates unpleasant artifacts,
569+ // FRAC_BITS=6+ looks good
570+ FRAC_BITS = 6;
571+ } else if (tmu.format.size == 4) {
572+ // filter32() is internally limited to 8 bits, so:
573+ // FRAC_BITS=4 looks good
574+                // FRAC_BITS=5+ looks better, but generates 3 extra instructions per pixel
575+ FRAC_BITS = 6;
576+ } else {
577+ // for all other cases we use 4 bits.
578+ FRAC_BITS = 4;
579+ }
580+ }
581+ int u = scratches.obtain();
582+ // s.reg and t.reg are recycled in wrapping
583+ wrapping(u, s.reg, width, tmu.swrap, FRAC_BITS, scratches);
584+ int v = scratches.obtain();
585+ wrapping(v, t.reg, height, tmu.twrap, FRAC_BITS, scratches);
586+
587+
588+ if (tmu.linear) {
589+
590+ //mBuilderContext.Rctx = scratches.obtain();
591+ //MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
592+ //CONTEXT_LOAD(width, generated_vars.texture[i].width);
593+ //CONTEXT_LOAD(height, generated_vars.texture[i].height);
594+ //scratches.recycle(mBuilderContext.Rctx);
595+
596+ comment("compute linear filtering offsets");
597+ // pixel size scale
598+ const int shift = 31 - gglClz(tmu.format.size);
599+ U = scratches.obtain();
600+ V = scratches.obtain();
601+
602+
603+ // sample the texel center
604+ SUB_IMM_TO_REG(1<<(FRAC_BITS-1), u);
605+ SUB_IMM_TO_REG(1<<(FRAC_BITS-1), v);
606+
607+ // get the fractionnal part of U,V
608+ MOV_REG_TO_REG(u, U);
609+ AND_IMM_TO_REG((1<<FRAC_BITS)-1, U);
610+ MOV_REG_TO_REG(v, V);
611+ AND_IMM_TO_REG((1<<FRAC_BITS)-1, V);
612+
613+ // below we will pop U and V in the filter function
614+ mCurSp = mCurSp - 4;
615+ MOV_REG_TO_MEM(U, mCurSp, EBP);
616+ reg_U.offset_ebp = mCurSp;
617+ mCurSp = mCurSp - 4;
618+ MOV_REG_TO_MEM(V, mCurSp, EBP);
619+ reg_V.offset_ebp = mCurSp;
620+
621+ scratches.recycle(U);
622+ scratches.recycle(V);
623+
624+ // compute width-1 and height-1
625+ SUB_IMM_TO_REG(1, width);
626+ SUB_IMM_TO_REG(1, height);
627+
628+ // the registers are used up
629+ int temp1 = scratches.obtain();
630+ int temp2 = scratches.obtain();
631+ // get the integer part of U,V and clamp/wrap
632+ // and compute offset to the next texel
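+                    // in both branches the value left in 'width' is the byte
+                    // offset from the current texel to its right-hand
+                    // neighbour ('width' holds width-1 at this point); it is
+                    // stored to generated_vars.rt for the filter code below.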
633+ if (tmu.swrap == GGL_NEEDS_WRAP_REPEAT) {
634+ // u has already been REPEATed
635+ SAR(FRAC_BITS, u);
636+ CMOV_REG_TO_REG(Mnemonic_CMOVS, width, u);
637+ MOV_IMM_TO_REG(1<<shift, temp1);
638+ MOV_REG_TO_REG(width, temp2);
639+ // SHL may pollute the CF flag
640+ SHL(shift, temp2);
641+ mCurSp = mCurSp - 4;
642+ int width_offset_ebp = mCurSp;
643+ // width will be changed after the first comparison
644+ MOV_REG_TO_MEM(width, width_offset_ebp, EBP);
645+ CMP_REG_TO_REG(width, u);
646+ CMOV_REG_TO_REG(Mnemonic_CMOVL, temp1, width);
647+ if (shift) {
648+ CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp2, width);
649+ }
650+ MOV_REG_TO_REG(width, temp1);
651+ NEG(temp1);
652+                        // the width register was clobbered above, so compare u against the saved copy
653+ CMP_MEM_TO_REG(EBP, width_offset_ebp, u);
654+ CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp1, width);
655+ } else {
656+ // u has not been CLAMPed yet
657+ // algorithm:
658+ // if ((u>>4) >= width)
659+ // u = width<<4
660+ // width = 0
661+ // else
662+ // width = 1<<shift
663+ // u = u>>4; // get integer part
664+ // if (u<0)
665+ // u = 0
666+ // width = 0
667+ // generated_vars.rt = width
668+
669+ MOV_REG_TO_REG(width, temp2);
670+ SHL(FRAC_BITS, temp2);
671+ MOV_REG_TO_REG(u, temp1);
672+ SAR(FRAC_BITS, temp1);
673+ CMP_REG_TO_REG(temp1, width);
674+ CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, u);
675+ // mov doesn't affect the flags
676+ MOV_IMM_TO_REG(0, temp2);
677+ CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, width);
678+ MOV_IMM_TO_REG(1 << shift, temp2);
679+ CMOV_REG_TO_REG(Mnemonic_CMOVG, temp2, width);
680+
681+ MOV_IMM_TO_REG(0, temp2);
682+ SAR(FRAC_BITS, u);
683+ CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, u);
684+ CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, width);
685+ }
686+ scratches.recycle(temp1);
687+ scratches.recycle(temp2);
688+ mBuilderContext.Rctx = scratches.obtain();
689+ MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
690+ CONTEXT_STORE(width, generated_vars.rt);
691+
692+ const int stride = width;
693+ CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
694+ scratches.recycle(mBuilderContext.Rctx);
695+
696+ temp1 = scratches.obtain();
697+ temp2 = scratches.obtain();
698+
699+ int height_offset_ebp;
700+ if (tmu.twrap == GGL_NEEDS_WRAP_REPEAT) {
701+ // v has already been REPEATed
702+ SAR(FRAC_BITS, v);
703+ CMOV_REG_TO_REG(Mnemonic_CMOVS, height, v);
704+ MOV_IMM_TO_REG(1<<shift, temp1);
705+ MOV_REG_TO_REG(height, temp2);
706+ SHL(shift, temp2);
707+ mCurSp = mCurSp - 4;
708+ height_offset_ebp = mCurSp;
709+ // height will be changed after the first comparison
710+ MOV_REG_TO_MEM(height, height_offset_ebp, EBP);
711+ CMP_REG_TO_REG(height, v);
712+ CMOV_REG_TO_REG(Mnemonic_CMOVL, temp1, height);
713+ if (shift) {
714+ CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp2, height);
715+ }
716+ MOV_REG_TO_REG(height, temp1);
717+ NEG(temp1);
718+                        // the height register was clobbered above, so compare v against the saved copy
719+ CMP_MEM_TO_REG(EBP, height_offset_ebp, v);
720+ CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp1, height);
721+ IMUL(stride, height);
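+                        // 'height' is now the signed byte offset to the texel
+                        // one row down, wrapping back to row 0 at the bottom
+                        // edge (assuming stride is in pixels); it is stored
+                        // to generated_vars.lb below.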
722+ } else {
723+                        // v has not been CLAMPed yet
724+ MOV_REG_TO_REG(height, temp2);
725+ SHL(FRAC_BITS, temp2);
726+ MOV_REG_TO_REG(v, temp1);
727+ SAR(FRAC_BITS, temp1);
728+
729+ mCurSp = mCurSp - 4;
730+ height_offset_ebp = mCurSp;
731+ // height may be changed after the first comparison
732+ MOV_REG_TO_MEM(height, height_offset_ebp, EBP);
733+
734+ CMP_REG_TO_REG(temp1, height);
735+ CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, v);
736+ MOV_IMM_TO_REG(0, temp2);
737+ CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, height);
738+
739+ if (shift) {
740+                            // stride aliases width's register; width's old value is no longer needed
741+ // shift may pollute the flags
742+ SHL(shift, stride);
743+ // height may be changed to 0
744+ CMP_REG_TO_MEM(temp1, height_offset_ebp, EBP);
745+ CMOV_REG_TO_REG(Mnemonic_CMOVG, stride, height);
746+ } else {
747+ CMOV_REG_TO_REG(Mnemonic_CMOVG, stride, height);
748+ }
749+ MOV_IMM_TO_REG(0, temp2);
750+ SAR(FRAC_BITS, v);
751+ CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, v);
752+ CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, height);
753+ }
754+ scratches.recycle(temp1);
755+ scratches.recycle(temp2);
756+ mBuilderContext.Rctx = scratches.obtain();
757+ MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
758+ CONTEXT_STORE(height, generated_vars.lb);
759+ scratches.recycle(mBuilderContext.Rctx);
760+ }
761+
762+ scratches.recycle(width);
763+ scratches.recycle(height);
764+
765+ // iterate texture coordinates...
766+ comment("iterate s,t");
767+ int dsdx = scratches.obtain();
768+ s.reg = scratches.obtain();
769+ mBuilderContext.Rctx = scratches.obtain();
770+ MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
771+ CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
772+ CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]);
773+ ADD_REG_TO_REG(dsdx, s.reg);
774+ CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
775+ scratches.recycle(s.reg);
776+ scratches.recycle(dsdx);
777+ int dtdx = scratches.obtain();
778+ t.reg = scratches.obtain();
779+ CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
780+ CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]);
781+ ADD_REG_TO_REG(dtdx, t.reg);
782+ CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
783+ scratches.recycle(dtdx);
784+ scratches.recycle(t.reg);
785+
786+ // merge base & offset...
787+ comment("merge base & offset");
788+ texel.setTo(scratches.obtain(), &tmu.format);
789+ //txPtr.setTo(texel.reg, tmu.bits);
790+ txPtr.setTo(scratches.obtain(), tmu.bits);
791+ int stride = scratches.obtain();
792+ CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
793+ CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);
794+ scratches.recycle(mBuilderContext.Rctx);
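+            // sign-extend v and stride from 16 bits so the IMUL behaves as a
+            // 16x16->32 multiply, presumably mirroring the SMULBB used on the
+            // ARM path; this assumes texture strides fit in 16 bits.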
795+ MOVSX_REG_TO_REG(OpndSize_16, v, v);
796+ MOVSX_REG_TO_REG(OpndSize_16, stride, stride);
797+ IMUL(v, stride);
798+ ADD_REG_TO_REG(stride, u);// u+v*stride
799+ temp_reg_t.setTo(u);
800+ base_offset(txPtr, txPtr, temp_reg_t);
801+
802+ // recycle registers we don't need anymore
803+ scratches.recycle(u);
804+ scratches.recycle(v);
805+ scratches.recycle(stride);
806+
807+ mCurSp = mCurSp - 4;
808+ texel.offset_ebp = mCurSp;
809+ // load texel
810+ if (!tmu.linear) {
811+ comment("fetch texel in building texture");
812+ load(txPtr, texel, 0);
813+ MOV_REG_TO_MEM(texel.reg, texel.offset_ebp, EBP);
814+ scratches.recycle(texel.reg);
815+ scratches.recycle(txPtr.reg);
816+ } else {
817+ comment("fetch texel, bilinear");
818+                // there are not enough registers, so we spill texel and the previous U and V
819+                // texel.reg is recycled inside the following functions since there is more than one code path
820+ switch (tmu.format.size) {
821+ case 1:
822+ filter8(parts, texel, tmu, reg_U, reg_V, txPtr, FRAC_BITS, scratches);
823+ break;
824+ case 2:
825+ filter16(parts, texel, tmu, reg_U, reg_V, txPtr, FRAC_BITS, scratches);
826+ break;
827+ case 3:
828+ filter24(parts, texel, tmu, U, V, txPtr, FRAC_BITS);
829+ break;
830+ case 4:
831+ filter32(parts, texel, tmu, reg_U, reg_V, txPtr, FRAC_BITS, scratches);
832+ break;
833+ }
834+ }
835+ }
836+ }
837+}
838+
839+void GGLX86Assembler::build_iterate_texture_coordinates(
840+ const fragment_parts_t& parts)
841+{
842+ const bool multiTexture = mTextureMachine.activeUnits > 1;
843+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
844+ const texture_unit_t& tmu = mTextureMachine.tmu[i];
845+ if (tmu.format_idx == 0)
846+ continue;
847+
848+ if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
849+ (tmu.twrap == GGL_NEEDS_WRAP_11))
850+ { // 1:1 textures
851+ const pointer_t& txPtr = parts.coords[i].ptr;
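+            // advance the spilled 1:1 texture pointer by one texel directly
+            // in its stack slot (txPtr.size is in bits, hence the >>3)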
852+ ADD_IMM_TO_MEM(txPtr.size>>3, txPtr.offset_ebp, EBP);
853+ } else {
854+ Scratch scratches(registerFile());
855+ int s = parts.coords[i].s.reg;
856+ int t = parts.coords[i].t.reg;
857+ mBuilderContext.Rctx = scratches.obtain();
858+ MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx);
859+ s = scratches.obtain();
860+ int dsdx = scratches.obtain();
861+ CONTEXT_LOAD(s, generated_vars.texture[i].spill[0]);
862+ CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
863+ ADD_REG_TO_REG(dsdx, s);
864+ CONTEXT_STORE(s, generated_vars.texture[i].spill[0]);
865+ scratches.recycle(s);
866+ scratches.recycle(dsdx);
867+ int dtdx = scratches.obtain();
868+ t = scratches.obtain();
869+ CONTEXT_LOAD(t, generated_vars.texture[i].spill[1]);
870+ CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
871+ ADD_REG_TO_REG(dtdx, t);
872+ CONTEXT_STORE(t, generated_vars.texture[i].spill[1]);
873+ scratches.recycle(t);
874+ scratches.recycle(dtdx);
875+ }
876+ }
877+}
878+
879+void GGLX86Assembler::filter8(
880+ const fragment_parts_t& parts,
881+ pixel_t& texel, const texture_unit_t& tmu,
882+ reg_t reg_U, reg_t reg_V, pointer_t& txPtr,
883+ int FRAC_BITS, Scratch& scratches)
884+{
885+ if (tmu.format.components != GGL_ALPHA &&
886+ tmu.format.components != GGL_LUMINANCE)
887+ {
888+ // this is a packed format, and we don't support
889+ // linear filtering (it's probably RGB 332)
890+ // Should not happen with OpenGL|ES
891+ MOVZX_MEM_TO_REG(OpndSize_8, txPtr.reg, 0, texel.reg);
892+ MOV_REG_TO_MEM(texel.reg, texel.offset_ebp, EBP);
893+ scratches.recycle(texel.reg);
894+ scratches.recycle(txPtr.reg);
895+ return;
896+ }
897+
898+ // ------------------------
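+    // bilinear weighting: 'd' accumulates
+    //   RB*(U*V) + LB*((1-U)*V) + LT*((1-U)*(1-V)) + RT*k
+    // where k = 2^(2*FRAC_BITS) - U*V - (1-U)*V - (1-U)*(1-V) = U*(1-V);
+    // deriving the last weight as the residual keeps the four weights
+    // summing exactly to one in fixed point.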
899+
900+ //int d = scratches.obtain();
901+ //int u = scratches.obtain();
902+ //int k = scratches.obtain();
903+
904+ scratches.recycle(texel.reg);
905+ int rt = scratches.obtain();
906+ int lb = scratches.obtain();
907+
908+ // RB -> U * V
909+
910+ mBuilderContext.Rctx = scratches.obtain();
911+ MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx);
912+ CONTEXT_LOAD(rt, generated_vars.rt);
913+ CONTEXT_LOAD(lb, generated_vars.lb);
914+ scratches.recycle(mBuilderContext.Rctx);
915+ int pixel= scratches.obtain();
916+
917+ int offset = pixel;
918+
919+ MOV_REG_TO_REG(rt, offset);
920+ ADD_REG_TO_REG(lb, offset);
921+
922+ int temp_reg1 = scratches.obtain();
923+ int temp_reg2 = scratches.obtain();
924+    // the addressing mode with both base and scale registers does not seem to
925+    // be encoded correctly, so fold the offset into the base register instead
925+ //MOV_MEM_SCALE_TO_REG(txPtr.reg, offset, 1, temp_reg1, OpndSize_8);
926+ ADD_REG_TO_REG(txPtr.reg, offset);
927+ MOVZX_MEM_TO_REG(OpndSize_8, offset, 0, temp_reg1);
928+ // pixel is only 8-bits
929+ MOV_REG_TO_REG(temp_reg1, pixel);
930+ MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_U.offset_ebp, temp_reg1);
931+ MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_V.offset_ebp, temp_reg2);
932+ IMUL(temp_reg2, temp_reg1);
933+ MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel);
934+ MOVSX_REG_TO_REG(OpndSize_16, temp_reg1, temp_reg2);
935+ IMUL(temp_reg2, pixel);
936+ NEG(temp_reg1);
937+ ADD_IMM_TO_REG(1<<(FRAC_BITS*2), temp_reg1);
938+ mCurSp = mCurSp - 4;
939+ int d_offset_ebp = mCurSp;
940+ MOV_REG_TO_MEM(pixel, d_offset_ebp, EBP);
941+ mCurSp = mCurSp - 4;
942+ int k_offset_ebp = mCurSp;
943+ MOV_REG_TO_MEM(temp_reg1, k_offset_ebp, EBP);
944+
945+
946+ // LB -> (1-U) * V
947+ MOV_MEM_TO_REG(reg_U.offset_ebp, EBP, temp_reg2);
948+ NEG(temp_reg2);
949+ ADD_IMM_TO_REG(1<<FRAC_BITS, temp_reg2);
950+ MOV_REG_TO_MEM(temp_reg2, reg_U.offset_ebp, EBP);
951+
952+ //MOV_MEM_SCALE_TO_REG(txPtr.reg, lb, 1, pixel, OpndSize_8);
953+ ADD_REG_TO_REG(txPtr.reg, lb);
954+ MOVZX_MEM_TO_REG(OpndSize_8, lb, 0, pixel);
955+
956+ MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg2);
957+ MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_V.offset_ebp, temp_reg1);
958+ IMUL(temp_reg1, temp_reg2);
959+ MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel);
960+ MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg1);
961+ IMUL(pixel, temp_reg1);
962+ ADD_REG_TO_MEM(temp_reg1, EBP, d_offset_ebp);
963+ SUB_REG_TO_MEM(temp_reg2, EBP, k_offset_ebp);
964+
965+
966+ // LT -> (1-U)*(1-V)
967+ MOV_MEM_TO_REG(reg_V.offset_ebp, EBP, temp_reg2);
968+ NEG(temp_reg2);
969+ ADD_IMM_TO_REG(1<<FRAC_BITS, temp_reg2);
970+ MOV_REG_TO_MEM(temp_reg2, reg_V.offset_ebp, EBP);
971+
972+ MOVZX_MEM_TO_REG(OpndSize_8, txPtr.reg, 0, pixel);
973+
974+ MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_U.offset_ebp, temp_reg1);
975+ MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg2);
976+ IMUL(temp_reg1, temp_reg2);
977+ MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg1);
978+ MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel);
979+ IMUL(pixel, temp_reg1);
980+ ADD_REG_TO_MEM(temp_reg1, EBP, d_offset_ebp);
981+
982+ // RT -> U*(1-V)
983+ //MOV_MEM_SCALE_TO_REG(txPtr.reg, rt, 1, pixel, OpndSize_8);
984+ ADD_REG_TO_REG(txPtr.reg, rt);
985+ MOVZX_MEM_TO_REG(OpndSize_8, rt, 0, pixel);
986+
987+ int k = rt;
988+ MOV_MEM_TO_REG(k_offset_ebp, EBP, k);
989+ SUB_REG_TO_REG(temp_reg2, k);
990+ MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel);
991+ MOVSX_REG_TO_REG(OpndSize_16, k, k);
992+ IMUL(pixel, k);
993+ ADD_MEM_TO_REG(EBP, d_offset_ebp, k);
994+ MOV_REG_TO_MEM(k, texel.offset_ebp, EBP);
995+ scratches.recycle(rt);
996+ scratches.recycle(lb);
997+ scratches.recycle(pixel);
998+ scratches.recycle(txPtr.reg);
999+ scratches.recycle(temp_reg1);
1000+ scratches.recycle(temp_reg2);
1001+ for (int i=0 ; i<4 ; i++) {
1002+ if (!texel.format.c[i].h) continue;
1003+ texel.format.c[i].h = FRAC_BITS*2+8;
1004+

Part of diff was cut off due to size limit. Use your local client to view the full diff.