Mega Code Archive

Matematiksel fonksiyonlar [en hızlıları]

Özellikle grafik-matematik işlemleri yapıyorsanız hızlı işlem yapmanız gerekir. Hız sağlanabilmesi için iki önemli bileşen: ASSEMBLER ve REGISTER Asembler ana dili sayılır. Register ise yazdığınız program parçasının CPU nun cashe belleğinde saklanmasını sağlar..Böylece program parçacığınız çok hızlı çalıştırılmış (program parçasına daha kısa sürede erişilmiş) olur. Dikkat!!! Çok hızlı çalışması gereken küçük program parçacıklarınızı(prosedür,fonksiyon) register olarak tanıtın. Aksi taktirde hız yerine yavaşlık elde etmiş olursunuz.Çünkü cashe belleğinde bir sınırı var...(128,256,512 kb,...) Hız farkını denemeyi unutmayın!!! kendi gözünüzle görün daha çok matematiksel işlemlerde kullanabilirsiniz. register olarak tanımlamak için: procedure proc_adi(); register; function Func_adi(): func_turu; register; normalde tanımladığınız fonksiyonlar ve prosedürler stdcall olarak tanımlanmış sayılır. function LnXP1(X: Extended): Extended; asm FLDLN2 MOV AX,WORD PTR X+8 { exponent } FLD X CMP AX,$3FFD { .4225 } JB @@1 FLD1 FADD FYL2X JMP @@2 @@1: FYL2XP1 @@2: FWAIT end; // Log10 // function Log10(X: Extended): Extended; // Log.10(X):=Log.2(X) * Log.10(2) asm FLDLG2 { Log base ten of 2 } FLD X FYL2X end; // Log2 // function Log2(X: Extended): Extended; asm FLD1 FLD X FYL2X end; // Log2 // function Log2(X: Single): Single; asm FLD1 FLD X FYL2X end; // LogN // function LogN(Base, X: Extended): Extended; // Log.N(X):=Log.2(X) / Log.2(N) asm FLD1 FLD X FYL2X FLD1 FLD Base FYL2X FDIV end; // IntPower // function IntPower(Base: Extended; Exponent: Integer): Extended; register; asm mov ecx, eax cdq fld1 { Result:=1 } xor eax, edx sub eax, edx { eax:=Abs(Exponent) } jz @@3 fld Base jmp @@2 @@1: fmul ST, ST { X:=Base * Base } @@2: shr eax,1 jnc @@1 fmul ST(1),ST { Result:=Result * X } jnz @@1 fstp st { pop X from FPU stack } cmp ecx, 0 jge @@3 fld1 fdivrp { Result:=1 / Result } @@3: fwait end; // Power // function Power(const Base, Exponent: Single): Single; begin if Exponent=cZero then Result:=cOne { n**0 = 1 } else if (Base=cZero) and (Exponent>cZero) then Result:=cZero { 0**n = 0, n > 0 } else Result:=Exp(Exponent * Ln(Base)); end; // Power (int exponent) // function Power(Base: Single; Exponent: Integer): Single; asm mov ecx, eax cdq fld1 { Result:=1 } xor eax, edx sub eax, edx { eax:=Abs(Exponent) } jz @@3 fld Base jmp @@2 @@1: fmul ST, ST { X:=Base * Base } @@2: shr eax,1 jnc @@1 fmul ST(1),ST { Result:=Result * X } jnz @@1 fstp st { pop X from FPU stack } cmp ecx, 0 jge @@3 fld1 fdivrp { Result:=1 / Result } @@3: end; // DegToRad (extended) // function DegToRad(const Degrees: Extended): Extended; begin Result:=Degrees * (PI / 180); end; // DegToRad (single) // function DegToRad(const Degrees : Single) : Single; register; // Result:=Degrees * cPIdiv180; // don't laugh, Delphi's compiler manages to make a nightmare of this one // with pushs, pops, etc. in its default compile... (this one is twice faster !) asm FLD DWORD PTR [EBP+8] FMUL cPIdiv180 end; // RadToDeg (extended) // function RadToDeg(const Radians: Extended): Extended; begin Result:=Radians * (180 / PI); end; // RadToDeg (single) // function RadToDeg(const Radians: Single): Single; // Result:=Radians * c180divPI; // don't laugh, Delphi's compiler manages to make a nightmare of this one // with pushs, pops, etc. in its default compile... (this one is twice faster !) asm FLD DWORD PTR [EBP+8] FMUL c180divPI end; // NormalizeAngle // function NormalizeAngle(angle : Single) : Single; begin Result:=angle-Int(angle*cInv2PI)*c2PI; if Result>PI then Result:=Result-2*PI else if Result<-PI then Result:=Result+2*PI; end; // NormalizeDegAngle // function NormalizeDegAngle(angle : Single) : Single; begin Result:=angle-Int(angle*cInv360)*c360; if Result>c180 then Result:=Result-c360 else if Result<-c180 then Result:=Result+c360; end; // SinCos (Extended) // procedure SinCos(const Theta: Extended; var Sin, Cos: Extended); register; // EAX contains address of Sin // EDX contains address of Cos // Theta is passed over the stack asm FLD Theta FSINCOS FSTP TBYTE PTR [EDX] // cosine FSTP TBYTE PTR [EAX] // sine end; // SinCos (Double) // procedure SinCos(const Theta: Double; var Sin, Cos: Double); register; // EAX contains address of Sin // EDX contains address of Cos // Theta is passed over the stack asm FLD Theta FSINCOS FSTP QWORD PTR [EDX] // cosine FSTP QWORD PTR [EAX] // sine end; // SinCos (Single) // procedure SinCos(const Theta: Single; var Sin, Cos: Single); register; // EAX contains address of Sin // EDX contains address of Cos // Theta is passed over the stack asm FLD Theta FSINCOS FSTP DWORD PTR [EDX] // cosine FSTP DWORD PTR [EAX] // sine end; // SinCos (Extended w radius) // procedure SinCos(const theta, radius : Double; var Sin, Cos: Extended); register; // EAX contains address of Sin // EDX contains address of Cos // Theta is passed over the stack asm FLD theta FSINCOS FMUL radius FSTP TBYTE PTR [EDX] // cosine FMUL radius FSTP TBYTE PTR [EAX] // sine end; // SinCos (Double w radius) // procedure SinCos(const theta, radius : Double; var Sin, Cos: Double); register; // EAX contains address of Sin // EDX contains address of Cos // Theta is passed over the stack asm FLD theta FSINCOS FMUL radius FSTP QWORD PTR [EDX] // cosine FMUL radius FSTP QWORD PTR [EAX] // sine end; // SinCos (Single w radius) // procedure SinCos(const theta, radius : Single; var Sin, Cos: Single); register; // EAX contains address of Sin // EDX contains address of Cos // Theta is passed over the stack asm FLD theta FSINCOS FMUL radius FSTP DWORD PTR [EDX] // cosine FMUL radius FSTP DWORD PTR [EAX] // sine end; // PrepareSinCosCache // procedure PrepareSinCosCache(var s, c : array of Single; startAngle, stopAngle : Single); var i : Integer; d, alpha, beta : Single; begin Assert((High(s)=High(c)) and (Low(s)=Low(c))); stopAngle:=stopAngle+1e-5; if High(s)>Low(s) then d:=cPIdiv180*(stopAngle-startAngle)/(High(s)-Low(s)) else d:=0; if High(s)-Low(s)<1000 then begin // Fast computation (approx 5.5x) alpha:=2*Sqr(Sin(d*0.5)); beta:=Sin(d); SinCos(startAngle, s[Low(s)], c[Low(s)]); for i:=Low(s) to High(s)-1 do begin // Make use of the incremental formulae: // cos (theta+delta) = cos(theta) - [alpha*cos(theta) + beta*sin(theta)] // sin (theta+delta) = sin(theta) - [alpha*sin(theta) - beta*cos(theta)] c[i+1]:= c[i] - alpha * c[i] - beta * s[i]; s[i+1]:= s[i] - alpha * s[i] + beta * c[i]; end; end else begin // Slower, but maintains precision when steps are small for i:=Low(s) to High(s) do SinCos((i-Low(s))*d+startAngle, s[i], c[i]); end; end; // ArcCos (Extended) // function ArcCos(const x : Extended): Extended; begin Result:=ArcTan2(Sqrt(1 - Sqr(X)), X); end; // ArcCos (Single) // function ArcCos(const x : Single): Single; register; // Result:=ArcTan2(Sqrt(c1 - X * X), X); asm FLD X FMUL ST, ST FSUBR cOne FSQRT FLD X FPATAN end; // ArcSin (Extended) // function ArcSin(const x : Extended) : Extended; begin Result:=ArcTan2(X, Sqrt(1 - Sqr(X))) end; // ArcSin (Single) // function ArcSin(const x : Single) : Single; // Result:=ArcTan2(X, Sqrt(1 - X * X)) asm FLD X FLD ST FMUL ST, ST FSUBR cOne FSQRT FPATAN end; // ArcTan2 (Extended) // function ArcTan2(const y, x : Extended) : Extended; asm FLD Y FLD X FPATAN end; // ArcTan2 (Single) // function ArcTan2(const y, x : Single) : Single; asm FLD Y FLD X FPATAN end; // Tan (Extended) // function Tan(const x : Extended) : Extended; asm FLD X FPTAN FSTP ST(0) // FPTAN pushes 1.0 after result end; // Tan (Single) // function Tan(const x : Single) : Single; asm FLD X FPTAN FSTP ST(0) // FPTAN pushes 1.0 after result end; // CoTan (Extended) // function CoTan(const x : Extended) : Extended; asm FLD X FPTAN FDIVRP end; // CoTan (Single) // function CoTan(const x : Single) : Single; asm FLD X FPTAN FDIVRP end; // RSqrt // function RSqrt(v : Single) : Single; asm test vSIMD, 1 jz @@FPU @@3DNow: lea eax, [ebp+8] db $0F,$6E,$00 /// movd mm0, [eax] db $0F,$0F,$C8,$97 /// pfrsqrt mm1, mm0 db $0F,$6F,$D1 /// movq mm2, mm1 db $0F,$0F,$C9,$B4 /// pfmul mm1, mm1 db $0F,$0F,$C8,$A7 /// pfrsqit1 mm1, mm0 db $0F,$0F,$CA,$B6 /// pfrcpit2 mm1, mm2 db $0F,$7E,$08 /// movd [eax], mm1 db $0F,$0E /// femms fld dword ptr [eax] jmp @@End @@FPU: fld v fsqrt fld1 fdivr @@End: end; // ISqrt // function ISqrt(i : Integer) : Integer; register; //begin // Result:=Round(Sqrt(i)); asm push eax test vSIMD, 1 jz @@FPU @@3DNow: db $0F,$6E,$04,$24 /// movd mm0, [esp] db $0F,$0F,$C8,$0D /// pi2fd mm1, mm0 db $0F,$0F,$D1,$97 /// pfrsqrt mm2, mm1 db $0F,$0F,$DA,$96 /// pfrcp mm3, mm2 db $0F,$0F,$E3,$1D /// pf2id mm4, mm3 db $0F,$7E,$24,$24 /// movd [esp], mm4 db $0F,$0E /// femms pop eax ret @@FPU: fild dword ptr [esp] fsqrt fistp dword ptr [esp] pop eax end; // ILength // function ILength(x, y : Integer) : Integer; asm push edx push eax fild dword ptr [esp] fmul ST(0), ST(0) fild dword ptr [esp+4] fmul ST(0), ST(0) faddp fsqrt fistp dword ptr [esp+4] pop edx pop eax end; // ILength // function ILength(x, y, z : Integer) : Integer; asm push ecx push edx push eax fild dword ptr [esp] fmul ST(0), ST(0) fild dword ptr [esp+4] fmul ST(0), ST(0) faddp fild dword ptr [esp+8] fmul ST(0), ST(0) faddp fsqrt fistp dword ptr [esp+8] pop ecx pop edx pop eax end; // RLength // function RLength(x, y : Single) : Single; asm test vSIMD, 1 jz @@FPU @@3DNow: { test vSIMD, 1 jz @@FPU @@3DNow: movd mm0, dword ptr [ebp+8] punpckldq mm0, qword ptr [ebp+12] pfmul mm0, mm0 pfacc mm0, mm0 pfrsqrt mm1, mm0 movq mm2, mm1 pfmul mm2, mm1 pfrsqit1 mm0, mm2 pfrcpit2 mm0, mm1 movd dword ptr [ebp-4], mm0 femms fld dword ptr [ebp-4] jmp @@End } @@FPU: fld x fmul x fld y fmul y fadd fsqrt fld1 fdivr @@End: end; // RandomPointOnSphere // procedure RandomPointOnSphere(var p : TAffineVector); var t, w : Single; begin p[2]:=2*Random-1; t:=2*PI*Random; w:=Sqrt(1-p[2]*p[2]); SinCos(t, w, p[1], p[0]); end; // Trunc64 (extended) // function Trunc64(v : Extended) : Int64; register; asm SUB ESP,12 FSTCW [ESP] FLDCW cwChop FLD v FISTP qword ptr [ESP+4] FLDCW [ESP] POP ECX POP EAX POP EDX end; // Trunc (single) // function Trunc(v : Single) : Integer; register; asm SUB ESP,8 FSTCW [ESP] FLDCW cwChop FLD v FISTP dword ptr [ESP+4] FLDCW [ESP] POP ECX POP EAX end; // Int (Extended) // function Int(v : Extended) : Extended; asm SUB ESP,4 FSTCW [ESP] FLDCW cwChop FLD v FRNDINT FLDCW [ESP] ADD ESP,4 end; // Int (Single) // function Int(v : Single) : Single; asm SUB ESP,4 FSTCW [ESP] FLDCW cwChop FLD v FRNDINT FLDCW [ESP] ADD ESP,4 end; // Frac (Extended) // function Frac(v : Extended) : Extended; asm SUB ESP,4 FSTCW [ESP] FLDCW cwChop FLD v FLD ST FRNDINT FSUB FLDCW [ESP] ADD ESP,4 end; // Frac (Extended) // function Frac(v : Single) : Single; asm SUB ESP,4 FSTCW [ESP] FLDCW cwChop FLD v FLD ST FRNDINT FSUB FLDCW [ESP] ADD ESP,4 end; // Round64 (Extended); // function Round64(v : Extended) : Int64; register; asm SUB ESP,8 FLD v FISTP qword ptr [ESP] POP EAX POP EDX end; // Round (Single); // function Round(v : Single) : Integer; register; asm SUB ESP,4 FLD v FISTP dword ptr [ESP] POP EAX end; // Ceil64 (Extended) // function Ceil64(v : Extended) : Int64; overload; begin if Frac(v)>0 then Result:=Trunc(v)+1 else Result:=Trunc(v); end; // Ceil (Single) // function Ceil(v : Single) : Integer; overload; begin if Frac(v)>0 then Result:=Trunc(v)+1 else Result:=Trunc(v) end; // Floor64 (Extended) // function Floor64(v : Extended) : Int64; overload; begin if Frac(v)<0 then Result:=Trunc(v)-1 else Result:=Trunc(v); end; // Floor (Single) // function Floor(v : Single) : Integer; overload; begin if Frac(v)<0 then Result:=Trunc(v)-1 else Result:=Trunc(v); end; // Sign // function Sign(x : Single) : Integer; begin if x<0 then Result:=-1 else if x>0 then Result:=1 else Result:=0; end; function IsInRange(const x, a, b : Single) : Boolean; begin if a<b then Result:=(a<=x) and (x<=b) else Result:=(b<=x) and (x<=a); end; // IsInCube (affine) // function IsInCube(const p, d : TAffineVector) : Boolean; overload; begin Result:= ((p[0]>=-d[0]) and (p[0]<=d[0])) and ((p[1]>=-d[1]) and (p[1]<=d[1])) and ((p[2]>=-d[2]) and (p[2]<=d[2])); end; // IsInCube (hmg) // function IsInCube(const p, d : TVector) : Boolean; overload; begin Result:= ((p[0]>=-d[0]) and (p[0]<=d[0])) and ((p[1]>=-d[1]) and (p[1]<=d[1])) and ((p[2]>=-d[2]) and (p[2]<=d[2])); end; // MinFloat (single) // function MinFloat(values : PSingleArray; nbItems : Integer) : Single; var i : Integer; begin if nbItems>0 then begin Result:=values[0]; for i:=1 to nbItems-1 do if values[i]<Result then Result:=values[i]; end else Result:=0; end; // MinFloat (double) // function MinFloat(values : PDoubleArray; nbItems : Integer) : Double; var i : Integer; begin if nbItems>0 then begin Result:=values[0]; for i:=1 to nbItems-1 do if values[i]<Result then Result:=values[i]; end else Result:=0; end; // MinFloat (extended) // function MinFloat(values : PExtendedArray; nbItems : Integer) : Extended; var i : Integer; begin if nbItems>0 then begin Result:=values[0]; for i:=1 to nbItems-1 do if values[i]<Result then Result:=values[i]; end else Result:=0; end; // MinFloat (single 2) // function MinFloat(const v1, v2 : Single) : Single; begin if v1<v2 then Result:=v1 else Result:=v2; end; // MaxFloat (single) // function MaxFloat(values : PSingleArray; nbItems : Integer) : Single; overload; var i : Integer; begin if nbItems>0 then begin Result:=values[0]; for i:=1 to nbItems-1 do if values[i]>Result then Result:=values[i]; end else Result:=0; end; // MaxFloat (double) // function MaxFloat(values : PDoubleArray; nbItems : Integer) : Double; overload; var i : Integer; begin if nbItems>0 then begin Result:=values[0]; for i:=1 to nbItems-1 do if values[i]>Result then Result:=values[i]; end else Result:=0; end; // MaxFloat (extended) // function MaxFloat(values : PExtendedArray; nbItems : Integer) : Extended; overload; var i : Integer; begin if nbItems>0 then begin Result:=values[0]; for i:=1 to nbItems-1 do if values[i]>Result then Result:=values[i]; end else Result:=0; end; // MaxFloat // function MaxFloat(const v1, v2 : Single) : Single; begin if v1>v2 then Result:=v1 else Result:=v2; end; // MaxFloat // function MaxFloat(const v1, v2 : Double) : Double; begin if v1>v2 then Result:=v1 else Result:=v2; end; // MaxFloat // function MaxFloat(const v1, v2 : Extended) : Extended; begin if v1>v2 then Result:=v1 else Result:=v2; end; // MaxFloat // function MaxFloat(const v1, v2, v3 : Single) : Single; begin if v1>=v2 then if v1>=v3 then Result:=v1 else if v3>=v2 then Result:=v3 else Result:=v2 else if v2>=v3 then Result:=v2 else if v3>=v1 then Result:=v3 else result:=v1; end; // MaxFloat // function MaxFloat(const v1, v2, v3 : Double) : Double; begin if v1>=v2 then if v1>=v3 then Result:=v1 else if v3>=v2 then Result:=v3 else Result:=v2 else if v2>=v3 then Result:=v2 else if v3>=v1 then Result:=v3 else result:=v1; end; // MaxFloat // function MaxFloat(const v1, v2, v3 : Extended) : Extended; begin if v1>=v2 then if v1>=v3 then Result:=v1 else if v3>=v2 then Result:=v3 else Result:=v2 else if v2>=v3 then Result:=v2 else if v3>=v1 then Result:=v3 else result:=v1; end; procedure ScaleFloatArray(values : PSingleArray; nb : Integer; var factor : Single); register; asm test vSIMD, 1 jz @@FPU push edx shr edx, 2 or edx, edx jz @@FPU db $0F,$6E,$39 /// movd mm7, [ecx] db $0F,$62,$FF /// punpckldq mm7, mm7 @@3DNowLoop: db $0F,$0D,$48,$40 /// prefetchw [eax+64] db $0F,$6F,$00 /// movq mm0, [eax] db $0F,$6F,$48,$08 /// movq mm1, [eax+8] db $0F,$0F,$C7,$B4 /// pfmul mm0, mm7 db $0F,$0F,$CF,$B4 /// pfmul mm1, mm7 db $0F,$7F,$00 /// movq [eax], mm0 db $0F,$7F,$48,$08 /// movq [eax+8], mm1 add eax, 16 dec edx jnz @@3DNowLoop pop edx and edx, 3 db $0F,$0E /// femms @@FPU: push edx shr edx, 1 or edx, edx jz @@FPULone @@FPULoop: fld dword ptr [eax] fmul dword ptr [ecx] fstp dword ptr [eax] fld dword ptr [eax+4] fmul dword ptr [ecx] fstp dword ptr [eax+4] add eax, 8 dec edx jnz @@FPULoop @@FPULone: pop edx test edx, 1 jz @@End fld dword ptr [eax] fmul dword ptr [ecx] fstp dword ptr [eax] @@End: end; // ScaleFloatArray (array) // procedure ScaleFloatArray(var values : TSingleArray; factor : Single); begin if Length(values)>0 then ScaleFloatArray(@values[0], Length(values), factor); end; initialization //-------------------------------------------------------------- //-------------------------------------------------------------- //-------------------------------------------------------------- try // detect 3DNow! capable CPU (adapted from AMD's "3DNow! Porting Guide") asm pusha mov eax, $80000000 db $0F,$A2 /// cpuid cmp eax, $80000000 jbe @@No3DNow mov eax, $80000001 db $0F,$A2 /// cpuid test edx, $80000000 jz @@No3DNow mov vSIMD, 1 @@No3DNow: popa end; except // trap for old/exotics CPUs vSIMD:=0; end; function GeometryOptimizationMode : String; begin case vSIMD of 0 : Result:='FPU'; 1 : Result:='3DNow!'; 2 : Result:='SSE'; else Result:='*ERR*'; end; end; var // this var is adjusted during "initialization", current values are // + 0 : use standard optimized FPU code // + 1 : use 3DNow! optimized code (requires K6-2/3 CPU) // + 2 : use Intel SSE code (Pentium III, NOT IMPLEMENTED YET !) vSIMD : Byte = 0; var vOldSIMD : Byte; vFPUOnlySectionCounter : Integer; procedure BeginFPUOnlySection; begin if vFPUOnlySectionCounter=0 then vOldSIMD:=vSIMD; Inc(vFPUOnlySectionCounter); vSIMD:=0; end; // EndFPUOnlySection // procedure EndFPUOnlySection; begin Dec(vFPUOnlySectionCounter); Assert(vFPUOnlySectionCounter>=0); if vFPUOnlySectionCounter=0 then vSIMD:=vOldSIMD; end;