Mega Code Archive

 
Categories / Delphi / Strings
 

Speeding AnsiStrings tricks, and some code (2 Code)

Title: Speeding AnsiStrings tricks, and some code (2-Code) Question: How AnsiStrings work, some tricks and reusable code to reduce unnecessary reallocations. Answer: This is the packaged code, for an intro, see Part I. unit StrSysUtils; // Written by: TheDelphiGuy, 2005 interface uses SysUtils; // Low-level AnsiString stuff //==================================================== function StrLenEqual( const Str1, Str2 : AnsiString ) : boolean; overload; // A fast length comparison procedure StrConcatInit( var aStr{ : AnsiString}; const InitStr : AnsiString; var RealLen : integer; AllocBy : integer = 512 ); overload; // Sets aStr to InitStr, initializes RealLen := length(InitStr), allocating as many [AllocBy]-sized chunks as needed. // You'll normally call this one procedure StrConcatInit( var aStr{ : AnsiString}; InitStr : pchar; var RealLen : integer; AllocBy : integer = 512 ); overload; // Same, with a pchar, so RealLen := [pos of #0 inside InitStr] procedure StrConcatInit( var aStr{ : AnsiString}; var RealLen : integer; AllocBy : integer = 512 ); overload; // Keeps the current value of aStr, initializes RealLen := length(aStr), allocating as many [AllocBy]-sized chunks as needed. procedure StrConcat( var aStr{ : AnsiString}; const NewStr : AnsiString; var RealLen : integer; AllocBy : integer = 512 ); overload; // Usage Example: // strConcatInit( wFullText, '', wLen ); { Init } // for i := low( StrList ) to high( StrList ) do // strConcat( wFullText, StrList[i], wLen ); { Concat & update wLen with real length } // strSetLen( wFullText, wLen ); { Restore real length } procedure StrConcat( var aStr{ : AnsiString}; ch : char; var RealLen : integer; AllocBy : integer = 512 ); overload; procedure StrConcat( var aStr{ : AnsiString}; NewStr : pchar; var RealLen : integer; AllocBy : integer = 512 ); overload; // Same, but will concat a pchar procedure StrInsert( var aStr{ : AnsiString}; const NewStr : AnsiString; At : integer; var RealLen : integer; AllocBy : integer = 512 ); overload; // Like StrConcat, but inserting [NewStr] in [At] position procedure StrInsert( var aStr{ : AnsiString}; NewStr : pchar; At : integer; var RealLen : integer; AllocBy : integer = 512 ); overload; procedure strSetLen( var aStr {: AnsiString}; lenNew : cardinal ); // Simply updates the .strLen field, without reallocating memory. Should be faster than System.SetLength provided that: // aStr is unique (RefCount = 1), *AND* we are reducing the length of the AnsiString // NOTE: While this should reduce memory fragmentation, keep in mind that the extra characters remain allocated, // so in some cases, where a lot of these strings are kept, you may want to revert to System.SetLength. // Useful mainly for calls to the Windows API: // ... // SetLegth( Result, MAX_PATH ); // Count := WinApiCall( Result, MAX_PATH ); // strSetLen( Result, Count ); { Eliminates the reallocation of Result } // ... procedure strDecLen( var aStr {:AnsiString}; Delta : integer ); // See strSetLen procedure StrAssertCapacity( var aStr{ : AnsiString}; RequiredSize : integer; AllocBy : integer = 512 ); // Makes sure [aStr] has room for at least [RequiredSize] characters // if not, length is increased in as many [AllocBy]-sized chunks as needed type // This is the structure found *BEFORE* string characters: PUndocStrStruct = ^TUndocStrStruct; TUndocStrStruct = packed record strRefCount : longint; strLength : longint; end; const strSkew = sizeof( TUndocStrStruct ); strOverhead = sizeof( TUndocStrStruct ) + 1; // Size of struct + the ending #0 // Quick Hashing //==================================================== function strHashCut4( const aStr : AnsiString ) : integer; // Returns the first 4 chars as the hash function strHashCut8( const aStr : AnsiString ) : integer; // Returns a combination of the first 8 chars as the hash implementation ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Utility functions //==================================================== function Sign( Num : integer ) : integer; // -1, 0, or 1 asm // On entry: EAX: Num shl eax, 1 sbb eax, eax lea eax, [eax + eax + 1] end; function NearestMultUp( Num, Step : integer ) : integer; overload; begin Result := Num + step - 1; // Num + sign( Num ) * ( step - 1 ); if Num can be Result := Result - ( Result mod Step ); end; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Quick Hashing //==================================================== function strHashCut4( const aStr : AnsiString ) : integer; var pchStr : pchar absolute aStr; begin case length( aStr ) of 0 : Result := 0; 1, 2 : // Remember that AnsiStrings are #0 terminated, Result := pword( pchStr )^; // so 'a' is really 'a'#0... which we can conveniently consider as a 2-char str else Result := pinteger( pchStr )^; // Same for a 3-char str end; end; function strHashCut8( const aStr : AnsiString ) : integer; var pchStr : pchar absolute aStr; begin case length( aStr ) of 0 : Result := 0; 1, 2 : Result := pword( pchStr )^; 3, 4 : Result := pinteger( pchStr )^; 5, 6 : Result := pinteger( pchStr )^ xor pword( pchStr+4 )^; else Result := pinteger( pchStr )^ xor pinteger( pchStr+4 )^; end; end; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //function StrStruct( const aStr : AnsiString ) : PUndocStrStruct; // begin // if integer( aStr ) 0 // then Result := PUndocStrStruct( integer( aStr ) - StrSkew ) // else Result := nil; // end; function StrLenEqual( const Str1, Str2 : AnsiString ) : boolean; overload; asm // On entry: EAX= Str1, EDX = Str2 cmp eax, edx je @ItsTrue // Str1 = Str2, including Str1 = Str2 = nil or eax, eax // Str1 = '' jz @ItsFalse or edx, edx // Str2 = '' jz @ItsFalse mov ecx, [eax-strSkew].TUndocStrStruct.strLength cmp ecx, [edx-strSkew].TUndocStrStruct.strLength jne @ItsFalse @ItsTrue: or al, 1 ret @ItsFalse: xor eax, eax end; function strGetLen( const aStr : AnsiString ) : integer; // begin // if integer( aStr ) 0 // then Result := PUndocStrStruct( integer( aStr ) - StrSkew ).strLength // else Result := 0; // end; asm // On entry: EAX= aStr test eax, eax je @Quit mov eax, [eax-strSkew].TUndocStrStruct.strLength @Quit: end; procedure strSetLen( var aStr {: AnsiString}; lenNew : cardinal ); {$IFOPT C+} var s : AnsiString absolute aStr; pStr : PUndocStrStruct; begin if integer( aStr ) = 0 then SetLength( s, lenNew ) else begin pStr := PUndocStrStruct( integer( aStr ) - StrSkew ); assert( pStr.strRefCount = 1, 'Should NOT call strSetLen with shared strings (eg, strRefCount 1)!!' ); if cardinal( pStr.strLength ) lenNew then begin pStr.strLength := lenNew; pchar(aStr)[lenNew] := #0; end; end; end; {$ELSE} asm // On entry: EAX= pointer to aStr, EDX = lenNew mov ecx, [eax] // ECX = aStr test ecx, ecx // Empty? je @NormalCall cmp edx, [ecx-strSkew].TUndocStrStruct.strLength jae @NormalCall mov [ecx-strSkew].TUndocStrStruct.strLength, edx mov byte ptr [ecx+edx], 0 // Put the #0 ret @NormalCall: call System.@LStrSetLength end; {$ENDIF} procedure strDecLen( var aStr {:AnsiString}; Delta : integer ); {$IFOPT C+} var s : AnsiString absolute aStr; pStr : PUndocStrStruct; begin if integer( aStr ) = 0 then pStr := nil else pStr := PUndocStrStruct( integer( aStr ) - StrSkew ); assert( pStr.strRefCount = 1, 'Should NOT call strSetLen with shared strings (eg, strRefCount 1)!!' ); assert( pStr.strLength 0, 'Should NOT call strDecLen with shared strings (eg, strRefCount 1)!!' ); if ( pStr nil ) and ( pStr.strLength = Delta ) then begin pStr.strLength := pStr.strLength - Delta; pchar(aStr)[pStr.strLength] := #0; end; end; {$ELSE} asm // On entry: EAX= pointer to aStr, EDX = lenNew mov ecx, [eax] // ECX = aStr test ecx, ecx // Empty? je @NormalCall cmp edx, [ecx-strSkew].TUndocStrStruct.strLength ja @NormalCall sub [ecx-strSkew].TUndocStrStruct.strLength, edx mov byte ptr [ecx+edx], 0 // Put the #0 ret @NormalCall: call System.@LStrSetLength end; {$ENDIF} procedure StrAssertCapacity( var aStr {:AnsiString}; RequiredSize : integer; AllocBy : integer = 512 ); var s : AnsiString absolute aStr; newSize : integer; begin if length( s ) then begin newSize := RequiredSize + AllocBy - 1; newSize := newSize - ( newSize mod AllocBy ); SetLength( s, newSize ); end; end; procedure StrConcatInit( var aStr{ : AnsiString}; var RealLen : integer; AllocBy : integer = 512 ); overload; var s : AnsiString absolute aStr; begin RealLen := 0; SetLength( s, NearestMultUp( RealLen, AllocBy ) ); end; procedure StrConcatInit( var aStr{ : AnsiString}; const InitStr : AnsiString; var RealLen : integer; AllocBy : integer = 512 ); overload; var s : AnsiString absolute aStr; pchStr : pchar absolute aStr; pchInitStr : pchar absolute InitStr; begin RealLen := length( InitStr ); SetLength( s, NearestMultUp( RealLen, AllocBy ) ); if pchInitStr pchStr then Move( pchInitStr[0], pchStr[0], RealLen ); end; procedure StrConcatInit( var aStr{ : AnsiString}; InitStr : pchar; var RealLen : integer; AllocBy : integer = 512 ); overload; var s : AnsiString absolute aStr; pchStr : pchar absolute aStr; begin RealLen := length( InitStr ); SetLength( s, NearestMultUp( RealLen, AllocBy ) ); Move( InitStr[0], pchStr[0], RealLen ); end; procedure StrConcat( var aStr{ : AnsiString}; NewStr : pchar; var RealLen : integer; AllocBy : integer = 512 ); overload; var s : AnsiString absolute aStr; lenNewStr : integer; begin lenNewStr := StrLen( NewStr ); if length( s ) then SetLength( s, NearestMultUp( RealLen + lenNewStr, AllocBy ) ); Move( NewStr[0], pchar(aStr)[RealLen], lenNewStr + 1 ); inc( RealLen, lenNewStr ); end; procedure StrConcat( var aStr{ : AnsiString}; ch : char; var RealLen : integer; AllocBy : integer = 512 ); overload; var s : AnsiString absolute aStr; begin if length( s ) then SetLength( s, RealLen + AllocBy ); inc( RealLen ); pchar(aStr)[RealLen] := ch; end; procedure StrConcat( var aStr{ : AnsiString}; const NewStr : AnsiString; var RealLen : integer; AllocBy : integer = 512 ); overload; var s : AnsiString absolute aStr; lenNewStr : integer; begin lenNewStr := length( NewStr ); if length( s ) then SetLength( s, NearestMultUp( RealLen + lenNewStr, AllocBy ) ); Move( pchar(NewStr)[0], (pchar(aStr) + RealLen)[0], lenNewStr + 1 ); inc( RealLen, lenNewStr ); end; procedure StrInsert( var aStr{ : AnsiString}; NewStr : pchar; At : integer; var RealLen : integer; AllocBy : integer = 512 ); overload; var s : AnsiString absolute aStr; lenNewStr : integer; begin lenNewStr := StrLen( NewStr ); if length( s ) then SetLength( s, NearestMultUp( RealLen + lenNewStr, AllocBy ) ); Move( pchar(aStr)[At-1], pchar(aStr)[lenNewStr + At-1], RealLen - At + 1 ); Move( pchar(NewStr)[0], pchar(aStr)[At], lenNewStr ); inc( RealLen, lenNewStr ); end; procedure StrInsert( var aStr{ : AnsiString}; const NewStr : AnsiString; At : integer; var RealLen : integer; AllocBy : integer = 512 ); overload; var s : AnsiString absolute aStr; lenNewStr : integer; begin lenNewStr := length( NewStr ); if length( s ) then SetLength( s, NearestMultUp( RealLen + lenNewStr, AllocBy ) ); Move( pchar(aStr)[At-1], pchar(aStr)[lenNewStr + At-1], RealLen - At + 1 ); Move( pchar(NewStr)[0], pchar(aStr)[At], lenNewStr ); inc( RealLen, lenNewStr ); end; end.