Mega Code Archive

 
Categories / Delphi / Examples
 

How to split text into words

Title: How to split text into words procedure SplitTextIntoWords(const S: string; words: TstringList); var startpos, endpos: Integer; begin Assert(Assigned(words)); words.Clear; startpos := 1; while startpos = Length(S) do begin // skip non-letters while (startpos = Length(S)) and not IsCharAlpha(S[startpos]) do Inc(startpos); if startpos = Length(S) then begin // find next non-letter endpos := startpos + 1; while (endpos = Length(S)) and IsCharAlpha(S[endpos]) do Inc(endpos); words.Add(Copy(S, startpos, endpos - startpos)); startpos := endpos + 1; end; { If } end; { While } end; { SplitTextIntoWords } function StringMatchesMask(S, mask: string; case_sensitive: Boolean): Boolean; var sIndex, maskIndex: Integer; begin if not case_sensitive then begin S := AnsiUpperCase(S); mask := AnsiUpperCase(mask); end; { If } Result := True; // blatant optimism sIndex := 1; maskIndex := 1; while (sIndex = Length(S)) and (maskIndex = Length(mask)) do begin case mask[maskIndex] of '?': begin // matches any character Inc(sIndex); Inc(maskIndex); end; { case '?' } '*': begin // matches 0 or more characters, so need to check for // next character in mask Inc(maskIndex); if maskIndex Length(mask) then // * at end matches rest of string Exit else if mask[maskindex] in ['*', '?'] then raise Exception.Create('Invalid mask'); // look for mask character in S while (sIndex = Length(S)) and (S[sIndex] mask[maskIndex]) do Inc(sIndex); if sIndex Length(S) then begin // character not found, no match Result := False; Exit; end; { If } end; { Case '*' } else if S[sIndex] = mask[maskIndex] then begin Inc(sIndex); Inc(maskIndex); end { If } else begin // no match Result := False; Exit; end; end; { Case } end; { While } // if we have reached the end of both S and mask we have a complete // match, otherwise we only have a partial match if (sIndex = Length(S)) or (maskIndex = Length(mask)) then Result := False; end; { stringMatchesMask } procedure FindMatchingWords(const S, mask: string; case_sensitive: Boolean; matches: Tstrings); var words: TstringList; i: Integer; begin Assert(Assigned(matches)); words := TstringList.Create; try SplitTextIntoWords(S, words); matches.Clear; for i := 0 to words.Count - 1 do begin if stringMatchesMask(words[i], mask, case_sensitive) then matches.Add(words[i]); end; { For } finally words.Free; end; end; { The Form has one TMemo for the text to check, one TEdit for the mask, one TCheckbox (check = case sensitive), one TListbox for the results, one Tbutton } procedure TForm1.Button1Click(Sender: TObject); begin FindMatchingWords(memo1.Text, edit1.Text, checkbox1.Checked, listbox1.Items); end;