Author: Erwin Molendijk
How do I parse a string containing quoted strings and other tokens?
Answer:
{
  These routines parse a string one token at a time.
  Use GetToken to extract a normal (unquoted) token from InTxt.
  Use GetTokenStr to extract a quoted token from InTxt.

  Both functions return the token and remove it, together with the
  separator characters that follow it, from InTxt.

  GetTokenStr raises an exception in case of an error. Use
  try..except blocks to handle these.
}
type
  { Set of characters; used below to describe which characters separate tokens. }
  CharSet = set of Char;

const
  { Ready-made separator sets for the SpaceChar parameter }
  CS_Space: CharSet = [' '];      { blank only }
  CS_CSV: CharSet = [',', ' '];   { comma or blank }
  CS_STab: CharSet = [#9, ' '];   { tab or blank }

  { Quote characters, intended for the QuoteChar parameter of GetTokenStr }
  DoubleQuote = '"';
  SingleQuote = '''';
21
{ Extracts the next unquoted token from InTxt.

  InTxt     - text to parse; on return the token, the separator that ended it
              and any separators directly following have been removed.
  SpaceChar - set of characters that act as token separators.

  Returns the characters in front of the first separator, or all of InTxt
  when it contains no separator. If InTxt starts with a separator the
  result is an empty string. }
function GetToken(var InTxt: string; SpaceChar: CharSet): string;
var
  SepPos, NextPos: Integer;
begin
  { Locate the first separator; SepPos becomes Length(InTxt) + 1 if none exists }
  SepPos := 1;
  while SepPos <= Length(InTxt) do
  begin
    if InTxt[SepPos] in SpaceChar then
      Break;
    Inc(SepPos);
  end;

  { Everything in front of that separator is the token }
  Result := Copy(InTxt, 1, SepPos - 1);

  { Step over the terminating separator and every separator that follows it }
  NextPos := SepPos + 1;
  while (NextPos <= Length(InTxt)) and (InTxt[NextPos] in SpaceChar) do
    Inc(NextPos);

  { Drop token and separators in one go (Delete clamps an oversized count) }
  Delete(InTxt, 1, NextPos - 1);
end;
40
{ Extracts the next quoted token from InTxt.

  InTxt     - text to parse; must start with QuoteChar. On success the quoted
              token and any separators directly following it are removed.
              If an exception is raised, InTxt is left unchanged.
  SpaceChar - set of characters skipped after the closing quote.
  QuoteChar - character that opens and closes the token. Two consecutive
              QuoteChars inside the token stand for one literal QuoteChar.

  Returns the token without its surrounding quotes and with doubled quotes
  collapsed to single ones.

  Raises Exception when InTxt does not start with QuoteChar or when the
  closing quote is missing. }
function GetTokenStr(var InTxt: string; SpaceChar: CharSet; QuoteChar: Char):
  string;
var
  Len, ScanPos, RunStart: Integer;
  Closed: Boolean;
begin
  Len := Length(InTxt);

  { Error checking: does the string start with a quote? }
  if (Len = 0) or (InTxt[1] <> QuoteChar) then
    raise Exception.Create('Expected ' + QuoteChar + ' but ' + Copy(InTxt, 1, 1) +
      ' found.');

  Result := '';
  ScanPos := 2; { first character after the opening quote }
  Closed := False;

  { Scan by index only; InTxt is not modified until parsing has succeeded,
    so a failure cannot leave the caller with a half-consumed string. }
  repeat
    { Find the next quote character }
    RunStart := ScanPos;
    while (ScanPos <= Len) and (InTxt[ScanPos] <> QuoteChar) do
      Inc(ScanPos);

    { Error checking: ran off the end without a closing quote? }
    if ScanPos > Len then
      raise Exception.Create('Unexpected end of string.');

    { Copy up to (but not including) the quote, then step over the quote }
    Result := Result + Copy(InTxt, RunStart, ScanPos - RunStart);
    Inc(ScanPos);

    if (ScanPos <= Len) and (InTxt[ScanPos] = QuoteChar) then
    begin
      { Doubled quote: emit one literal quote and keep scanning }
      Result := Result + QuoteChar;
      Inc(ScanPos);
    end
    else
      Closed := True; { a lone quote closes the token }
  until Closed;

  { Skip separators that follow the closing quote }
  while (ScanPos <= Len) and (InTxt[ScanPos] in SpaceChar) do
    Inc(ScanPos);

  { Commit: remove everything consumed from the front of InTxt }
  Delete(InTxt, 1, ScanPos - 1);
end;
94
95 //Usage Example:
96
var
  Line: string;
begin
  Line := '"John Smith", 500, "This is ""quoted""", "", "That was an empty string"';
  { Each call removes one field from the front of Line and logs it }
  Memo1.Lines.Add(GetTokenStr(Line, CS_CSV, DoubleQuote)); { John Smith }
  Memo1.Lines.Add(GetToken(Line, CS_CSV));                 { 500 }
  Memo1.Lines.Add(GetTokenStr(Line, CS_CSV, DoubleQuote)); { This is "quoted" }
  Memo1.Lines.Add(GetTokenStr(Line, CS_CSV, DoubleQuote)); { (empty string) }
  Memo1.Lines.Add(GetTokenStr(Line, CS_CSV, DoubleQuote)); { That was an empty string }
end;
|