Author: Daniel Wischnewski
Recently I ran into the problem of converting text to the Shift-JIS (Japanese)
code page while creating an i-mode interface for my company's Content
Management System. Before starting to write everything myself, I looked
into the tools Microsoft already gives us.
Answer:
All systems (Win 95+ and WinNT4+) with MS Internet Explorer 4 or newer have a
library named mlang.dll in the Winnt\System32 directory. Usually you can tell
Delphi to simply import such COM libraries; this one, however, Delphi would not
import. So I started to convert the "most wanted" interface myself. I present
the results here.
First I give you the code for the conversion unit, which allows you to simply
convert any text from one code page into another. After that I will briefly
discuss the code and give you a sample of how to use it.
1 uCodePageConverter
2
3 {* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
4 *
5 * Unit Name : uCodePageConverter
6 * Autor : Daniel Wischnewski
7 * Copyright : Copyright © 2002 by gate(n)etwork. All Right Reserved.
8 * Urheber : Daniel Wischnewski
9 *
10 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *}
11
unit uCodePageConverter;

{ Hand-written Delphi import of the IMLangConvertCharset COM interface that
  is implemented by mlang.dll, plus a small factory class for creating the
  converter object. Only this one MLang interface is translated here. }

interface

uses
  Windows;

const
  // Interface and class identifiers of the MLang charset converter.
  IID_MLangConvertCharset: TGUID = '{D66D6F98-CDAA-11D0-B822-00C04FC9B31F}';
  CLASS_MLangConvertCharset: TGUID = '{D66D6F99-CDAA-11D0-B822-00C04FC9B31F}';

type
  // Type of the dwProperty argument of IMLangConvertCharset.Initialize.
  tagMLCONVCHARF = DWORD;

const
  // Values that can be passed as dwProperty to Initialize.
  MLCONVCHARF_AUTODETECT: tagMLCONVCHARF = 1;
  MLCONVCHARF_ENTITIZE: tagMLCONVCHARF = 2;

type
  // Numeric Windows code page identifier.
  tagCODEPAGE = UINT;

const
  // Convenience names for frequently used code pages. The leading zeros of
  // the three-digit identifiers are dropped; the values are plain decimal.
  CODEPAGE_Thai: tagCODEPAGE = 874;
  CODEPAGE_Japanese: tagCODEPAGE = 932;
  CODEPAGE_Chinese_PRC: tagCODEPAGE = 936;
  CODEPAGE_Korean: tagCODEPAGE = 949;
  CODEPAGE_Chinese_Taiwan: tagCODEPAGE = 950;
  CODEPAGE_UniCode: tagCODEPAGE = 1200;
  CODEPAGE_Windows_31_EastEurope: tagCODEPAGE = 1250;
  CODEPAGE_Windows_31_Cyrillic: tagCODEPAGE = 1251;
  CODEPAGE_Windows_31_Latin1: tagCODEPAGE = 1252;
  CODEPAGE_Windows_31_Greek: tagCODEPAGE = 1253;
  CODEPAGE_Windows_31_Turkish: tagCODEPAGE = 1254;
  CODEPAGE_Hebrew: tagCODEPAGE = 1255;
  CODEPAGE_Arabic: tagCODEPAGE = 1256;
  CODEPAGE_Baltic: tagCODEPAGE = 1257;

type
  { Converter between two code pages.

    Call Initialize once with the source and destination code page, then one
    of the DoConversion* methods. All size arguments are pointers to in/out
    UINT values: on input they describe the buffers, on output they report
    what was consumed or produced.

    The non-Unicode buffers are declared as PAnsiChar, not PChar: MLang
    works on raw bytes there, and PChar turns into a wide-character pointer
    on Unicode-enabled Delphi versions. On older compilers PAnsiChar and
    PChar are the same type, so existing callers are unaffected. }
  IMLangConvertCharset = interface
    ['{D66D6F98-CDAA-11D0-B822-00C04FC9B31F}']
    // Selects source/destination code page and the MLCONVCHARF_* options.
    function Initialize(
      uiSrcCodePage: tagCODEPAGE;
      uiDstCodePage: tagCODEPAGE;
      dwProperty: tagMLCONVCHARF
    ): HResult; stdcall;
    // Return the values that were passed to Initialize.
    function GetSourceCodePage(
      out puiSrcCodePage: tagCODEPAGE
    ): HResult; stdcall;
    function GetDestinationCodePage(
      out puiDstCodePage: tagCODEPAGE
    ): HResult; stdcall;
    function GetProperty(
      out pdwProperty: tagMLCONVCHARF
    ): HResult; stdcall;
    // Multi-byte -> multi-byte conversion.
    function DoConversion(
      pSrcStr: PAnsiChar; pcSrcSize: PUINT;
      pDstStr: PAnsiChar; pcDstSize: PUINT
    ): HResult; stdcall;
    // Multi-byte -> UTF-16 conversion.
    function DoConversionToUnicode(
      pSrcStr: PAnsiChar; pcSrcSize: PUINT;
      pDstStr: PWideChar; pcDstSize: PUINT
    ): HResult; stdcall;
    // UTF-16 -> multi-byte conversion.
    function DoConversionFromUnicode(
      pSrcStr: PWideChar; pcSrcSize: PUINT;
      pDstStr: PAnsiChar; pcDstSize: PUINT
    ): HResult; stdcall;
  end;

  { Factory for the MLang converter object. }
  CoMLangConvertCharset = class
    // Creates the converter on the local machine.
    class function Create: IMLangConvertCharset;
    // Creates the converter on the machine named MachineName.
    class function CreateRemote(const MachineName: string): IMLangConvertCharset;
  end;

implementation

uses
  ComObj;

{ CoMLangConvertCharset }

class function CoMLangConvertCharset.Create: IMLangConvertCharset;
begin
  // The "as" cast queries the new object for IMLangConvertCharset.
  Result := CreateComObject(CLASS_MLangConvertCharset) as IMLangConvertCharset;
end;

class function CoMLangConvertCharset.CreateRemote(
  const MachineName: string
): IMLangConvertCharset;
begin
  Result := CreateRemoteComObject(
    MachineName, CLASS_MLangConvertCharset
  ) as IMLangConvertCharset;
end;

end.
As you can see, I translated only one of the many interfaces; however, this one
is the most efficient (according to Microsoft) and will do the job. In addition, I
added some constants to simplify the task of finding the most important values.
When using this unit to do any code page conversions, you must not forget that
both code pages (source and destination) must be installed and supported on the
computer that does the translation. On the computer that is going to show the
result, only the destination code page must be installed and supported.
To test the unit, simply create a form with a memo and a button. Add the following
code to the button's OnClick event. (Do not forget to add the conversion unit to the
uses clause!)
SAMPLE
101
{ Converts the text of Memo1 from Unicode to the Japanese code page (932)
  with MLang and writes the converted bytes back into the memo.

  Raises Exception when any MLang call reports a failure HRESULT.
  Uses Failed (Windows) and Exception (SysUtils); both units are part of the
  default uses clause of a form unit. }
procedure TForm1.Button1Click(Sender: TObject);

  // Turns a failed COM call into an exception instead of silently carrying
  // on with an unusable converter or a half-filled buffer.
  procedure Check(Status: HResult; const Step: string);
  begin
    if Failed(Status) then
      raise Exception.CreateFmt('%s failed (HRESULT $%.8x)', [Step, Status]);
  end;

var
  Conv: IMLangConvertCharset;
  WideText: WideString;
  Converted: AnsiString;
  SourceSize, DestSize: UINT;
begin
  // connect to MS multi-language lib
  Conv := CoMLangConvertCharset.Create;
  // initialize Unicode -> Japanese translation
  Check(
    Conv.Initialize(CODEPAGE_UniCode, CODEPAGE_Japanese, MLCONVCHARF_ENTITIZE),
    'Initialize');

  // Hold the UTF-16 text in a named variable so the pointer handed to MLang
  // stays valid for both calls (a pointer into a cast temporary would not be
  // guaranteed to).
  WideText := Memo1.Text;
  // Source size is counted in UTF-16 characters of WideText - not in bytes
  // of the memo's own string - and includes the terminating #0.
  SourceSize := Length(WideText) + 1;

  // first pass: no destination buffer, only ask for the size needed
  DestSize := 0;
  Check(
    Conv.DoConversionFromUnicode(PWideChar(WideText), @SourceSize, nil, @DestSize),
    'DoConversionFromUnicode (size query)');

  // An AnsiString serves as byte buffer: it is released automatically and
  // is always followed by a #0, so no GetMem/FreeMem pair is needed.
  SetLength(Converted, DestSize);
  if DestSize > 0 then
  begin
    // SourceSize is an in/out argument; restore it before the real pass.
    SourceSize := Length(WideText) + 1;
    Check(
      Conv.DoConversionFromUnicode(
        PWideChar(WideText), @SourceSize, Pointer(Converted), @DestSize),
      'DoConversionFromUnicode');
    // keep only the bytes that were actually written
    SetLength(Converted, DestSize);
  end;

  // show; as before the text ends at the first #0 byte. The bytes are
  // Shift-JIS, so they only display correctly where that code page is used.
  Memo1.Text := string(PAnsiChar(Converted));
end;
Further information regarding code page translations can be found at MSDN - IMLangConvertCharset http://msdn.microsoft.com/library/default.asp?url=/library/en-us/wceielng/htm/cerefIMLangConvertCharsetIUnknown.asp
|