Author: Paramjeet Reen
How can I split up a file into smaller pieces of specified size and have the source
code simple at the same time.
Answer:
Now why would one want to split up files? One reason could be that a file is too
large to be transferred reliably to another computer. Hence you chop it up into smaller,
manageable pieces, transfer the pieces and re-assemble them on the target computer.
Here is a very small, simple and very fast function for splitting a specified file
into smaller files of a specified size (in bytes). The function uses streams and is
more or less self-explanatory. Error handling is currently minimal and can be
extended. The function does not modify the original file in any manner, but merely
creates new files in the same directory as the original file with sequenced
extensions (.001, .002, ...).
What's the use of splitting if you cannot put them together again? To join up the
split files, you can use the command line:
Copy /B File1 + File2 + File3 ... TargetFile
Save the following code to a file named "SplitFl.pas", use it in your source with
the "Uses SplitFl" clause and you are ready to split (hopefully not of laughter)!
1
2 {******************************************************}
3 {* Description: Splits a specified file into pieces *}
4 {* of specified size. *}
5 {******************************************************}
6 {* Last Modified : 12-Mar-2001 *}
7 {* Author : Paramjeet Reen *}
8 {******************************************************}
9 {* I do not guarantee the fitness of this program. *}
10 {* Please use it at your own risk. *}
11 {******************************************************}
12 {* Category :Freeware. *}
13 {******************************************************}
14
15 unit SplitFl;
16
17 interface
18
19 procedure SplitFile(const pFileName: AnsiString; const pSplitSize: LongInt);
20
21 implementation
22
23 uses
24 Classes, SysUtils, Dialogs;
25
{ Picks the split-friendly minimum of two sizes.
  Returns a when a is strictly less than b (even if a is negative);
  otherwise returns b, except that a non-positive b yields 0. }
function Smaller(const a, b: LongInt): LongInt;
begin
  { Start from the fallback value and override it when a branch applies. }
  Result := 0;
  if a < b then
    Result := a
  else if b > 0 then
    Result := b;
end;
39
{ Splits the file pFileName into consecutive pieces of at most pSplitSize
  bytes each. The pieces are written next to the original as
  pFileName.001, pFileName.002, ... and the original file is left untouched.

  Parameters:
    pFileName  - path of the file to split.
    pSplitSize - maximum size of each piece in bytes; must be > 0.

  Behaviour:
    - If the file is not larger than pSplitSize, nothing is written and an
      information dialog is shown.
    - Raises Exception when pSplitSize <= 0.
    - Stream exceptions raised by TFileStream (open/create/read/write
      failures) propagate to the caller; both streams are always released. }
procedure SplitFile(const pFileName: AnsiString; const pSplitSize: LongInt);
var
  vInpFl: TFileStream;
  vOutFl: TFileStream;
  vCtr: Integer;
  vRemaining: Int64;
  vChunk: LongInt;
begin
  { A count of 0 makes TStream.CopyFrom copy the WHOLE source stream, and a
    negative count is meaningless, so reject both up front. }
  if pSplitSize <= 0 then
    raise Exception.CreateFmt('Invalid split size: %d', [pSplitSize]);

  { Read-only access; let other readers share the file while we work. }
  vInpFl := TFileStream.Create(pFileName, fmOpenRead or fmShareDenyWrite);
  try
    if vInpFl.Size > pSplitSize then
    begin
      vCtr := 0;
      { Track the remainder in 64 bits so inputs larger than 2 GB are not
        truncated to a zero/negative 32-bit value. }
      vRemaining := vInpFl.Size;

      while vRemaining > 0 do
      begin
        Inc(vCtr);

        { The last piece may be shorter than pSplitSize. }
        if vRemaining < pSplitSize then
          vChunk := LongInt(vRemaining)
        else
          vChunk := pSplitSize;

        vOutFl := TFileStream.Create(pFileName + '.' + FormatFloat('000', vCtr),
          fmCreate);
        try
          vOutFl.CopyFrom(vInpFl, vChunk);
        finally
          vOutFl.Free;
        end;

        vRemaining := vRemaining - vChunk;
      end;
    end
    else
      MessageDlg('File too small to split!', mtInformation, [mbOk], 0);
  finally
    vInpFl.Free;
  end;
end;
65
66 end.
= = = = = = = = = = = = = = file Split Act - I Scene - II = = = = = = = = = = = = =
=
The story so far was that I believed that I had made a decent file-splitting
function that was both small & fast. However, it was pointed out that it is
not fast when it comes to handling HUGE files. I then discovered the $F000 limit
on the intermediate memory buffer & thought it to be the cause. Another
suggestion was that using the "FILE_FLAG_SEQUENTIAL_SCAN" flag for opening the input &
output files would yield performance benefits. Keeping all the above in mind, I
re-worked my original code into the one given below. However, surprisingly, there is
no appreciable speed benefit!! Perhaps someone can tell me why and suggest
improvements...
67 unit SplitFl;
68
69 interface
70
71 procedure SplitFile(const pFileName: AnsiString; const pSplitSize: LongInt);
72
73 implementation
74
75 uses
76 Classes, SysUtils, Dialogs, Windows;
77
{ Returns the smaller of the two values with one twist: when b is the
  candidate (a is not strictly below b) and b is zero or negative, the
  result is 0. A negative a that is below b is returned unchanged. }
function Smaller(const a, b: LongInt): LongInt;
begin
  { Guard clause: a wins whenever it is strictly smaller. }
  if a < b then
  begin
    Result := a;
    Exit;
  end;

  { Otherwise b is the candidate, but never report a non-positive size. }
  if b > 0 then
    Result := b
  else
    Result := 0;
end;
91
{ Splits the file pFileName into consecutive pieces of at most pSplitSize
  bytes each, using raw file handles and a single reusable copy buffer.
  The pieces are written next to the original as pFileName.001,
  pFileName.002, ... and the original file is left untouched.

  Parameters:
    pFileName  - path of the file to split.
    pSplitSize - maximum size of each piece in bytes; must be > 0.

  Behaviour:
    - If the file is not larger than pSplitSize, nothing is written and an
      information dialog is shown.
    - Raises Exception when pSplitSize <= 0.
    - Raises EFOpenError / EFCreateError / EReadError / EWriteError when the
      corresponding file operation fails.
    - Handles and the buffer are always released, even on error. }
procedure SplitFile(const pFileName: AnsiString; const pSplitSize: LongInt);
const
  { Upper bound for the copy buffer; the split size is used when smaller. }
  cMaxBufferSize = 1024 * 1024;
var
  vInpFlHandle: Integer;
  vOutFlHandle: Integer;
  vInpBytesLft: Int64;
  vOutBytesLft: Int64;
  vBufferSize: Integer;
  vBytesWanted: Integer;
  vBytesDone: Integer;
  vBuffer: Pointer;
  vOutName: string;
  vCtr: Integer;
begin
  if pSplitSize <= 0 then
    raise Exception.CreateFmt('Invalid split size: %d', [pSplitSize]);

  { Alternative way to open the input with a read-ahead hint. Note that
    FILE_FLAG_SEQUENTIAL_SCAN must be OR-ed into the 6th argument
    (dwFlagsAndAttributes); the 7th argument is the template-file handle
    and must be 0 here:

      vInpFlHandle := Integer(CreateFile(PChar(pFileName), GENERIC_READ,
        FILE_SHARE_READ, nil, OPEN_EXISTING,
        FILE_ATTRIBUTE_NORMAL or FILE_FLAG_SEQUENTIAL_SCAN, 0)); }
  vInpFlHandle := FileOpen(pFileName, fmOpenRead or fmShareDenyWrite);
  if vInpFlHandle < 0 then
    raise EFOpenError.CreateFmt('Cannot open file "%s"', [pFileName]);

  try
    { Seeking to the end (origin 2) yields the file size; the Int64 overload
      keeps this correct for files larger than 2 GB. }
    vInpBytesLft := FileSeek(vInpFlHandle, Int64(0), 2);
    if vInpBytesLft < 0 then
      raise EReadError.CreateFmt('Cannot determine size of "%s"', [pFileName]);

    if vInpBytesLft > pSplitSize then
    begin
      { A fixed, bounded buffer: never zero-sized, never larger than a piece. }
      if pSplitSize < cMaxBufferSize then
        vBufferSize := pSplitSize
      else
        vBufferSize := cMaxBufferSize;

      GetMem(vBuffer, vBufferSize);
      try
        { Rewind to the start (origin 0) before copying. }
        FileSeek(vInpFlHandle, Int64(0), 0);
        vCtr := 0;

        while vInpBytesLft > 0 do
        begin
          Inc(vCtr);
          vOutName := pFileName + '.' + FormatFloat('000', vCtr);

          { Alternative with the sequential-scan hint (flag in the 6th
            argument, template handle 0):

              vOutFlHandle := Integer(CreateFile(PChar(vOutName),
                GENERIC_READ or GENERIC_WRITE, 0, nil, CREATE_ALWAYS,
                FILE_ATTRIBUTE_NORMAL or FILE_FLAG_SEQUENTIAL_SCAN, 0)); }
          vOutFlHandle := FileCreate(vOutName);
          if vOutFlHandle < 0 then
            raise EFCreateError.CreateFmt('Cannot create file "%s"', [vOutName]);

          try
            { The last piece may be shorter than pSplitSize. }
            if vInpBytesLft < pSplitSize then
              vOutBytesLft := vInpBytesLft
            else
              vOutBytesLft := pSplitSize;

            while vOutBytesLft > 0 do
            begin
              if vOutBytesLft < vBufferSize then
                vBytesWanted := Integer(vOutBytesLft)
              else
                vBytesWanted := vBufferSize;

              vBytesDone := FileRead(vInpFlHandle, vBuffer^, vBytesWanted);
              { 0 (unexpected end of file) or -1 (error) would otherwise
                make this loop spin forever or corrupt the counters. }
              if vBytesDone <= 0 then
                raise EReadError.CreateFmt('Error reading from "%s"', [pFileName]);

              if FileWrite(vOutFlHandle, vBuffer^, vBytesDone) <> vBytesDone then
                raise EWriteError.CreateFmt('Error writing to "%s"', [vOutName]);

              vInpBytesLft := vInpBytesLft - vBytesDone;
              vOutBytesLft := vOutBytesLft - vBytesDone;
            end;
          finally
            FileClose(vOutFlHandle);
          end;
        end;
      finally
        FreeMem(vBuffer);
      end;
    end
    else
      MessageDlg('File too small to split!', mtInformation, [mbOk], 0);
  finally
    FileClose(vInpFlHandle);
  end;
end;
151
152 end.
TFileStream.Create calls FileCreate in SysUtils. I've had some success by
creating a separate TFileStream constructor called TFileStream.CreateSeqScan that
calls the SeqScanFileCreate function below instead of FileCreate; it adds
FILE_FLAG_SEQUENTIAL_SCAN to the flags passed to the Windows API CreateFile:
153
{ Creates (or overwrites) FileName for exclusive read/write access through
  the Windows API CreateFile, passing FILE_FLAG_SEQUENTIAL_SCAN in the
  flags-and-attributes argument. Returns the resulting handle cast to
  Integer; the result is not checked here, so the caller must detect a
  failed create itself. }
function SeqScanFileCreate(const FileName: string): Integer;
const
  cDesiredAccess = GENERIC_READ or GENERIC_WRITE;
  cNoSharing = 0;
  cFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL or FILE_FLAG_SEQUENTIAL_SCAN;
var
  vHandle: THandle;
begin
  vHandle := CreateFile(PChar(FileName), cDesiredAccess, cNoSharing, nil,
    CREATE_ALWAYS, cFlagsAndAttributes, 0);
  Result := Integer(vHandle);
end;
This allows the operating system to read ahead as much as memory allows and to write in larger chunks than the $F000 buffer limit.
|