increase version number to 0.1
[ecparse.git] / CollectionReader.cpp
1 /**
2  * © 2008 by David Roden <droden@gmail.com>
3  */
4
5 #include <stdlib.h>
6 #include <string.h>
7 #include "CollectionReader.h"
8 #include "GlobalSettings.h"
9
10 CollectionReader::CollectionReader(ReaderInput* readerInput) {
11         this->readerInput = readerInput;
12         firstLink = true;
13 }
14
15 CollectionReader::~CollectionReader() {
16 }
17
18 bool CollectionReader::isLineBreakPresent() {
19         size_t indexOfLineBreak = growingBuffer.indexOf('\n');
20         GlobalSettings::isVerbose() && (indexOfLineBreak != (size_t) -1) && fprintf(stderr, "[%s:%d] found line break at %d.\n", __FILE__, __LINE__, indexOfLineBreak);
21         GlobalSettings::isVerbose() && (indexOfLineBreak == (size_t) -1) && fprintf(stderr, "[%s:%d] could not find line break.\n", __FILE__, __LINE__);
22         return indexOfLineBreak != (size_t) -1;
23 }
24
25 StringTag* CollectionReader::readStringTag(bool header) {
26         if (header) {
27                 uint16_t unknown = 0;
28                 if (!ensureBufferCapacity(2)) {
29                         return NULL;
30                 }
31                 growingBuffer.read(&unknown, 2);
32         }
33         uint8_t tagName = 0;
34         if (!ensureBufferCapacity(1)) {
35                 return NULL;
36         }
37         growingBuffer.read(&tagName, 1);
38         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag name %d.\n", __FILE__, __LINE__, tagName);
39         uint16_t tagLength = 0;
40         if (!ensureBufferCapacity(2)) {
41                 return NULL;
42         }
43         growingBuffer.read(&tagLength, 2);
44         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag length %d.\n", __FILE__, __LINE__, tagLength);
45         char* tagValue = (char*) malloc(tagLength + 1);
46         if (!ensureBufferCapacity(tagLength) || !tagValue) {
47                 return NULL;
48         }
49         growingBuffer.read(tagValue, tagLength);
50         tagValue[tagLength] = '\0';
51         StringTag* stringTag = new StringTag(tagName, tagValue);
52         free(tagValue);
53         return stringTag;
54 }
55
56 BlobTag* CollectionReader::readBlobTag(bool header) {
57         if (header) {
58                 uint16_t unknown = 0;
59                 if (!ensureBufferCapacity(2)) {
60                         return NULL;
61                 }
62                 growingBuffer.read(&unknown, 2);
63         }
64         uint8_t tagName = 0;
65         if (!ensureBufferCapacity(1)) {
66                 return NULL;
67         }
68         growingBuffer.read(&tagName, 1);
69         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag name %d.\n", __FILE__, __LINE__, tagName);
70         uint32_t tagLength = 0;
71         if (!ensureBufferCapacity(4)) {
72                 return NULL;
73         }
74         growingBuffer.read(&tagLength, 4);
75         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag length %d.\n", __FILE__, __LINE__, tagLength);
76         void* tagValue = (char*) malloc(tagLength);
77         if (!ensureBufferCapacity(tagLength)) {
78                 return NULL;
79         }
80         growingBuffer.read(tagValue, tagLength);
81         BlobTag* blobTag = new BlobTag(tagName, tagValue, tagLength);
82         free(tagValue);
83         return blobTag;
84 }
85
86 HashTag* CollectionReader::readHashTag() {
87         if (!ensureBufferCapacity(1)) {
88                 return NULL;
89         }
90         uint8_t tagId = 0;
91         growingBuffer.read(&tagId, 1);
92         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag id %d.\n", __FILE__, __LINE__, tagId);
93         if (!ensureBufferCapacity(16)) {
94                 return NULL;
95         }
96         char hash[16];
97         growingBuffer.read(hash, 16);
98         return new HashTag(tagId, hash);
99 }
100
101 ED2KLink* CollectionReader::getNextLink() {
102         if (readerInput->isEOF() && !growingBuffer.getRemaining()) {
103                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__);
104                 return NULL;
105         }
106         if (firstLink) {
107                 identifyCollectionType();
108                 if (readerInput->isEOF() && !growingBuffer.getRemaining()) {
109                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__);
110                         return NULL;
111                 }
112         }
113         if (isTextCollection) {
114                 while (!readerInput->isEOF() && !isLineBreakPresent()) {
115                         readMoreBytes();
116                 }
117                 if (readerInput->isEOF() && !growingBuffer.getRemaining()) {
118                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__);
119                         return NULL;
120                 }
121                 size_t indexOfLineBreak = growingBuffer.indexOf('\n');
122                 char* line;
123                 if (indexOfLineBreak == (size_t) -1) {
124                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] could not find line break, using remainder of file.\n", __FILE__, __LINE__);
125                         indexOfLineBreak = growingBuffer.getRemaining();
126                         line = (char*) malloc(indexOfLineBreak + 1);
127                         growingBuffer.read(line, indexOfLineBreak);
128                         line[indexOfLineBreak] = '\0';
129                 } else {
130                         line = (char*) malloc(indexOfLineBreak + 1);
131                         growingBuffer.read(line, indexOfLineBreak + 1);
132                         if (line[indexOfLineBreak] == '\n') {
133                                 line[indexOfLineBreak] = '\0';
134                         }
135                         if (line[indexOfLineBreak - 1] == '\r') {
136                                 line[indexOfLineBreak - 1] = '\0';
137                         }
138                 }
139                 growingBuffer.cut();
140                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] got line: %s\n", __FILE__, __LINE__, line);
141                 ED2KLink* ed2kLink = ED2KLink::parseED2KLink(line);
142                 free(line);
143                 firstLink = false;
144                 return ed2kLink;
145         }
146         if (firstLink) {
147                 /* read header */
148                 if (!ensureBufferCapacity(4)) {
149                         return NULL;
150                 }
151                 uint32_t headerTagCount = 0;
152                 growingBuffer.read(&headerTagCount, 4);
153                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] will read %d header tags.\n", __FILE__, __LINE__, headerTagCount);
154                 for (uint32_t headerTagIndex = 0; headerTagIndex < headerTagCount; headerTagIndex++) {
155                         uint8_t tagType = 0;
156                         if (!ensureBufferCapacity(1)) {
157                                 return NULL;
158                         }
159                         growingBuffer.read(&tagType, 1);
160                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag type %d.\n", __FILE__, __LINE__, tagType);
161                         if (tagType == 0x02) {
162                                 StringTag* stringTag = readStringTag(true);
163                                 if (stringTag->getId() == 0x01) {
164                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] FT_FILENAME: “%s”\n", __FILE__, __LINE__, (char*) stringTag->getValue());
165                                 } else if (stringTag->getId() == 0x31) {
166                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] FT_COLLECTIONAUTHOR: “%s”\n", __FILE__, __LINE__, (char*) stringTag->getValue());
167                                 } else {
168                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] unknown string in header: “%s”\n", __FILE__, __LINE__, (char*) stringTag->getValue());
169                                 }
170                         } else if (tagType == 0x07) {
171                                 BlobTag* blobTag = readBlobTag(true);
172                                 if (blobTag->getId() == 0x32) {
173                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] FT_COLLECTIONAUTHORKEY: %d bytes.\n", __FILE__, __LINE__, blobTag->getSize());
174                                 } else {
175                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] unknown block in header: %d bytes.\n", __FILE__, __LINE__, blobTag->getSize());
176                                 }
177                         } else {
178                                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] unknown tag type: %02x.\n", __FILE__, __LINE__, tagType);
179                         }
180                 }
181                 fileCollectionCount = 0;
182                 if (!ensureBufferCapacity(4)) {
183                         return NULL;
184                 }
185                 growingBuffer.read(&fileCollectionCount, 4);
186                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] will read %d files.\n", __FILE__, __LINE__, fileCollectionCount);
187                 collectionFileIndex = 0;
188                 firstLink = false;
189         }
190         if (collectionFileIndex < fileCollectionCount) {
191                 uint32_t fileTagCount = 0;
192                 if (!ensureBufferCapacity(4)) {
193                         return NULL;
194                 }
195                 growingBuffer.read(&fileTagCount, 4);
196                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] will read %d file tags.\n", __FILE__, __LINE__, fileTagCount);
197                 StringTag* filenameTag = NULL;
198                 uint64_t size = 0;
199                 HashTag* hashTag = NULL;
200                 for (uint32_t fileTagIndex = 0; fileTagIndex < fileTagCount; fileTagIndex++) {
201                         uint8_t tagType = 0;
202                         if (!ensureBufferCapacity(1)) {
203                                 return NULL;
204                         }
205                         growingBuffer.read(&tagType, 1);
206                         tagType &= 0x7f;
207                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] reading tag type %d.\n", __FILE__, __LINE__, tagType);
208                         if (tagType == 0x01) {
209                                 hashTag = readHashTag();
210                                 if (!hashTag) {
211                                         return NULL;
212                                 }
213                         } else if (tagType == 0x02) {
214                                 StringTag* stringTag = readStringTag();
215                                 if (!stringTag) {
216                                         return NULL;
217                                 }
218                                 if (stringTag->getId() == 0x01) {
219                                         filenameTag = stringTag;
220                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read file name “%s”.\n", __FILE__, __LINE__, (char*) stringTag->getValue());
221                                 } else if (stringTag->getId() == 0xf6) {
222                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read file comment “%s”.\n", __FILE__, __LINE__, (char*) stringTag->getValue());
223                                         delete stringTag;
224                                 }
225                         } else if ((tagType == 0x03) || (tagType == 0x08) || (tagType == 0x09) || (tagType == 0x0b)) {
226                                 if (!ensureBufferCapacity(1)) {
227                                         return NULL;
228                                 }
229                                 uint8_t id = 0;
230                                 growingBuffer.read(&id, 1);
231                                 if (id == 0x02) {
232                                         int neededBytes = (tagType == 0x03) ? 4 : ((tagType == 0x08) ? 2 : ((tagType == 0x09) ? 1 : 8));
233                                         if (!ensureBufferCapacity(neededBytes)) {
234                                                 return NULL;
235                                         }
236                                         growingBuffer.read(&size, neededBytes);
237                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] file size is %llu bytes.\n", __FILE__, __LINE__, size);
238                                 } else if (id == 0xf7) {
239                                         uint8_t fileRating = 0;
240                                         if (!ensureBufferCapacity(1)) {
241                                                 return NULL;
242                                         }
243                                         growingBuffer.read(&fileRating, 1);
244                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read file rating %d.\n", __FILE__, __LINE__, fileRating);
245                                 }
246                         }
247                 }
248                 collectionFileIndex++;
249                 growingBuffer.cut();
250                 if (filenameTag && hashTag) {
251                         ED2KLink* ed2kLink = new ED2KLink((char*) filenameTag->getValue(), size, hashTag->getValue());
252                         delete filenameTag;
253                         delete hashTag;
254                         return ed2kLink;
255                 } else {
256                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] not enough data to decode file.\n", __FILE__, __LINE__);
257                 }
258                 if (filenameTag) {
259                         delete filenameTag;
260                 }
261                 if (hashTag) {
262                         delete hashTag;
263                 }
264         } else {
265                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] reached end of collection.\n", __FILE__, __LINE__);
266         }
267         return NULL;
268 }
269
270 bool CollectionReader::ensureBufferCapacity(size_t byteCount) {
271         while (!readerInput->isEOF() && (growingBuffer.getRemaining() < byteCount)) {
272                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] only %d bytes remaning, need at least %d, reading more bytes.\n", __FILE__, __LINE__, growingBuffer.getRemaining(), byteCount);
273                 readMoreBytes();
274         }
275         if (readerInput->isEOF() && (growingBuffer.getRemaining() < byteCount)) {
276                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] need %d more bytes, but file is EOF.\n", __FILE__, __LINE__, (byteCount - growingBuffer.getRemaining()));
277                 return false;
278         }
279         return true;
280 }
281
282 void CollectionReader::readMoreBytes() {
283         char buffer[1024];
284         size_t readBytes;
285
286         readBytes = readerInput->read(buffer, 1024);
287         growingBuffer.write(buffer, readBytes);
288 }
289
290 void CollectionReader::identifyCollectionType() {
291         size_t readBytes;
292
293         readBytes = readerInput->read(&version, 4);
294         if ((readBytes < 4) || readerInput->isEOF()) {
295                 return;
296         }
297         if (version == 0x01) {
298                 isTextCollection = false;
299                 this->version = 1;
300                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified binary collection, version 1\n", __FILE__, __LINE__);
301         } else if (version == 0x02) {
302                 isTextCollection = false;
303                 this->version = 2;
304                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified binary collection, version 2\n", __FILE__, __LINE__);
305         } else if (!strncmp("ed2k", (char*) &version, 4)) {
306                 isTextCollection = true;
307                 growingBuffer.write(&version, 4);
308                 version = 0;
309                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified text collection\n", __FILE__, __LINE__);
310         } else  {
311                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] could not identify collection!\n", __FILE__, __LINE__);
312         }
313 }
314