parse header only on first link
[ecparse.git] / CollectionReader.cpp
1 /**
2  * © 2008 by David Roden <droden@gmail.com>
3  */
4
5 #include <stdlib.h>
6 #include <string.h>
7 #include "CollectionReader.h"
8 #include "GlobalSettings.h"
9
10 CollectionReader::CollectionReader(ReaderInput* readerInput) {
11         this->readerInput = readerInput;
12         firstLink = true;
13 }
14
/**
 * Destroys the reader. The body is intentionally empty: nothing is
 * released here, in particular the ReaderInput pointer handed to the
 * constructor is left untouched.
 */
CollectionReader::~CollectionReader() {
}
17
18 bool CollectionReader::isLineBreakPresent() {
19         size_t indexOfLineBreak = growingBuffer.indexOf('\n');
20         GlobalSettings::isVerbose() && (indexOfLineBreak != (size_t) -1) && fprintf(stderr, "[%s:%d] found line break at %d.\n", __FILE__, __LINE__, indexOfLineBreak);
21         GlobalSettings::isVerbose() && (indexOfLineBreak == (size_t) -1) && fprintf(stderr, "[%s:%d] could not find line break.\n", __FILE__, __LINE__);
22         return indexOfLineBreak != (size_t) -1;
23 }
24
25 StringTag* CollectionReader::readStringTag(bool header) {
26         if (header) {
27                 uint16_t unknown = 0;
28                 if (!ensureBufferCapacity(2)) {
29                         return NULL;
30                 }
31                 growingBuffer.read(&unknown, 2);
32         }
33         uint8_t tagName = 0;
34         if (!ensureBufferCapacity(1)) {
35                 return NULL;
36         }
37         growingBuffer.read(&tagName, 1);
38         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag name %d.\n", __FILE__, __LINE__, tagName);
39         uint16_t tagLength = 0;
40         if (!ensureBufferCapacity(2)) {
41                 return NULL;
42         }
43         growingBuffer.read(&tagLength, 2);
44         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag length %d.\n", __FILE__, __LINE__, tagLength);
45         char* tagValue = (char*) malloc(tagLength + 1);
46         if (!ensureBufferCapacity(tagLength)) {
47                 return NULL;
48         }
49         growingBuffer.read(tagValue, tagLength);
50         tagValue[tagLength] = '\0';
51         StringTag* stringTag = new StringTag(tagName, tagValue);
52         free(tagValue);
53         return stringTag;
54 }
55
56 BlobTag* CollectionReader::readBlobTag(bool header) {
57         if (header) {
58                 uint16_t unknown = 0;
59                 if (!ensureBufferCapacity(2)) {
60                         return NULL;
61                 }
62                 growingBuffer.read(&unknown, 2);
63         }
64         uint8_t tagName = 0;
65         if (!ensureBufferCapacity(1)) {
66                 return NULL;
67         }
68         growingBuffer.read(&tagName, 1);
69         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag name %d.\n", __FILE__, __LINE__, tagName);
70         uint32_t tagLength = 0;
71         if (!ensureBufferCapacity(4)) {
72                 return NULL;
73         }
74         growingBuffer.read(&tagLength, 4);
75         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag length %d.\n", __FILE__, __LINE__, tagLength);
76         void* tagValue = (char*) malloc(tagLength);
77         if (!ensureBufferCapacity(tagLength)) {
78                 return NULL;
79         }
80         growingBuffer.read(tagValue, tagLength);
81         BlobTag* blobTag = new BlobTag(tagName, tagValue, tagLength);
82         free(tagValue);
83         return blobTag;
84 }
85
86 ED2KLink* CollectionReader::getNextLink() {
87         if (readerInput->isEOF() && !growingBuffer.getRemaining()) {
88                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__);
89                 return NULL;
90         }
91         if (firstLink) {
92                 identifyCollectionType();
93                 if (readerInput->isEOF() && !growingBuffer.getRemaining()) {
94                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__);
95                         return NULL;
96                 }
97         }
98         if (isTextCollection) {
99                 while (!readerInput->isEOF() && !isLineBreakPresent()) {
100                         readMoreBytes();
101                 }
102                 if (readerInput->isEOF() && !growingBuffer.getRemaining()) {
103                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__);
104                         return NULL;
105                 }
106                 size_t indexOfLineBreak = growingBuffer.indexOf('\n');
107                 char* line;
108                 if (indexOfLineBreak == (size_t) -1) {
109                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] could not find line break, using remainder of file.\n", __FILE__, __LINE__);
110                         indexOfLineBreak = growingBuffer.getRemaining();
111                         line = (char*) malloc(indexOfLineBreak + 1);
112                         growingBuffer.read(line, indexOfLineBreak);
113                         line[indexOfLineBreak] = '\0';
114                 } else {
115                         line = (char*) malloc(indexOfLineBreak + 1);
116                         growingBuffer.read(line, indexOfLineBreak + 1);
117                         if (line[indexOfLineBreak] == '\n') {
118                                 line[indexOfLineBreak] = '\0';
119                         }
120                         if (line[indexOfLineBreak - 1] == '\r') {
121                                 line[indexOfLineBreak - 1] = '\0';
122                         }
123                 }
124                 growingBuffer.cut();
125                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] got line: %s\n", __FILE__, __LINE__, line);
126                 ED2KLink* ed2kLink = ED2KLink::parseED2KLink(line);
127                 free(line);
128                 return ed2kLink;
129         }
130         if (firstLink) {
131                 /* read header */
132                 if (!ensureBufferCapacity(4)) {
133                         return NULL;
134                 }
135                 uint32_t headerTagCount = 0;
136                 growingBuffer.read(&headerTagCount, 4);
137                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] will read %d header tags.\n", __FILE__, __LINE__, headerTagCount);
138                 for (uint32_t headerTagIndex = 0; headerTagIndex < headerTagCount; headerTagIndex++) {
139                         uint8_t tagType = 0;
140                         if (!ensureBufferCapacity(1)) {
141                                 return NULL;
142                         }
143                         growingBuffer.read(&tagType, 1);
144                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag type %d.\n", __FILE__, __LINE__, tagType);
145                         if (tagType == 0x02) {
146                                 StringTag* stringTag = readStringTag(true);
147                                 if (stringTag->getId() == 0x01) {
148                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] FT_FILENAME: “%s”\n", __FILE__, __LINE__, (char*) stringTag->getValue());
149                                 } else if (stringTag->getId() == 0x31) {
150                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] FT_COLLECTIONAUTHOR: “%s”\n", __FILE__, __LINE__, (char*) stringTag->getValue());
151                                 } else {
152                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] unknown string in header: “%s”\n", __FILE__, __LINE__, (char*) stringTag->getValue());
153                                 }
154                         } else if (tagType == 0x07) {
155                                 BlobTag* blobTag = readBlobTag(true);
156                                 if (blobTag->getId() == 0x32) {
157                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] FT_COLLECTIONAUTHORKEY: %d bytes.\n", __FILE__, __LINE__, blobTag->getSize());
158                                 } else {
159                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] unknown block in header: %d bytes.\n", __FILE__, __LINE__, blobTag->getSize());
160                                 }
161                         } else {
162                                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] unknown tag type: %02x.\n", __FILE__, __LINE__, tagType);
163                         }
164                 }
165                 fileCollectionCount = 0;
166                 if (!ensureBufferCapacity(4)) {
167                         return NULL;
168                 }
169                 growingBuffer.read(&fileCollectionCount, 4);
170                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] will read %d file tags.\n", __FILE__, __LINE__, fileCollectionCount);
171                 collectionFileIndex = 0;
172                 firstLink = false;
173         }
174         if (collectionFileIndex < fileCollectionCount) {
175         } else {
176                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] reached end of collection.\n", __FILE__, __LINE__);
177         }
178         return NULL;
179 }
180
181 bool CollectionReader::ensureBufferCapacity(size_t byteCount) {
182         while (!readerInput->isEOF() && (growingBuffer.getRemaining() < byteCount)) {
183                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] only %d bytes remaning, need at least %d, reading more bytes.\n", __FILE__, __LINE__, growingBuffer.getRemaining(), byteCount);
184                 readMoreBytes();
185         }
186         if (readerInput->isEOF() && (growingBuffer.getRemaining() < byteCount)) {
187                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] need %d more bytes, but file is EOF.\n", __FILE__, __LINE__, (byteCount - growingBuffer.getRemaining()));
188                 return false;
189         }
190         return true;
191 }
192
193 void CollectionReader::readMoreBytes() {
194         char buffer[1024];
195         size_t readBytes;
196
197         readBytes = readerInput->read(buffer, 1024);
198         growingBuffer.write(buffer, readBytes);
199 }
200
201 void CollectionReader::identifyCollectionType() {
202         size_t readBytes;
203
204         readBytes = readerInput->read(&version, 4);
205         if ((readBytes < 4) || readerInput->isEOF()) {
206                 return;
207         }
208         if (version == 0x01) {
209                 isTextCollection = false;
210                 this->version = 1;
211                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified binary collection, version 1\n", __FILE__, __LINE__);
212         } else if (version == 0x02) {
213                 isTextCollection = false;
214                 this->version = 2;
215                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified binary collection, version 2\n", __FILE__, __LINE__);
216         } else if (!strncmp("ed2k", (char*) &version, 4)) {
217                 isTextCollection = true;
218                 growingBuffer.write(&version, 4);
219                 version = 0;
220                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified text collection\n", __FILE__, __LINE__);
221         } else  {
222                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] could not identify collection!\n", __FILE__, __LINE__);
223         }
224 }
225