parse header tags
[ecparse.git] / CollectionReader.cpp
1 /**
2  * © 2008 by David Roden <droden@gmail.com>
3  */
4
5 #include <stdlib.h>
6 #include <string.h>
7 #include <stdint.h>
8 #include "CollectionReader.h"
9 #include "GlobalSettings.h"
10
11 CollectionReader::CollectionReader(ReaderInput* readerInput) {
12         this->readerInput = readerInput;
13         firstLink = true;
14 }
15
16 CollectionReader::~CollectionReader() {
17 }
18
19 bool CollectionReader::isLineBreakPresent() {
20         size_t indexOfLineBreak = growingBuffer.indexOf('\n');
21         GlobalSettings::isVerbose() && (indexOfLineBreak != (size_t) -1) && fprintf(stderr, "[%s:%d] found line break at %d.\n", __FILE__, __LINE__, indexOfLineBreak);
22         GlobalSettings::isVerbose() && (indexOfLineBreak == (size_t) -1) && fprintf(stderr, "[%s:%d] could not find line break.\n", __FILE__, __LINE__);
23         return indexOfLineBreak != (size_t) -1;
24 }
25
26 StringTag* CollectionReader::readStringTag(bool header) {
27         if (header) {
28                 uint16_t unknown = 0;
29                 if (!ensureBufferCapacity(2)) {
30                         return NULL;
31                 }
32                 growingBuffer.read(&unknown, 2);
33         }
34         uint8_t tagName = 0;
35         if (!ensureBufferCapacity(1)) {
36                 return NULL;
37         }
38         growingBuffer.read(&tagName, 1);
39         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag name %d.\n", __FILE__, __LINE__, tagName);
40         uint16_t tagLength = 0;
41         if (!ensureBufferCapacity(2)) {
42                 return NULL;
43         }
44         growingBuffer.read(&tagLength, 2);
45         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag length %d.\n", __FILE__, __LINE__, tagLength);
46         char* tagValue = (char*) malloc(tagLength + 1);
47         if (!ensureBufferCapacity(tagLength)) {
48                 return NULL;
49         }
50         growingBuffer.read(tagValue, tagLength);
51         tagValue[tagLength] = '\0';
52         StringTag* stringTag = new StringTag(tagName, tagValue);
53         free(tagValue);
54         return stringTag;
55 }
56
57 BlobTag* CollectionReader::readBlobTag(bool header) {
58         if (header) {
59                 uint16_t unknown = 0;
60                 if (!ensureBufferCapacity(2)) {
61                         return NULL;
62                 }
63                 growingBuffer.read(&unknown, 2);
64         }
65         uint8_t tagName = 0;
66         if (!ensureBufferCapacity(1)) {
67                 return NULL;
68         }
69         growingBuffer.read(&tagName, 1);
70         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag name %d.\n", __FILE__, __LINE__, tagName);
71         uint32_t tagLength = 0;
72         if (!ensureBufferCapacity(4)) {
73                 return NULL;
74         }
75         growingBuffer.read(&tagLength, 4);
76         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag length %d.\n", __FILE__, __LINE__, tagLength);
77         void* tagValue = (char*) malloc(tagLength);
78         if (!ensureBufferCapacity(tagLength)) {
79                 return NULL;
80         }
81         growingBuffer.read(tagValue, tagLength);
82         BlobTag* blobTag = new BlobTag(tagName, tagValue, tagLength);
83         free(tagValue);
84         return blobTag;
85 }
86
87 ED2KLink* CollectionReader::getNextLink() {
88         if (readerInput->isEOF() && !growingBuffer.getRemaining()) {
89                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__);
90                 return NULL;
91         }
92         if (firstLink) {
93                 identifyCollectionType();
94                 if (readerInput->isEOF() && !growingBuffer.getRemaining()) {
95                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__);
96                         return NULL;
97                 }
98                 firstLink = false;
99         }
100         if (isTextCollection) {
101                 while (!readerInput->isEOF() && !isLineBreakPresent()) {
102                         readMoreBytes();
103                 }
104                 if (readerInput->isEOF() && !growingBuffer.getRemaining()) {
105                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__);
106                         return NULL;
107                 }
108                 size_t indexOfLineBreak = growingBuffer.indexOf('\n');
109                 char* line;
110                 if (indexOfLineBreak == (size_t) -1) {
111                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] could not find line break, using remainder of file.\n", __FILE__, __LINE__);
112                         indexOfLineBreak = growingBuffer.getRemaining();
113                         line = (char*) malloc(indexOfLineBreak + 1);
114                         growingBuffer.read(line, indexOfLineBreak);
115                         line[indexOfLineBreak] = '\0';
116                 } else {
117                         line = (char*) malloc(indexOfLineBreak + 1);
118                         growingBuffer.read(line, indexOfLineBreak + 1);
119                         if (line[indexOfLineBreak] == '\n') {
120                                 line[indexOfLineBreak] = '\0';
121                         }
122                         if (line[indexOfLineBreak - 1] == '\r') {
123                                 line[indexOfLineBreak - 1] = '\0';
124                         }
125                 }
126                 growingBuffer.cut();
127                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] got line: %s\n", __FILE__, __LINE__, line);
128                 ED2KLink* ed2kLink = ED2KLink::parseED2KLink(line);
129                 free(line);
130                 return ed2kLink;
131         } else {
132                 /* read header */
133                 if (!ensureBufferCapacity(4)) {
134                         return NULL;
135                 }
136                 uint32_t headerTagCount = 0;
137                 growingBuffer.read(&headerTagCount, 4);
138                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] will read %d header tags.\n", __FILE__, __LINE__, headerTagCount);
139                 for (uint32_t headerTagIndex = 0; headerTagIndex < headerTagCount; headerTagIndex++) {
140                         uint8_t tagType = 0;
141                         if (!ensureBufferCapacity(1)) {
142                                 return NULL;
143                         }
144                         growingBuffer.read(&tagType, 1);
145                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag type %d.\n", __FILE__, __LINE__, tagType);
146                         if (tagType == 0x02) {
147                                 StringTag* stringTag = readStringTag(true);
148                                 if (stringTag->getId() == 0x01) {
149                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] FT_FILENAME: “%s”\n", __FILE__, __LINE__, (char*) stringTag->getValue());
150                                 } else if (stringTag->getId() == 0x31) {
151                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] FT_COLLECTIONAUTHOR: “%s”\n", __FILE__, __LINE__, (char*) stringTag->getValue());
152                                 } else {
153                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] unknown string in header: “%s”\n", __FILE__, __LINE__, (char*) stringTag->getValue());
154                                 }
155                         } else if (tagType == 0x07) {
156                                 BlobTag* blobTag = readBlobTag(true);
157                                 if (blobTag->getId() == 0x32) {
158                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] FT_COLLECTIONAUTHORKEY: %d bytes.\n", __FILE__, __LINE__, blobTag->getSize());
159                                 } else {
160                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] unknown block in header: %d bytes.\n", __FILE__, __LINE__, blobTag->getSize());
161                                 }
162                         } else {
163                                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] unknown tag type: %02x.\n", __FILE__, __LINE__, tagType);
164                         }
165                 }
166         }
167         return NULL;
168 }
169
170 bool CollectionReader::ensureBufferCapacity(size_t byteCount) {
171         while (!readerInput->isEOF() && (growingBuffer.getRemaining() < byteCount)) {
172                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] only %d bytes remaning, need at least %d, reading more bytes.\n", __FILE__, __LINE__, growingBuffer.getRemaining(), byteCount);
173                 readMoreBytes();
174         }
175         if (readerInput->isEOF() && (growingBuffer.getRemaining() < byteCount)) {
176                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] need %d more bytes, but file is EOF.\n", __FILE__, __LINE__, (byteCount - growingBuffer.getRemaining()));
177                 return false;
178         }
179         return true;
180 }
181
182 void CollectionReader::readMoreBytes() {
183         char buffer[1024];
184         size_t readBytes;
185
186         readBytes = readerInput->read(buffer, 1024);
187         growingBuffer.write(buffer, readBytes);
188 }
189
190 void CollectionReader::identifyCollectionType() {
191         int version;
192         size_t readBytes;
193
194         readBytes = readerInput->read(&version, 4);
195         if ((readBytes < 4) || readerInput->isEOF()) {
196                 return;
197         }
198         if (version == 0x01) {
199                 isTextCollection = false;
200                 this->version = 1;
201                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified binary collection, version 1\n", __FILE__, __LINE__);
202         } else if (version == 0x02) {
203                 isTextCollection = false;
204                 this->version = 2;
205                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified binary collection, version 2\n", __FILE__, __LINE__);
206         } else if (!strncmp("ed2k", (char*) &version, 4)) {
207                 isTextCollection = true;
208                 growingBuffer.write(&version, 4);
209                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified text collection\n", __FILE__, __LINE__);
210         } else  {
211                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] could not identify collection!\n", __FILE__, __LINE__);
212         }
213 }
214