add virtual getSize method
[ecparse.git] / CollectionReader.cpp
1 /**
2  * © 2008 by David Roden <droden@gmail.com>
3  */
4
5 #include <stdlib.h>
6 #include <string.h>
7 #include <stdint.h>
8 #include "CollectionReader.h"
9 #include "GlobalSettings.h"
10
11 CollectionReader::CollectionReader(ReaderInput* readerInput) {
12         this->readerInput = readerInput;
13         firstLink = true;
14 }
15
16 CollectionReader::~CollectionReader() {
17 }
18
19 bool CollectionReader::isLineBreakPresent() {
20         size_t indexOfLineBreak = growingBuffer.indexOf('\n');
21         GlobalSettings::isVerbose() && (indexOfLineBreak != (size_t) -1) && fprintf(stderr, "[%s:%d] found line break at %d.\n", __FILE__, __LINE__, indexOfLineBreak);
22         GlobalSettings::isVerbose() && (indexOfLineBreak == (size_t) -1) && fprintf(stderr, "[%s:%d] could not find line break.\n", __FILE__, __LINE__);
23         return indexOfLineBreak != (size_t) -1;
24 }
25
26 ED2KLink* CollectionReader::getNextLink() {
27         if (readerInput->isEOF() && !growingBuffer.getRemaining()) {
28                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__);
29                 return NULL;
30         }
31         if (firstLink) {
32                 identifyCollectionType();
33                 if (readerInput->isEOF() && !growingBuffer.getRemaining()) {
34                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__);
35                         return NULL;
36                 }
37                 firstLink = false;
38         }
39         if (isTextCollection) {
40                 while (!readerInput->isEOF() && !isLineBreakPresent()) {
41                         readMoreBytes();
42                 }
43                 if (readerInput->isEOF() && !growingBuffer.getRemaining()) {
44                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__);
45                         return NULL;
46                 }
47                 size_t indexOfLineBreak = growingBuffer.indexOf('\n');
48                 char* line;
49                 if (indexOfLineBreak == (size_t) -1) {
50                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] could not find line break, using remainder of file.\n", __FILE__, __LINE__);
51                         indexOfLineBreak = growingBuffer.getRemaining();
52                         line = (char*) malloc(indexOfLineBreak + 1);
53                         growingBuffer.read(line, indexOfLineBreak);
54                         line[indexOfLineBreak] = '\0';
55                 } else {
56                         line = (char*) malloc(indexOfLineBreak + 1);
57                         growingBuffer.read(line, indexOfLineBreak + 1);
58                         if (line[indexOfLineBreak] == '\n') {
59                                 line[indexOfLineBreak] = '\0';
60                         }
61                         if (line[indexOfLineBreak - 1] == '\r') {
62                                 line[indexOfLineBreak - 1] = '\0';
63                         }
64                 }
65                 growingBuffer.cut();
66                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] got line: %s\n", __FILE__, __LINE__, line);
67                 ED2KLink* ed2kLink = ED2KLink::parseED2KLink(line);
68                 free(line);
69                 return ed2kLink;
70         } else {
71                 /* read header */
72                 if (!ensureBufferCapacity(4)) {
73                         return NULL;
74                 }
75                 uint32_t headerTagCount = 0;
76                 growingBuffer.read(&headerTagCount, 4);
77                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] will read %d header tags.\n", __FILE__, __LINE__, headerTagCount);
78                 for (uint32_t headerTagIndex = 0; headerTagIndex < headerTagCount; headerTagIndex++) {
79                         uint8_t tagType = 0;
80                         if (!ensureBufferCapacity(1)) {
81                                 return NULL;
82                         }
83                         growingBuffer.read(&tagType, 1);
84                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag type %d.\n", __FILE__, __LINE__, tagType);
85                         if (tagType == 0x02) {
86                                 uint16_t unknown = 0;
87                                 if (!ensureBufferCapacity(2)) {
88                                         return NULL;
89                                 }
90                                 growingBuffer.read(&unknown, 2);
91                                 uint8_t tagName = 0;
92                                 if (!ensureBufferCapacity(1)) {
93                                         return NULL;
94                                 }
95                                 growingBuffer.read(&tagName, 1);
96                                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag name %d.\n", __FILE__, __LINE__, tagName);
97                                 uint16_t tagLength = 0;
98                                 if (!ensureBufferCapacity(2)) {
99                                         return NULL;
100                                 }
101                                 growingBuffer.read(&tagLength, 2);
102                                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag length %d.\n", __FILE__, __LINE__, tagLength);
103                                 char* tagValue = (char*) malloc(tagLength + 1);
104                                 if (!ensureBufferCapacity(tagLength)) {
105                                         return NULL;
106                                 }
107                                 growingBuffer.read(tagValue, tagLength);
108                                 tagValue[tagLength] = '\0';
109                                 if (tagName == 0x01) {
110                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read FT_FILENAME: “%s”.\n", __FILE__, __LINE__, tagValue);
111                                 } else if (tagName == 0x31) {
112                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read FT_COLLECTIONAUTHOR: “%s”.\n", __FILE__, __LINE__, tagValue);
113                                 } else {
114                                         GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] unknown tag: “%s”.\n", __FILE__, __LINE__, tagValue);
115                                 }
116                         } else if (tagType == 0x07) {
117                         }
118                 }
119         }
120         return NULL;
121 }
122
123 bool CollectionReader::ensureBufferCapacity(size_t byteCount) {
124         while (!readerInput->isEOF() && (growingBuffer.getRemaining() < byteCount)) {
125                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] only %d bytes remaning, need at least %d, reading more bytes.\n", __FILE__, __LINE__, growingBuffer.getRemaining(), byteCount);
126                 readMoreBytes();
127         }
128         if (readerInput->isEOF() && (growingBuffer.getRemaining() < byteCount)) {
129                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] need %d more bytes, but file is EOF.\n", __FILE__, __LINE__, (byteCount - growingBuffer.getRemaining()));
130                 return false;
131         }
132         return true;
133 }
134
135 void CollectionReader::readMoreBytes() {
136         char buffer[1024];
137         size_t readBytes;
138
139         readBytes = readerInput->read(buffer, 1024);
140         growingBuffer.write(buffer, readBytes);
141 }
142
143 void CollectionReader::identifyCollectionType() {
144         int version;
145         size_t readBytes;
146
147         readBytes = readerInput->read(&version, 4);
148         if ((readBytes < 4) || readerInput->isEOF()) {
149                 return;
150         }
151         if (version == 0x01) {
152                 isTextCollection = false;
153                 this->version = 1;
154                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified binary collection, version 1\n", __FILE__, __LINE__);
155         } else if (version == 0x02) {
156                 isTextCollection = false;
157                 this->version = 2;
158                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified binary collection, version 2\n", __FILE__, __LINE__);
159         } else if (!strncmp("ed2k", (char*) &version, 4)) {
160                 isTextCollection = true;
161                 growingBuffer.write(&version, 4);
162                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified text collection\n", __FILE__, __LINE__);
163         } else  {
164                 GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] could not identify collection!\n", __FILE__, __LINE__);
165         }
166 }
167