X-Git-Url: https://git.pterodactylus.net/?p=ecparse.git;a=blobdiff_plain;f=CollectionReader.cpp;h=ade76227ede34a74ed35cd0fd191b86e9b5ed1e5;hp=27f9ca62df40e341dd84215a53165b8488241595;hb=7cb0ee62f4712e3f79b387ee0648da27551e9755;hpb=e1f64147e3eedccbbc59658ddb6cf7992dbd50c1 diff --git a/CollectionReader.cpp b/CollectionReader.cpp index 27f9ca6..ade7622 100644 --- a/CollectionReader.cpp +++ b/CollectionReader.cpp @@ -2,13 +2,166 @@ * © 2008 by David Roden */ +#include +#include +#include #include "CollectionReader.h" +#include "GlobalSettings.h" CollectionReader::CollectionReader(ReaderInput* readerInput) { this->readerInput = readerInput; + firstLink = true; } CollectionReader::~CollectionReader() { - delete readerInput; +} + +bool CollectionReader::isLineBreakPresent() { + size_t indexOfLineBreak = growingBuffer.indexOf('\n'); + GlobalSettings::isVerbose() && (indexOfLineBreak != (size_t) -1) && fprintf(stderr, "[%s:%d] found line break at %d.\n", __FILE__, __LINE__, indexOfLineBreak); + GlobalSettings::isVerbose() && (indexOfLineBreak == (size_t) -1) && fprintf(stderr, "[%s:%d] could not find line break.\n", __FILE__, __LINE__); + return indexOfLineBreak != (size_t) -1; +} + +ED2KLink* CollectionReader::getNextLink() { + if (readerInput->isEOF() && !growingBuffer.getRemaining()) { + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__); + return NULL; + } + if (firstLink) { + identifyCollectionType(); + if (readerInput->isEOF() && !growingBuffer.getRemaining()) { + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__); + return NULL; + } + firstLink = false; + } + if (isTextCollection) { + while (!readerInput->isEOF() && !isLineBreakPresent()) { + readMoreBytes(); + } + if (readerInput->isEOF() && !growingBuffer.getRemaining()) { + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__); + return NULL; + } + size_t indexOfLineBreak = growingBuffer.indexOf('\n'); + char* line; + if (indexOfLineBreak == (size_t) -1) { + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] could not find line break, using remainder of file.\n", __FILE__, __LINE__); + indexOfLineBreak = growingBuffer.getRemaining(); + line = (char*) malloc(indexOfLineBreak + 1); + growingBuffer.read(line, indexOfLineBreak); + line[indexOfLineBreak] = '\0'; + } else { + line = (char*) malloc(indexOfLineBreak + 1); + growingBuffer.read(line, indexOfLineBreak + 1); + if (line[indexOfLineBreak] == '\n') { + line[indexOfLineBreak] = '\0'; + } + if (line[indexOfLineBreak - 1] == '\r') { + line[indexOfLineBreak - 1] = '\0'; + } + } + growingBuffer.cut(); + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] got line: %s\n", __FILE__, __LINE__, line); + ED2KLink* ed2kLink = ED2KLink::parseED2KLink(line); + free(line); + return ed2kLink; + } else { + /* read header */ + if (!ensureBufferCapacity(4)) { + return NULL; + } + uint32_t headerTagCount = 0; + growingBuffer.read(&headerTagCount, 4); + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] will read %d header tags.\n", __FILE__, __LINE__, headerTagCount); + for (uint32_t headerTagIndex = 0; headerTagIndex < headerTagCount; headerTagIndex++) { + uint8_t tagType = 0; + if (!ensureBufferCapacity(1)) { + return NULL; + } + growingBuffer.read(&tagType, 1); + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag type %d.\n", __FILE__, __LINE__, tagType); + if (tagType == 0x02) { + uint16_t unknown = 0; + if (!ensureBufferCapacity(2)) { + return NULL; + } + growingBuffer.read(&unknown, 2); + uint8_t tagName = 0; + if (!ensureBufferCapacity(1)) { + return NULL; + } + growingBuffer.read(&tagName, 1); + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag name %d.\n", __FILE__, __LINE__, tagName); + uint16_t tagLength = 0; + if (!ensureBufferCapacity(2)) { + return NULL; + } + growingBuffer.read(&tagLength, 2); + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag length %d.\n", __FILE__, __LINE__, tagLength); + char* tagValue = (char*) malloc(tagLength + 1); + if (!ensureBufferCapacity(tagLength)) { + return NULL; + } + growingBuffer.read(tagValue, tagLength); + tagValue[tagLength] = '\0'; + if (tagName == 0x01) { + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read FT_FILENAME: “%s”.\n", __FILE__, __LINE__, tagValue); + } else if (tagName == 0x31) { + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read FT_COLLECTIONAUTHOR: “%s”.\n", __FILE__, __LINE__, tagValue); + } else { + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] unknown tag: “%s”.\n", __FILE__, __LINE__, tagValue); + } + } else if (tagType == 0x07) { + } + } + } + return NULL; +} + +bool CollectionReader::ensureBufferCapacity(size_t byteCount) { + while (!readerInput->isEOF() && (growingBuffer.getRemaining() < byteCount)) { + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] only %d bytes remaning, need at least %d, reading more bytes.\n", __FILE__, __LINE__, growingBuffer.getRemaining(), byteCount); + readMoreBytes(); + } + if (readerInput->isEOF() && (growingBuffer.getRemaining() < byteCount)) { + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] need %d more bytes, but file is EOF.\n", __FILE__, __LINE__, (byteCount - growingBuffer.getRemaining())); + return false; + } + return true; +} + +void CollectionReader::readMoreBytes() { + char buffer[1024]; + size_t readBytes; + + readBytes = readerInput->read(buffer, 1024); + growingBuffer.write(buffer, readBytes); +} + +void CollectionReader::identifyCollectionType() { + int version; + size_t readBytes; + + readBytes = readerInput->read(&version, 4); + if ((readBytes < 4) || readerInput->isEOF()) { + return; + } + if (version == 0x01) { + isTextCollection = false; + this->version = 1; + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified binary collection, version 1\n", __FILE__, __LINE__); + } else if (version == 0x02) { + isTextCollection = false; + this->version = 2; + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified binary collection, version 2\n", __FILE__, __LINE__); + } else if (!strncmp("ed2k", (char*) &version, 4)) { + isTextCollection = true; + growingBuffer.write(&version, 4); + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified text collection\n", __FILE__, __LINE__); + } else { + GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] could not identify collection!\n", __FILE__, __LINE__); + } }