increase version number to 0.1
[ecparse.git] / CollectionReader.cpp
index f58e5bc..7d47ea1 100644 (file)
@@ -22,6 +22,82 @@ bool CollectionReader::isLineBreakPresent() {
        return indexOfLineBreak != (size_t) -1;
 }
 
+/**
+ * Reads one string tag from growingBuffer.
+ *
+ * Consumes, in order: an optional 2-byte field (only when `header` is true;
+ * its value is read and discarded), a 1-byte tag name, a 2-byte length and
+ * then that many bytes of string data.
+ *
+ * NOTE(review): the multi-byte fields are copied raw into host integers, so
+ * this presumably relies on the host byte order matching the file format;
+ * confirm.
+ *
+ * @param header  true if the tag is preceded by the extra 2-byte field.
+ * @return a newly allocated StringTag (getNextLink() deletes the ones it
+ *         receives), or NULL if ensureBufferCapacity() reports failure or
+ *         the temporary buffer cannot be allocated.
+ */
+StringTag* CollectionReader::readStringTag(bool header) {
+       if (header) {
+               /* two bytes of unknown meaning precede the tag; read and drop them */
+               uint16_t unknown = 0;
+               if (!ensureBufferCapacity(2)) {
+                       return NULL;
+               }
+               growingBuffer.read(&unknown, 2);
+       }
+       uint8_t tagName = 0;
+       if (!ensureBufferCapacity(1)) {
+               return NULL;
+       }
+       growingBuffer.read(&tagName, 1);
+       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag name %d.\n", __FILE__, __LINE__, tagName);
+       uint16_t tagLength = 0;
+       if (!ensureBufferCapacity(2)) {
+               return NULL;
+       }
+       growingBuffer.read(&tagLength, 2);
+       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag length %d.\n", __FILE__, __LINE__, tagLength);
+       /* check the buffer before allocating, so that a failed check cannot leak the temporary buffer */
+       if (!ensureBufferCapacity(tagLength)) {
+               return NULL;
+       }
+       /* one extra byte for the terminating NUL */
+       char* tagValue = (char*) malloc(tagLength + 1);
+       if (!tagValue) {
+               return NULL;
+       }
+       growingBuffer.read(tagValue, tagLength);
+       tagValue[tagLength] = '\0';
+       /* the temporary buffer is freed right below, so StringTag is presumed to keep its own copy; TODO confirm */
+       StringTag* stringTag = new StringTag(tagName, tagValue);
+       free(tagValue);
+       return stringTag;
+}
+
+/**
+ * Reads one blob tag from growingBuffer.
+ *
+ * Consumes, in order: an optional 2-byte field (only when `header` is true;
+ * its value is read and discarded), a 1-byte tag name, a 4-byte length and
+ * then that many bytes of payload.
+ *
+ * NOTE(review): the multi-byte fields are copied raw into host integers, so
+ * this presumably relies on the host byte order matching the file format;
+ * confirm.
+ *
+ * @param header  true if the tag is preceded by the extra 2-byte field.
+ * @return a newly allocated BlobTag, or NULL if ensureBufferCapacity()
+ *         reports failure or the temporary buffer cannot be allocated.
+ */
+BlobTag* CollectionReader::readBlobTag(bool header) {
+       if (header) {
+               /* two bytes of unknown meaning precede the tag; read and drop them */
+               uint16_t unknown = 0;
+               if (!ensureBufferCapacity(2)) {
+                       return NULL;
+               }
+               growingBuffer.read(&unknown, 2);
+       }
+       uint8_t tagName = 0;
+       if (!ensureBufferCapacity(1)) {
+               return NULL;
+       }
+       growingBuffer.read(&tagName, 1);
+       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag name %d.\n", __FILE__, __LINE__, tagName);
+       uint32_t tagLength = 0;
+       if (!ensureBufferCapacity(4)) {
+               return NULL;
+       }
+       growingBuffer.read(&tagLength, 4);
+       /* the length is unsigned, so print it as such */
+       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag length %u.\n", __FILE__, __LINE__, (unsigned int) tagLength);
+       /* check the buffer before allocating, so that a failed check cannot leak the temporary buffer */
+       if (!ensureBufferCapacity(tagLength)) {
+               return NULL;
+       }
+       /* malloc(0) may legally return NULL; always request at least one byte so NULL only ever means failure */
+       void* tagValue = malloc(tagLength ? tagLength : 1);
+       if (!tagValue) {
+               return NULL;
+       }
+       growingBuffer.read(tagValue, tagLength);
+       /* the temporary buffer is freed right below, so BlobTag is presumed to keep its own copy; TODO confirm */
+       BlobTag* blobTag = new BlobTag(tagName, tagValue, tagLength);
+       free(tagValue);
+       return blobTag;
+}
+
+/**
+ * Reads one hash tag from growingBuffer: a 1-byte tag id followed by a
+ * 16-byte hash value.
+ *
+ * @return a newly allocated HashTag, or NULL if ensureBufferCapacity()
+ *         reports failure for either field.
+ */
+HashTag* CollectionReader::readHashTag() {
+       static const size_t ID_LENGTH = 1;
+       static const size_t HASH_LENGTH = 16;
+       uint8_t id = 0;
+       char digest[HASH_LENGTH];
+       /* tag id */
+       if (!ensureBufferCapacity(ID_LENGTH)) {
+               return NULL;
+       }
+       growingBuffer.read(&id, ID_LENGTH);
+       if (GlobalSettings::isVerbose()) {
+               fprintf(stderr, "[%s:%d] read tag id %d.\n", __FILE__, __LINE__, id);
+       }
+       /* hash value */
+       if (!ensureBufferCapacity(HASH_LENGTH)) {
+               return NULL;
+       }
+       growingBuffer.read(digest, HASH_LENGTH);
+       HashTag* result = new HashTag(id, digest);
+       return result;
+}
+
 ED2KLink* CollectionReader::getNextLink() {
        if (readerInput->isEOF() && !growingBuffer.getRemaining()) {
                GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__);
@@ -33,7 +109,6 @@ ED2KLink* CollectionReader::getNextLink() {
                        GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] readInput EOF reached.\n", __FILE__, __LINE__);
                        return NULL;
                }
-               firstLink = false;
        }
        if (isTextCollection) {
                while (!readerInput->isEOF() && !isLineBreakPresent()) {
@@ -65,8 +140,129 @@ ED2KLink* CollectionReader::getNextLink() {
                GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] got line: %s\n", __FILE__, __LINE__, line);
                ED2KLink* ed2kLink = ED2KLink::parseED2KLink(line);
                free(line);
+               firstLink = false;
                return ed2kLink;
+       }
+       if (firstLink) {
+               /* read header */
+               if (!ensureBufferCapacity(4)) {
+                       return NULL;
+               }
+               uint32_t headerTagCount = 0;
+               growingBuffer.read(&headerTagCount, 4);
+               GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] will read %d header tags.\n", __FILE__, __LINE__, headerTagCount);
+               for (uint32_t headerTagIndex = 0; headerTagIndex < headerTagCount; headerTagIndex++) {
+                       uint8_t tagType = 0;
+                       if (!ensureBufferCapacity(1)) {
+                               return NULL;
+                       }
+                       growingBuffer.read(&tagType, 1);
+                       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read tag type %d.\n", __FILE__, __LINE__, tagType);
+                       if (tagType == 0x02) {
+                               StringTag* stringTag = readStringTag(true);
+                               if (stringTag->getId() == 0x01) {
+                                       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] FT_FILENAME: “%s”\n", __FILE__, __LINE__, (char*) stringTag->getValue());
+                               } else if (stringTag->getId() == 0x31) {
+                                       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] FT_COLLECTIONAUTHOR: “%s”\n", __FILE__, __LINE__, (char*) stringTag->getValue());
+                               } else {
+                                       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] unknown string in header: “%s”\n", __FILE__, __LINE__, (char*) stringTag->getValue());
+                               }
+                       } else if (tagType == 0x07) {
+                               BlobTag* blobTag = readBlobTag(true);
+                               if (blobTag->getId() == 0x32) {
+                                       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] FT_COLLECTIONAUTHORKEY: %d bytes.\n", __FILE__, __LINE__, blobTag->getSize());
+                               } else {
+                                       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] unknown block in header: %d bytes.\n", __FILE__, __LINE__, blobTag->getSize());
+                               }
+                       } else {
+                               GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] unknown tag type: %02x.\n", __FILE__, __LINE__, tagType);
+                       }
+               }
+               fileCollectionCount = 0;
+               if (!ensureBufferCapacity(4)) {
+                       return NULL;
+               }
+               growingBuffer.read(&fileCollectionCount, 4);
+               GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] will read %d files.\n", __FILE__, __LINE__, fileCollectionCount);
+               collectionFileIndex = 0;
+               firstLink = false;
+       }
+       if (collectionFileIndex < fileCollectionCount) {
+               uint32_t fileTagCount = 0;
+               if (!ensureBufferCapacity(4)) {
+                       return NULL;
+               }
+               growingBuffer.read(&fileTagCount, 4);
+               GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] will read %d file tags.\n", __FILE__, __LINE__, fileTagCount);
+               StringTag* filenameTag = NULL;
+               uint64_t size = 0;
+               HashTag* hashTag = NULL;
+               for (uint32_t fileTagIndex = 0; fileTagIndex < fileTagCount; fileTagIndex++) {
+                       uint8_t tagType = 0;
+                       if (!ensureBufferCapacity(1)) {
+                               return NULL;
+                       }
+                       growingBuffer.read(&tagType, 1);
+                       tagType &= 0x7f;
+                       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] reading tag type %d.\n", __FILE__, __LINE__, tagType);
+                       if (tagType == 0x01) {
+                               hashTag = readHashTag();
+                               if (!hashTag) {
+                                       return NULL;
+                               }
+                       } else if (tagType == 0x02) {
+                               StringTag* stringTag = readStringTag();
+                               if (!stringTag) {
+                                       return NULL;
+                               }
+                               if (stringTag->getId() == 0x01) {
+                                       filenameTag = stringTag;
+                                       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read file name “%s”.\n", __FILE__, __LINE__, (char*) stringTag->getValue());
+                               } else if (stringTag->getId() == 0xf6) {
+                                       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read file comment “%s”.\n", __FILE__, __LINE__, (char*) stringTag->getValue());
+                                       delete stringTag;
+                               }
+                       } else if ((tagType == 0x03) || (tagType == 0x08) || (tagType == 0x09) || (tagType == 0x0b)) {
+                               if (!ensureBufferCapacity(1)) {
+                                       return NULL;
+                               }
+                               uint8_t id = 0;
+                               growingBuffer.read(&id, 1);
+                               if (id == 0x02) {
+                                       int neededBytes = (tagType == 0x03) ? 4 : ((tagType == 0x08) ? 2 : ((tagType == 0x09) ? 1 : 8));
+                                       if (!ensureBufferCapacity(neededBytes)) {
+                                               return NULL;
+                                       }
+                                       growingBuffer.read(&size, neededBytes);
+                                       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] file size is %llu bytes.\n", __FILE__, __LINE__, size);
+                               } else if (id == 0xf7) {
+                                       uint8_t fileRating = 0;
+                                       if (!ensureBufferCapacity(1)) {
+                                               return NULL;
+                                       }
+                                       growingBuffer.read(&fileRating, 1);
+                                       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] read file rating %d.\n", __FILE__, __LINE__, fileRating);
+                               }
+                       }
+               }
+               collectionFileIndex++;
+               growingBuffer.cut();
+               if (filenameTag && hashTag) {
+                       ED2KLink* ed2kLink = new ED2KLink((char*) filenameTag->getValue(), size, hashTag->getValue());
+                       delete filenameTag;
+                       delete hashTag;
+                       return ed2kLink;
+               } else {
+                       GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] not enough data to decode file.\n", __FILE__, __LINE__);
+               }
+               if (filenameTag) {
+                       delete filenameTag;
+               }
+               if (hashTag) {
+                       delete hashTag;
+               }
        } else {
+               GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] reached end of collection.\n", __FILE__, __LINE__);
        }
        return NULL;
 }
@@ -92,7 +288,6 @@ void CollectionReader::readMoreBytes() {
 }
 
 void CollectionReader::identifyCollectionType() {
-       int version;
        size_t readBytes;
 
        readBytes = readerInput->read(&version, 4);
@@ -110,6 +305,7 @@ void CollectionReader::identifyCollectionType() {
        } else if (!strncmp("ed2k", (char*) &version, 4)) {
                isTextCollection = true;
                growingBuffer.write(&version, 4);
+               version = 0;
                GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] identified text collection\n", __FILE__, __LINE__);
        } else  {
                GlobalSettings::isVerbose() && fprintf(stderr, "[%s:%d] could not identify collection!\n", __FILE__, __LINE__);