1 package net.pterodactylus.sone.core;
3 import static java.util.concurrent.TimeUnit.*;
4 import static java.util.logging.Logger.getLogger;
5 import static net.pterodactylus.sone.utils.NumberParsers.parseInt;
6 import static net.pterodactylus.sone.utils.NumberParsers.parseLong;
8 import java.io.InputStream;
9 import java.util.ArrayList;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.List;
15 import java.util.concurrent.*;
16 import java.util.logging.Level;
17 import java.util.logging.Logger;
19 import javax.inject.Inject;
21 import net.pterodactylus.sone.data.Album;
22 import net.pterodactylus.sone.data.Client;
23 import net.pterodactylus.sone.data.Image;
24 import net.pterodactylus.sone.data.Post;
25 import net.pterodactylus.sone.data.PostReply;
26 import net.pterodactylus.sone.data.Profile;
27 import net.pterodactylus.sone.data.Profile.DuplicateField;
28 import net.pterodactylus.sone.data.Profile.EmptyFieldName;
29 import net.pterodactylus.sone.data.Sone;
30 import net.pterodactylus.sone.database.Database;
31 import net.pterodactylus.sone.database.PostBuilder;
32 import net.pterodactylus.sone.database.PostReplyBuilder;
33 import net.pterodactylus.sone.database.SoneBuilder;
34 import net.pterodactylus.util.xml.SimpleXML;
35 import net.pterodactylus.util.xml.XML;
37 import com.codahale.metrics.*;
38 import com.google.common.base.*;
39 import org.w3c.dom.Document;
42 * Parses a {@link Sone} from an XML {@link InputStream}.
44 public class SoneParser {
/* Logger for this class, looked up by the class name. */
46 private static final Logger logger = getLogger(SoneParser.class.getName());
/* Highest protocol version accepted; parseSone warns about an "Unknown protocol version" for anything greater. */
47 private static final int MAX_PROTOCOL_VERSION = 0;
/* Supplies the Sone, post, reply, album and image builders used while parsing. */
48 private final Database database;
/* Updated by parseSone with the elapsed parsing time, measured in microseconds. */
49 private final Histogram soneParsingDurationHistogram;
/**
 * Creates a new Sone parser.
 *
 * @param database
 *            the database, stored so that builders can be requested from it during parsing
 * @param metricRegistry
 *            the registry from which the {@code "sone.parsing.duration"} histogram is obtained
 */
52 public SoneParser(Database database, MetricRegistry metricRegistry) {
53 this.database = database;
54 this.soneParsingDurationHistogram = metricRegistry.histogram("sone.parsing.duration");
/**
 * Parses a Sone from the given XML input stream.
 * <p>
 * The stream is transformed into a {@link Document} and wrapped in a
 * {@link SimpleXML}. A new {@link Sone} is built from the identity of
 * {@code originalSone} (and marked local if the original is local) and is
 * then filled with the time, client, profile, posts, replies, likes,
 * albums and images read from the XML. Problems found in the XML are logged
 * at WARNING level. At the end the elapsed time, in microseconds, is
 * recorded in the parsing-duration histogram.
 * <p>
 * NOTE(review): several statements of this method are not visible in this
 * listing (for example whatever follows each warning, and the final return
 * statement). Confirm the failure behaviour and the returned value against
 * the complete file.
 *
 * @param originalSone
 *            the Sone whose identity and local flag seed the newly built Sone
 * @param soneInputStream
 *            the stream containing the XML of the Sone
 * @return presumably the newly built and populated Sone; the return
 *         statement is not visible here, TODO confirm
 * @throws SoneException
 *             declared by the signature; no throwing statement is visible in
 *             this listing
 */
57 public Sone parseSone(Sone originalSone, InputStream soneInputStream) throws SoneException {
58 /* TODO - impose a size limit? */
/* start timing; the elapsed time is reported to the histogram at the end of the method. */
60 Stopwatch stopwatch = Stopwatch.createStarted();
62 /* XML parsing is not thread-safe. */
64 document = XML.transformToDocument(soneInputStream);
/* a null document is treated as unparsable XML. */
66 if (document == null) {
67 /* TODO - mark Sone as bad. */
68 logger.log(Level.WARNING, String.format("Could not parse XML for Sone %s!", originalSone));
/* build a fresh Sone from the identity of the original, carrying over the local flag. */
72 SoneBuilder soneBuilder = database.newSoneBuilder().from(originalSone.getIdentity());
73 if (originalSone.isLocal()) {
74 soneBuilder = soneBuilder.local();
76 Sone sone = soneBuilder.build();
80 soneXml = SimpleXML.fromDocument(document);
81 } catch (NullPointerException npe1) {
82 /* for some reason, invalid XML can cause NPEs. */
83 logger.log(Level.WARNING, String.format("XML for Sone %s can not be parsed!", sone), npe1);
/* the protocol version is optional; presumably parseInt returns its second argument (null) for unparsable input - TODO confirm. */
87 Integer protocolVersion = null;
88 String soneProtocolVersion = soneXml.getValue("protocol-version", null);
89 if (soneProtocolVersion != null) {
90 protocolVersion = parseInt(soneProtocolVersion, null);
/* NOTE(review): the statement that applies the assumed default of 0 is not visible in this listing. */
92 if (protocolVersion == null) {
93 logger.log(Level.INFO, "No protocol version found, assuming 0.");
/* negative versions are reported as invalid. */
97 if (protocolVersion < 0) {
98 logger.log(Level.WARNING, String.format("Invalid protocol version: %d! Not parsing Sone.", protocolVersion));
102 /* check for valid versions. */
103 if (protocolVersion > MAX_PROTOCOL_VERSION) {
104 logger.log(Level.WARNING, String.format("Unknown protocol version: %d! Not parsing Sone.", protocolVersion));
/* the time of the Sone has to be present and has to be a valid long. */
108 String soneTime = soneXml.getValue("time", null);
109 if (soneTime == null) {
110 /* TODO - mark Sone as bad. */
111 logger.log(Level.WARNING, String.format("Downloaded time for Sone %s was null!", sone));
115 sone.setTime(Long.parseLong(soneTime));
116 } catch (NumberFormatException nfe1) {
117 /* TODO - mark Sone as bad. */
118 logger.log(Level.WARNING, String.format("Downloaded Sone %s with invalid time: %s", sone, soneTime));
/* the client node is optional, but if it exists a missing name or version is reported. */
122 SimpleXML clientXml = soneXml.getNode("client");
123 if (clientXml != null) {
124 String clientName = clientXml.getValue("name", null);
125 String clientVersion = clientXml.getValue("version", null);
126 if ((clientName == null) || (clientVersion == null)) {
127 logger.log(Level.WARNING, String.format("Download Sone %s with client XML but missing name or version!", sone));
130 sone.setClient(new Client(clientName, clientVersion));
/* a missing profile node is reported with a warning. */
133 SimpleXML profileXml = soneXml.getNode("profile");
134 if (profileXml == null) {
135 /* TODO - mark Sone as bad. */
136 logger.log(Level.WARNING, String.format("Downloaded Sone %s has no profile!", sone));
/* name parts default to null; birth date parts default to "" and are then run through parseInt with a null fallback. */
141 String profileFirstName = profileXml.getValue("first-name", null);
142 String profileMiddleName = profileXml.getValue("middle-name", null);
143 String profileLastName = profileXml.getValue("last-name", null);
144 Integer profileBirthDay = parseInt(profileXml.getValue("birth-day", ""), null);
145 Integer profileBirthMonth = parseInt(profileXml.getValue("birth-month", ""), null);
146 Integer profileBirthYear = parseInt(profileXml.getValue("birth-year", ""), null);
147 Profile profile = new Profile(sone).setFirstName(profileFirstName).setMiddleName(profileMiddleName).setLastName(profileLastName);
148 profile.setBirthDay(profileBirthDay).setBirthMonth(profileBirthMonth).setBirthYear(profileBirthYear);
149 /* avatar is processed after images are loaded. */
150 String avatarId = profileXml.getValue("avatar", null);
152 /* parse profile fields. */
153 SimpleXML profileFieldsXml = profileXml.getNode("fields");
154 if (profileFieldsXml != null) {
155 for (SimpleXML fieldXml : profileFieldsXml.getNodes("field")) {
/* the field name is required, the field value falls back to the empty string. */
156 String fieldName = fieldXml.getValue("field-name", null);
157 String fieldValue = fieldXml.getValue("field-value", "");
158 if (fieldName == null) {
159 logger.log(Level.WARNING, String.format("Downloaded profile field for Sone %s with missing data! Name: %s, Value: %s", sone, fieldName, fieldValue));
/* the name is trimmed before the field is added; empty and duplicate names are reported by the catch clauses below. */
163 profile.addField(fieldName.trim()).setValue(fieldValue);
164 } catch (EmptyFieldName efn1) {
165 logger.log(Level.WARNING, "Empty field name!", efn1);
167 } catch (DuplicateField df1) {
168 logger.log(Level.WARNING, String.format("Duplicate field: %s", fieldName), df1);
/* parse posts; they are collected in a set and only attached to the Sone at the end. */
175 SimpleXML postsXml = soneXml.getNode("posts");
176 Set<Post> posts = new HashSet<>();
177 if (postsXml == null) {
178 /* TODO - mark Sone as bad. */
179 logger.log(Level.WARNING, String.format("Downloaded Sone %s has no posts!", sone));
181 for (SimpleXML postXml : postsXml.getNodes("post")) {
/* ID, time and text are required; the recipient is optional. */
182 String postId = postXml.getValue("id", null);
183 String postRecipientId = postXml.getValue("recipient", null);
184 String postTime = postXml.getValue("time", null);
185 String postText = postXml.getValue("text", null);
186 if ((postId == null) || (postTime == null) || (postText == null)) {
187 /* TODO - mark Sone as bad. */
188 logger.log(Level.WARNING, String.format("Downloaded post for Sone %s with missing data! ID: %s, Time: %s, Text: %s", sone, postId, postTime, postText));
192 PostBuilder postBuilder = database.newPostBuilder();
193 /* TODO - parse time correctly. */
194 postBuilder.withId(postId).from(sone.getId()).withTime(Long.parseLong(postTime)).withText(postText);
/* a recipient is only set if its ID is exactly 43 characters long - presumably the length of a valid Sone ID, TODO confirm. */
195 if ((postRecipientId != null) && (postRecipientId.length() == 43)) {
196 postBuilder.to(postRecipientId);
198 posts.add(postBuilder.build());
199 } catch (NumberFormatException nfe1) {
200 /* TODO - mark Sone as bad. */
201 logger.log(Level.WARNING, String.format("Downloaded post for Sone %s with invalid time: %s", sone, postTime));
/* parse replies; all four values (ID, post ID, time, text) are required. */
208 SimpleXML repliesXml = soneXml.getNode("replies");
209 Set<PostReply> replies = new HashSet<>();
210 if (repliesXml == null) {
211 /* TODO - mark Sone as bad. */
212 logger.log(Level.WARNING, String.format("Downloaded Sone %s has no replies!", sone));
214 for (SimpleXML replyXml : repliesXml.getNodes("reply")) {
215 String replyId = replyXml.getValue("id", null);
216 String replyPostId = replyXml.getValue("post-id", null);
217 String replyTime = replyXml.getValue("time", null);
218 String replyText = replyXml.getValue("text", null);
219 if ((replyId == null) || (replyPostId == null) || (replyTime == null) || (replyText == null)) {
220 /* TODO - mark Sone as bad. */
221 logger.log(Level.WARNING, String.format("Downloaded reply for Sone %s with missing data! ID: %s, Post: %s, Time: %s, Text: %s", sone, replyId, replyPostId, replyTime, replyText));
225 PostReplyBuilder postReplyBuilder = database.newPostReplyBuilder();
226 /* TODO - parse time correctly. */
227 postReplyBuilder.withId(replyId).from(sone.getId()).to(replyPostId).withTime(Long.parseLong(replyTime)).withText(replyText);
228 replies.add(postReplyBuilder.build());
229 } catch (NumberFormatException nfe1) {
230 /* TODO - mark Sone as bad. */
231 logger.log(Level.WARNING, String.format("Downloaded reply for Sone %s with invalid time: %s", sone, replyTime));
237 /* parse liked post IDs. */
238 SimpleXML likePostIdsXml = soneXml.getNode("post-likes");
239 Set<String> likedPostIds = new HashSet<>();
240 if (likePostIdsXml == null) {
241 /* TODO - mark Sone as bad. */
242 logger.log(Level.WARNING, String.format("Downloaded Sone %s has no post likes!", sone));
244 for (SimpleXML likedPostIdXml : likePostIdsXml.getNodes("post-like")) {
/* the value of the node is added as it is, no validation is visible here. */
245 String postId = likedPostIdXml.getValue();
246 likedPostIds.add(postId);
250 /* parse liked reply IDs. */
251 SimpleXML likeReplyIdsXml = soneXml.getNode("reply-likes");
252 Set<String> likedReplyIds = new HashSet<>();
253 if (likeReplyIdsXml == null) {
254 /* TODO - mark Sone as bad. */
255 logger.log(Level.WARNING, String.format("Downloaded Sone %s has no reply likes!", sone));
257 for (SimpleXML likedReplyIdXml : likeReplyIdsXml.getNodes("reply-like")) {
258 String replyId = likedReplyIdXml.getValue();
259 likedReplyIds.add(replyId);
/* parse albums and their images; the albums node is optional and no warning is logged if it is missing. */
264 SimpleXML albumsXml = soneXml.getNode("albums");
/* allImages maps image IDs to images so that the avatar can be resolved after all images are loaded. */
265 Map<String, Image> allImages = new HashMap<>();
266 List<Album> topLevelAlbums = new ArrayList<>();
/* allAlbums maps album IDs to albums already parsed; a parent is looked up here, so it has to precede its children in the XML. */
267 Map<String, Album> allAlbums = new HashMap<>();
268 if (albumsXml != null) {
269 for (SimpleXML albumXml : albumsXml.getNodes("album")) {
/* ID and title are required; the parent is optional and the description falls back to the empty string. */
270 String id = albumXml.getValue("id", null);
271 String parentId = albumXml.getValue("parent", null);
272 String title = albumXml.getValue("title", null);
273 String description = albumXml.getValue("description", "");
274 if ((id == null) || (title == null)) {
275 logger.log(Level.WARNING, String.format("Downloaded Sone %s contains invalid album!", sone));
279 if (parentId != null) {
280 parent = allAlbums.get(parentId);
281 if (parent == null) {
282 logger.log(Level.WARNING, String.format("Downloaded Sone %s has album with invalid parent!", sone));
/* NOTE(review): most of this builder chain is not visible in this listing. */
286 Album album = database.newAlbumBuilder()
292 .setDescription(description)
/* an album is either attached to its parent or remembered as a top-level album. */
294 if (parent != null) {
295 parent.addAlbum(album);
297 topLevelAlbums.add(album);
299 allAlbums.put(album.getId(), album);
300 SimpleXML imagesXml = albumXml.getNode("images");
301 if (imagesXml != null) {
302 for (SimpleXML imageXml : imagesXml.getNodes("image")) {
/* everything except the description is required for an image. */
303 String imageId = imageXml.getValue("id", null);
304 String imageCreationTimeString = imageXml.getValue("creation-time", null);
305 String imageKey = imageXml.getValue("key", null);
306 String imageTitle = imageXml.getValue("title", null);
307 String imageDescription = imageXml.getValue("description", "");
308 String imageWidthString = imageXml.getValue("width", null);
309 String imageHeightString = imageXml.getValue("height", null);
310 if ((imageId == null) || (imageCreationTimeString == null) || (imageKey == null) || (imageTitle == null) || (imageWidthString == null) || (imageHeightString == null)) {
311 logger.log(Level.WARNING, String.format("Downloaded Sone %s contains invalid images!", sone));
/* presumably the second argument is the fallback for unparsable input, so a bad width or height becomes 0 and is reported by the check below - TODO confirm. */
314 long creationTime = parseLong(imageCreationTimeString, 0L);
315 int imageWidth = parseInt(imageWidthString, 0);
316 int imageHeight = parseInt(imageHeightString, 0);
317 if ((imageWidth < 1) || (imageHeight < 1)) {
318 logger.log(Level.WARNING, String.format("Downloaded Sone %s contains image %s with invalid dimensions (%s, %s)!", sone, imageId, imageWidthString, imageHeightString));
/* the image is built with its ID only and then completed in three modify/update steps. */
321 Image image = database.newImageBuilder().withId(imageId).build().modify().setSone(sone).setKey(imageKey).setCreationTime(creationTime).update();
322 image = image.modify().setTitle(imageTitle).setDescription(imageDescription).update();
323 image = image.modify().setWidth(imageWidth).setHeight(imageHeight).update();
324 album.addImage(image);
325 allImages.put(imageId, image);
331 /* process avatar. */
/* an avatar ID that matches no parsed image results in setAvatar(null), because the map lookup returns null. */
332 if (avatarId != null) {
333 profile.setAvatar(allImages.get(avatarId));
336 /* okay, apparently everything was parsed correctly. Now import. */
337 sone.setProfile(profile);
338 sone.setPosts(posts);
339 sone.setReplies(replies);
340 sone.setLikePostIds(likedPostIds);
341 sone.setLikeReplyIds(likedReplyIds);
/* only top-level albums are added to the root album; nested albums were already attached to their parents. */
342 for (Album album : topLevelAlbums) {
343 sone.getRootAlbum().addAlbum(album);
346 // record the duration
348 soneParsingDurationHistogram.update(stopwatch.elapsed(MICROSECONDS));