✨ Record Sone parsing durations
[Sone.git] / src / main / java / net / pterodactylus / sone / core / SoneParser.java
1 package net.pterodactylus.sone.core;
2
3 import static java.util.concurrent.TimeUnit.*;
4 import static java.util.logging.Logger.getLogger;
5 import static net.pterodactylus.sone.utils.NumberParsers.parseInt;
6 import static net.pterodactylus.sone.utils.NumberParsers.parseLong;
7
8 import java.io.InputStream;
9 import java.util.ArrayList;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.List;
13 import java.util.Map;
14 import java.util.Set;
15 import java.util.concurrent.*;
16 import java.util.logging.Level;
17 import java.util.logging.Logger;
18
19 import javax.inject.Inject;
20
21 import net.pterodactylus.sone.data.Album;
22 import net.pterodactylus.sone.data.Client;
23 import net.pterodactylus.sone.data.Image;
24 import net.pterodactylus.sone.data.Post;
25 import net.pterodactylus.sone.data.PostReply;
26 import net.pterodactylus.sone.data.Profile;
27 import net.pterodactylus.sone.data.Profile.DuplicateField;
28 import net.pterodactylus.sone.data.Profile.EmptyFieldName;
29 import net.pterodactylus.sone.data.Sone;
30 import net.pterodactylus.sone.database.Database;
31 import net.pterodactylus.sone.database.PostBuilder;
32 import net.pterodactylus.sone.database.PostReplyBuilder;
33 import net.pterodactylus.sone.database.SoneBuilder;
34 import net.pterodactylus.util.xml.SimpleXML;
35 import net.pterodactylus.util.xml.XML;
36
37 import com.codahale.metrics.*;
38 import com.google.common.base.*;
39 import org.w3c.dom.Document;
40
41 /**
42  * Parses a {@link Sone} from an XML {@link InputStream}.
43  */
44 public class SoneParser {
45
46         private static final Logger logger = getLogger(SoneParser.class.getName());
47         private static final int MAX_PROTOCOL_VERSION = 0;
48         private final Database database;
49         private final Histogram soneParsingDurationHistogram;
50
51         @Inject
52         public SoneParser(Database database, MetricRegistry metricRegistry) {
53                 this.database = database;
54                 this.soneParsingDurationHistogram = metricRegistry.histogram("sone.parsing.duration");
55         }
56
57         public Sone parseSone(Sone originalSone, InputStream soneInputStream) throws SoneException {
58                 /* TODO - impose a size limit? */
59
60                 Stopwatch stopwatch = Stopwatch.createStarted();
61                 Document document;
62                 /* XML parsing is not thread-safe. */
63                 synchronized (this) {
64                         document = XML.transformToDocument(soneInputStream);
65                 }
66                 if (document == null) {
67                         /* TODO - mark Sone as bad. */
68                         logger.log(Level.WARNING, String.format("Could not parse XML for Sone %s!", originalSone));
69                         return null;
70                 }
71
72                 SoneBuilder soneBuilder = database.newSoneBuilder().from(originalSone.getIdentity());
73                 if (originalSone.isLocal()) {
74                         soneBuilder = soneBuilder.local();
75                 }
76                 Sone sone = soneBuilder.build();
77
78                 SimpleXML soneXml;
79                 try {
80                         soneXml = SimpleXML.fromDocument(document);
81                 } catch (NullPointerException npe1) {
82                         /* for some reason, invalid XML can cause NPEs. */
83                         logger.log(Level.WARNING, String.format("XML for Sone %s can not be parsed!", sone), npe1);
84                         return null;
85                 }
86
87                 Integer protocolVersion = null;
88                 String soneProtocolVersion = soneXml.getValue("protocol-version", null);
89                 if (soneProtocolVersion != null) {
90                         protocolVersion = parseInt(soneProtocolVersion, null);
91                 }
92                 if (protocolVersion == null) {
93                         logger.log(Level.INFO, "No protocol version found, assuming 0.");
94                         protocolVersion = 0;
95                 }
96
97                 if (protocolVersion < 0) {
98                         logger.log(Level.WARNING, String.format("Invalid protocol version: %d! Not parsing Sone.", protocolVersion));
99                         return null;
100                 }
101
102                 /* check for valid versions. */
103                 if (protocolVersion > MAX_PROTOCOL_VERSION) {
104                         logger.log(Level.WARNING, String.format("Unknown protocol version: %d! Not parsing Sone.", protocolVersion));
105                         return null;
106                 }
107
108                 String soneTime = soneXml.getValue("time", null);
109                 if (soneTime == null) {
110                         /* TODO - mark Sone as bad. */
111                         logger.log(Level.WARNING, String.format("Downloaded time for Sone %s was null!", sone));
112                         return null;
113                 }
114                 try {
115                         sone.setTime(Long.parseLong(soneTime));
116                 } catch (NumberFormatException nfe1) {
117                         /* TODO - mark Sone as bad. */
118                         logger.log(Level.WARNING, String.format("Downloaded Sone %s with invalid time: %s", sone, soneTime));
119                         return null;
120                 }
121
122                 SimpleXML clientXml = soneXml.getNode("client");
123                 if (clientXml != null) {
124                         String clientName = clientXml.getValue("name", null);
125                         String clientVersion = clientXml.getValue("version", null);
126                         if ((clientName == null) || (clientVersion == null)) {
127                                 logger.log(Level.WARNING, String.format("Download Sone %s with client XML but missing name or version!", sone));
128                                 return null;
129                         }
130                         sone.setClient(new Client(clientName, clientVersion));
131                 }
132
133                 SimpleXML profileXml = soneXml.getNode("profile");
134                 if (profileXml == null) {
135                         /* TODO - mark Sone as bad. */
136                         logger.log(Level.WARNING, String.format("Downloaded Sone %s has no profile!", sone));
137                         return null;
138                 }
139
140                 /* parse profile. */
141                 String profileFirstName = profileXml.getValue("first-name", null);
142                 String profileMiddleName = profileXml.getValue("middle-name", null);
143                 String profileLastName = profileXml.getValue("last-name", null);
144                 Integer profileBirthDay = parseInt(profileXml.getValue("birth-day", ""), null);
145                 Integer profileBirthMonth = parseInt(profileXml.getValue("birth-month", ""), null);
146                 Integer profileBirthYear = parseInt(profileXml.getValue("birth-year", ""), null);
147                 Profile profile = new Profile(sone).setFirstName(profileFirstName).setMiddleName(profileMiddleName).setLastName(profileLastName);
148                 profile.setBirthDay(profileBirthDay).setBirthMonth(profileBirthMonth).setBirthYear(profileBirthYear);
149                 /* avatar is processed after images are loaded. */
150                 String avatarId = profileXml.getValue("avatar", null);
151
152                 /* parse profile fields. */
153                 SimpleXML profileFieldsXml = profileXml.getNode("fields");
154                 if (profileFieldsXml != null) {
155                         for (SimpleXML fieldXml : profileFieldsXml.getNodes("field")) {
156                                 String fieldName = fieldXml.getValue("field-name", null);
157                                 String fieldValue = fieldXml.getValue("field-value", "");
158                                 if (fieldName == null) {
159                                         logger.log(Level.WARNING, String.format("Downloaded profile field for Sone %s with missing data! Name: %s, Value: %s", sone, fieldName, fieldValue));
160                                         return null;
161                                 }
162                                 try {
163                                         profile.addField(fieldName.trim()).setValue(fieldValue);
164                                 } catch (EmptyFieldName efn1) {
165                                         logger.log(Level.WARNING, "Empty field name!", efn1);
166                                         return null;
167                                 } catch (DuplicateField df1) {
168                                         logger.log(Level.WARNING, String.format("Duplicate field: %s", fieldName), df1);
169                                         return null;
170                                 }
171                         }
172                 }
173
174                 /* parse posts. */
175                 SimpleXML postsXml = soneXml.getNode("posts");
176                 Set<Post> posts = new HashSet<>();
177                 if (postsXml == null) {
178                         /* TODO - mark Sone as bad. */
179                         logger.log(Level.WARNING, String.format("Downloaded Sone %s has no posts!", sone));
180                 } else {
181                         for (SimpleXML postXml : postsXml.getNodes("post")) {
182                                 String postId = postXml.getValue("id", null);
183                                 String postRecipientId = postXml.getValue("recipient", null);
184                                 String postTime = postXml.getValue("time", null);
185                                 String postText = postXml.getValue("text", null);
186                                 if ((postId == null) || (postTime == null) || (postText == null)) {
187                                         /* TODO - mark Sone as bad. */
188                                         logger.log(Level.WARNING, String.format("Downloaded post for Sone %s with missing data! ID: %s, Time: %s, Text: %s", sone, postId, postTime, postText));
189                                         return null;
190                                 }
191                                 try {
192                                         PostBuilder postBuilder = database.newPostBuilder();
193                                         /* TODO - parse time correctly. */
194                                         postBuilder.withId(postId).from(sone.getId()).withTime(Long.parseLong(postTime)).withText(postText);
195                                         if ((postRecipientId != null) && (postRecipientId.length() == 43)) {
196                                                 postBuilder.to(postRecipientId);
197                                         }
198                                         posts.add(postBuilder.build());
199                                 } catch (NumberFormatException nfe1) {
200                                         /* TODO - mark Sone as bad. */
201                                         logger.log(Level.WARNING, String.format("Downloaded post for Sone %s with invalid time: %s", sone, postTime));
202                                         return null;
203                                 }
204                         }
205                 }
206
207                 /* parse replies. */
208                 SimpleXML repliesXml = soneXml.getNode("replies");
209                 Set<PostReply> replies = new HashSet<>();
210                 if (repliesXml == null) {
211                         /* TODO - mark Sone as bad. */
212                         logger.log(Level.WARNING, String.format("Downloaded Sone %s has no replies!", sone));
213                 } else {
214                         for (SimpleXML replyXml : repliesXml.getNodes("reply")) {
215                                 String replyId = replyXml.getValue("id", null);
216                                 String replyPostId = replyXml.getValue("post-id", null);
217                                 String replyTime = replyXml.getValue("time", null);
218                                 String replyText = replyXml.getValue("text", null);
219                                 if ((replyId == null) || (replyPostId == null) || (replyTime == null) || (replyText == null)) {
220                                         /* TODO - mark Sone as bad. */
221                                         logger.log(Level.WARNING, String.format("Downloaded reply for Sone %s with missing data! ID: %s, Post: %s, Time: %s, Text: %s", sone, replyId, replyPostId, replyTime, replyText));
222                                         return null;
223                                 }
224                                 try {
225                                         PostReplyBuilder postReplyBuilder = database.newPostReplyBuilder();
226                                         /* TODO - parse time correctly. */
227                                         postReplyBuilder.withId(replyId).from(sone.getId()).to(replyPostId).withTime(Long.parseLong(replyTime)).withText(replyText);
228                                         replies.add(postReplyBuilder.build());
229                                 } catch (NumberFormatException nfe1) {
230                                         /* TODO - mark Sone as bad. */
231                                         logger.log(Level.WARNING, String.format("Downloaded reply for Sone %s with invalid time: %s", sone, replyTime));
232                                         return null;
233                                 }
234                         }
235                 }
236
237                 /* parse liked post IDs. */
238                 SimpleXML likePostIdsXml = soneXml.getNode("post-likes");
239                 Set<String> likedPostIds = new HashSet<>();
240                 if (likePostIdsXml == null) {
241                         /* TODO - mark Sone as bad. */
242                         logger.log(Level.WARNING, String.format("Downloaded Sone %s has no post likes!", sone));
243                 } else {
244                         for (SimpleXML likedPostIdXml : likePostIdsXml.getNodes("post-like")) {
245                                 String postId = likedPostIdXml.getValue();
246                                 likedPostIds.add(postId);
247                         }
248                 }
249
250                 /* parse liked reply IDs. */
251                 SimpleXML likeReplyIdsXml = soneXml.getNode("reply-likes");
252                 Set<String> likedReplyIds = new HashSet<>();
253                 if (likeReplyIdsXml == null) {
254                         /* TODO - mark Sone as bad. */
255                         logger.log(Level.WARNING, String.format("Downloaded Sone %s has no reply likes!", sone));
256                 } else {
257                         for (SimpleXML likedReplyIdXml : likeReplyIdsXml.getNodes("reply-like")) {
258                                 String replyId = likedReplyIdXml.getValue();
259                                 likedReplyIds.add(replyId);
260                         }
261                 }
262
263                 /* parse albums. */
264                 SimpleXML albumsXml = soneXml.getNode("albums");
265                 Map<String, Image> allImages = new HashMap<>();
266                 List<Album> topLevelAlbums = new ArrayList<>();
267                 Map<String, Album> allAlbums = new HashMap<>();
268                 if (albumsXml != null) {
269                         for (SimpleXML albumXml : albumsXml.getNodes("album")) {
270                                 String id = albumXml.getValue("id", null);
271                                 String parentId = albumXml.getValue("parent", null);
272                                 String title = albumXml.getValue("title", null);
273                                 String description = albumXml.getValue("description", "");
274                                 if ((id == null) || (title == null)) {
275                                         logger.log(Level.WARNING, String.format("Downloaded Sone %s contains invalid album!", sone));
276                                         return null;
277                                 }
278                                 Album parent = null;
279                                 if (parentId != null) {
280                                         parent = allAlbums.get(parentId);
281                                         if (parent == null) {
282                                                 logger.log(Level.WARNING, String.format("Downloaded Sone %s has album with invalid parent!", sone));
283                                                 return null;
284                                         }
285                                 }
286                                 Album album = database.newAlbumBuilder()
287                                                 .withId(id)
288                                                 .by(sone)
289                                                 .build()
290                                                 .modify()
291                                                 .setTitle(title)
292                                                 .setDescription(description)
293                                                 .update();
294                                 if (parent != null) {
295                                         parent.addAlbum(album);
296                                 } else {
297                                         topLevelAlbums.add(album);
298                                 }
299                                 allAlbums.put(album.getId(), album);
300                                 SimpleXML imagesXml = albumXml.getNode("images");
301                                 if (imagesXml != null) {
302                                         for (SimpleXML imageXml : imagesXml.getNodes("image")) {
303                                                 String imageId = imageXml.getValue("id", null);
304                                                 String imageCreationTimeString = imageXml.getValue("creation-time", null);
305                                                 String imageKey = imageXml.getValue("key", null);
306                                                 String imageTitle = imageXml.getValue("title", null);
307                                                 String imageDescription = imageXml.getValue("description", "");
308                                                 String imageWidthString = imageXml.getValue("width", null);
309                                                 String imageHeightString = imageXml.getValue("height", null);
310                                                 if ((imageId == null) || (imageCreationTimeString == null) || (imageKey == null) || (imageTitle == null) || (imageWidthString == null) || (imageHeightString == null)) {
311                                                         logger.log(Level.WARNING, String.format("Downloaded Sone %s contains invalid images!", sone));
312                                                         return null;
313                                                 }
314                                                 long creationTime = parseLong(imageCreationTimeString, 0L);
315                                                 int imageWidth = parseInt(imageWidthString, 0);
316                                                 int imageHeight = parseInt(imageHeightString, 0);
317                                                 if ((imageWidth < 1) || (imageHeight < 1)) {
318                                                         logger.log(Level.WARNING, String.format("Downloaded Sone %s contains image %s with invalid dimensions (%s, %s)!", sone, imageId, imageWidthString, imageHeightString));
319                                                         return null;
320                                                 }
321                                                 Image image = database.newImageBuilder().withId(imageId).build().modify().setSone(sone).setKey(imageKey).setCreationTime(creationTime).update();
322                                                 image = image.modify().setTitle(imageTitle).setDescription(imageDescription).update();
323                                                 image = image.modify().setWidth(imageWidth).setHeight(imageHeight).update();
324                                                 album.addImage(image);
325                                                 allImages.put(imageId, image);
326                                         }
327                                 }
328                         }
329                 }
330
331                 /* process avatar. */
332                 if (avatarId != null) {
333                         profile.setAvatar(allImages.get(avatarId));
334                 }
335
336                 /* okay, apparently everything was parsed correctly. Now import. */
337                 sone.setProfile(profile);
338                 sone.setPosts(posts);
339                 sone.setReplies(replies);
340                 sone.setLikePostIds(likedPostIds);
341                 sone.setLikeReplyIds(likedReplyIds);
342                 for (Album album : topLevelAlbums) {
343                         sone.getRootAlbum().addAlbum(album);
344                 }
345
346                 // record the duration
347                 stopwatch.stop();
348                 soneParsingDurationHistogram.update(stopwatch.elapsed(MICROSECONDS));
349
350                 return sone;
351
352         }
353
354 }