1 package net.pterodactylus.sone.core;
3 import static java.util.concurrent.TimeUnit.*;
4 import static java.util.logging.Logger.*;
5 import static net.pterodactylus.sone.utils.NumberParsers.*;
9 import java.util.logging.*;
11 import javax.annotation.*;
12 import javax.inject.*;
14 import net.pterodactylus.sone.data.*;
15 import net.pterodactylus.sone.data.Profile.*;
16 import net.pterodactylus.sone.database.*;
17 import net.pterodactylus.util.xml.*;
19 import com.codahale.metrics.*;
20 import com.google.common.base.*;
24 * Parses a {@link Sone} from an XML {@link InputStream}.
26 public class SoneParser {
/* Logger shared by all instances; it is named after this class. */
28 private static final Logger logger = getLogger(SoneParser.class.getName());
/* Highest protocol version parseSone accepts; larger values are logged as "Unknown protocol version". */
29 private static final int MAX_PROTOCOL_VERSION = 0;
/* Supplies the Sone, post, post reply, album and image builders used by parseSone. */
30 private final Database database;
/* Updated at the end of parseSone with the elapsed parsing time in microseconds. */
31 private final Histogram soneParsingDurationHistogram;
/**
 * Creates a parser that works on the given database and reports parse durations to a
 * histogram obtained from the given metric registry.
 *
 * @param database the database whose builders are used while parsing
 * @param metricRegistry the registry asked for the histogram named "sone.parse.duration"
 */
34 public SoneParser(Database database, MetricRegistry metricRegistry) {
35 this.database = database;
/*
 * The registry receives the metric name together with a supplier for a new histogram.
 * NOTE(review): presumably the supplier is only invoked when no histogram is registered
 * under that name yet, and the reservoir arguments are (size, alpha) - confirm against
 * the Metrics API.
 */
36 this.soneParsingDurationHistogram = metricRegistry.histogram("sone.parse.duration", () -> new Histogram(new ExponentiallyDecayingReservoir(3000, 0)));
/**
 * Parses the XML from the given stream into a newly built {@link Sone}.
 * <p>
 * The new Sone is built from the identity of {@code originalSone} and is marked local if the
 * original is local. The method then checks the protocol version and reads the time, client,
 * profile (including its fields), posts, replies, liked post and reply IDs, and albums with
 * their images. Time and client are set on the new Sone as soon as they are read; profile,
 * posts, replies, likes and top-level albums are collected in local variables and applied
 * together at the end. Problems in the XML are written to the log. Finally the elapsed time
 * is recorded in the duration histogram.
 *
 * @param originalSone the Sone whose identity and local flag are carried over; it is also named in log messages
 * @param soneInputStream the stream that is handed to the XML parser
 * @return NOTE(review): presumably the populated Sone, or {@code null} after a logged fatal problem - confirm
 * @throws SoneException declared by the signature - TODO confirm which step raises it
 */
40 public Sone parseSone(Sone originalSone, InputStream soneInputStream) throws SoneException {
41 /* TODO - impose a size limit? */
/* Started before any work so that the duration recorded at the end covers the whole parse. */
43 Stopwatch stopwatch = Stopwatch.createStarted();
45 /* XML parsing is not thread-safe. */
47 document = XML.transformToDocument(soneInputStream);
/* A null document is treated as unparseable XML and reported as a warning. */
49 if (document == null) {
50 /* TODO - mark Sone as bad. */
51 logger.log(Level.WARNING, String.format("Could not parse XML for Sone %s!", originalSone));
/* Build a new Sone from the original's identity; the original is only queried (identity, local flag, latest edition). */
55 SoneBuilder soneBuilder = database.newSoneBuilder().from(originalSone.getIdentity());
56 if (originalSone.isLocal()) {
57 soneBuilder = soneBuilder.local();
59 Sone sone = soneBuilder.build();
63 soneXml = SimpleXML.fromDocument(document);
64 } catch (NullPointerException npe1) {
65 /* for some reason, invalid XML can cause NPEs. */
66 logger.log(Level.WARNING, String.format("XML for Sone %s can not be parsed!", sone), npe1);
/* protocolVersion stays null unless a "protocol-version" value is present in the XML. */
70 Integer protocolVersion = null;
71 String soneProtocolVersion = soneXml.getValue("protocol-version", null);
72 if (soneProtocolVersion != null) {
/* NOTE(review): presumably yields the given fallback (null) for non-numeric text - confirm in NumberParsers. */
73 protocolVersion = parseInt(soneProtocolVersion, null);
75 if (protocolVersion == null) {
76 logger.log(Level.INFO, "No protocol version found, assuming 0.");
/* Versions below 0 or above MAX_PROTOCOL_VERSION are reported with "Not parsing Sone." */
80 if (protocolVersion < 0) {
81 logger.log(Level.WARNING, String.format("Invalid protocol version: %d! Not parsing Sone.", protocolVersion));
85 /* check for valid versions. */
86 if (protocolVersion > MAX_PROTOCOL_VERSION) {
87 logger.log(Level.WARNING, String.format("Unknown protocol version: %d! Not parsing Sone.", protocolVersion));
/* The "time" value is mandatory and has to parse as a long; both failures are logged as warnings. */
91 String soneTime = soneXml.getValue("time", null);
92 if (soneTime == null) {
93 /* TODO - mark Sone as bad. */
94 logger.log(Level.WARNING, String.format("Downloaded time for Sone %s was null!", sone));
98 sone.setTime(Long.parseLong(soneTime));
99 } catch (NumberFormatException nfe1) {
100 /* TODO - mark Sone as bad. */
101 logger.log(Level.WARNING, String.format("Downloaded Sone %s with invalid time: %s", sone, soneTime));
/* The "client" node is optional (no warning when absent); if it exists, a missing name or version is logged. */
105 SimpleXML clientXml = soneXml.getNode("client");
106 if (clientXml != null) {
107 String clientName = clientXml.getValue("name", null);
108 String clientVersion = clientXml.getValue("version", null);
109 if ((clientName == null) || (clientVersion == null)) {
110 logger.log(Level.WARNING, String.format("Download Sone %s with client XML but missing name or version!", sone));
113 sone.setClient(new Client(clientName, clientVersion));
/* A missing "profile" node is logged as a warning. */
116 SimpleXML profileXml = soneXml.getNode("profile");
117 if (profileXml == null) {
118 /* TODO - mark Sone as bad. */
119 logger.log(Level.WARNING, String.format("Downloaded Sone %s has no profile!", sone));
/* Name parts are passed to the profile as read, using null as the default value. */
124 String profileFirstName = profileXml.getValue("first-name", null);
125 String profileMiddleName = profileXml.getValue("middle-name", null);
126 String profileLastName = profileXml.getValue("last-name", null);
/*
 * Birth date parts use "" as the default text and null as the parse fallback.
 * NOTE(review): presumably parseInt returns the fallback for non-numeric text - confirm in NumberParsers.
 */
127 Integer profileBirthDay = parseInt(profileXml.getValue("birth-day", ""), null);
128 Integer profileBirthMonth = parseInt(profileXml.getValue("birth-month", ""), null);
129 Integer profileBirthYear = parseInt(profileXml.getValue("birth-year", ""), null);
130 Profile profile = new Profile(sone).setFirstName(profileFirstName).setMiddleName(profileMiddleName).setLastName(profileLastName);
131 profile.setBirthDay(profileBirthDay).setBirthMonth(profileBirthMonth).setBirthYear(profileBirthYear);
132 /* avatar is processed after images are loaded. */
133 String avatarId = profileXml.getValue("avatar", null);
135 /* parse profile fields. */
136 SimpleXML profileFieldsXml = profileXml.getNode("fields");
137 if (profileFieldsXml != null) {
138 for (SimpleXML fieldXml : profileFieldsXml.getNodes("field")) {
/* Only the field name is checked for null; the value defaults to the empty string. */
139 String fieldName = fieldXml.getValue("field-name", null);
140 String fieldValue = fieldXml.getValue("field-value", "");
141 if (fieldName == null) {
142 logger.log(Level.WARNING, String.format("Downloaded profile field for Sone %s with missing data! Name: %s, Value: %s", sone, fieldName, fieldValue));
/* The name is trimmed before the field is added; empty and duplicate names are logged below. */
146 profile.addField(fieldName.trim()).setValue(fieldValue);
147 } catch (EmptyFieldName efn1) {
148 logger.log(Level.WARNING, "Empty field name!", efn1);
150 } catch (DuplicateField df1) {
151 logger.log(Level.WARNING, String.format("Duplicate field: %s", fieldName), df1);
/* Posts are collected in a local set and attached to the Sone at the end of the method. */
158 SimpleXML postsXml = soneXml.getNode("posts");
159 Set<Post> posts = new HashSet<>();
160 if (postsXml == null) {
161 /* TODO - mark Sone as bad. */
162 logger.log(Level.WARNING, String.format("Downloaded Sone %s has no posts!", sone));
164 for (SimpleXML postXml : postsXml.getNodes("post")) {
165 String postId = postXml.getValue("id", null);
166 String postRecipientId = postXml.getValue("recipient", null);
167 String postTime = postXml.getValue("time", null);
168 String postText = postXml.getValue("text", null);
/* ID, time and text are checked for null; the recipient is not part of this check. */
169 if ((postId == null) || (postTime == null) || (postText == null)) {
170 /* TODO - mark Sone as bad. */
171 logger.log(Level.WARNING, String.format("Downloaded post for Sone %s with missing data! ID: %s, Time: %s, Text: %s", sone, postId, postTime, postText));
175 PostBuilder postBuilder = database.newPostBuilder();
176 /* TODO - parse time correctly. */
177 postBuilder.withId(postId).from(sone.getId()).withTime(Long.parseLong(postTime)).withText(postText);
/* A recipient is only set when the ID is exactly 43 characters long; any other value is silently ignored. */
178 if ((postRecipientId != null) && (postRecipientId.length() == 43)) {
179 postBuilder.to(postRecipientId);
181 posts.add(postBuilder.build());
182 } catch (NumberFormatException nfe1) {
183 /* TODO - mark Sone as bad. */
184 logger.log(Level.WARNING, String.format("Downloaded post for Sone %s with invalid time: %s", sone, postTime));
/* Replies follow the same pattern as posts: collected locally, attached at the end. */
191 SimpleXML repliesXml = soneXml.getNode("replies");
192 Set<PostReply> replies = new HashSet<>();
193 if (repliesXml == null) {
194 /* TODO - mark Sone as bad. */
195 logger.log(Level.WARNING, String.format("Downloaded Sone %s has no replies!", sone));
197 for (SimpleXML replyXml : repliesXml.getNodes("reply")) {
198 String replyId = replyXml.getValue("id", null);
199 String replyPostId = replyXml.getValue("post-id", null);
200 String replyTime = replyXml.getValue("time", null);
201 String replyText = replyXml.getValue("text", null);
/* All four values are checked for null here. */
202 if ((replyId == null) || (replyPostId == null) || (replyTime == null) || (replyText == null)) {
203 /* TODO - mark Sone as bad. */
204 logger.log(Level.WARNING, String.format("Downloaded reply for Sone %s with missing data! ID: %s, Post: %s, Time: %s, Text: %s", sone, replyId, replyPostId, replyTime, replyText));
208 PostReplyBuilder postReplyBuilder = database.newPostReplyBuilder();
209 /* TODO - parse time correctly. */
210 postReplyBuilder.withId(replyId).from(sone.getId()).to(replyPostId).withTime(Long.parseLong(replyTime)).withText(replyText);
211 replies.add(postReplyBuilder.build());
212 } catch (NumberFormatException nfe1) {
213 /* TODO - mark Sone as bad. */
214 logger.log(Level.WARNING, String.format("Downloaded reply for Sone %s with invalid time: %s", sone, replyTime));
220 /* parse liked post IDs. */
221 SimpleXML likePostIdsXml = soneXml.getNode("post-likes");
222 Set<String> likedPostIds = new HashSet<>();
223 if (likePostIdsXml == null) {
224 /* TODO - mark Sone as bad. */
225 logger.log(Level.WARNING, String.format("Downloaded Sone %s has no post likes!", sone));
227 for (SimpleXML likedPostIdXml : likePostIdsXml.getNodes("post-like")) {
/* The value of each "post-like" node is added as-is, without validation. */
228 String postId = likedPostIdXml.getValue();
229 likedPostIds.add(postId);
233 /* parse liked reply IDs. */
234 SimpleXML likeReplyIdsXml = soneXml.getNode("reply-likes");
235 Set<String> likedReplyIds = new HashSet<>();
236 if (likeReplyIdsXml == null) {
237 /* TODO - mark Sone as bad. */
238 logger.log(Level.WARNING, String.format("Downloaded Sone %s has no reply likes!", sone));
240 for (SimpleXML likedReplyIdXml : likeReplyIdsXml.getNodes("reply-like")) {
/* The value of each "reply-like" node is added as-is, without validation. */
241 String replyId = likedReplyIdXml.getValue();
242 likedReplyIds.add(replyId);
/*
 * The "albums" node is optional (no warning when absent). allImages indexes every parsed
 * image by ID for the avatar lookup further down, allAlbums indexes albums by ID for the
 * parent lookup, and topLevelAlbums is what gets added to the Sone's root album at the end.
 */
247 SimpleXML albumsXml = soneXml.getNode("albums");
248 Map<String, Image> allImages = new HashMap<>();
249 List<Album> topLevelAlbums = new ArrayList<>();
250 Map<String, Album> allAlbums = new HashMap<>();
251 if (albumsXml != null) {
252 for (SimpleXML albumXml : albumsXml.getNodes("album")) {
253 String id = albumXml.getValue("id", null);
254 String parentId = albumXml.getValue("parent", null);
255 String title = albumXml.getValue("title", null);
256 String description = albumXml.getValue("description", "");
/* Only ID and title are checked for null; the description defaults to the empty string. */
257 if ((id == null) || (title == null)) {
258 logger.log(Level.WARNING, String.format("Downloaded Sone %s contains invalid album!", sone));
/* A parent can only be resolved if an earlier iteration already put it into allAlbums. */
262 if (parentId != null) {
263 parent = allAlbums.get(parentId);
264 if (parent == null) {
265 logger.log(Level.WARNING, String.format("Downloaded Sone %s has album with invalid parent!", sone));
269 Album album = database.newAlbumBuilder()
275 .setDescription(description)
277 if (parent != null) {
278 parent.addAlbum(album);
280 topLevelAlbums.add(album);
281 allAlbums.put(album.getId(), album);
283 SimpleXML imagesXml = albumXml.getNode("images");
284 if (imagesXml != null) {
285 for (SimpleXML imageXml : imagesXml.getNodes("image")) {
286 String imageId = imageXml.getValue("id", null);
287 String imageCreationTimeString = imageXml.getValue("creation-time", null);
288 String imageKey = imageXml.getValue("key", null);
289 String imageTitle = imageXml.getValue("title", null);
290 String imageDescription = imageXml.getValue("description", "");
291 String imageWidthString = imageXml.getValue("width", null);
292 String imageHeightString = imageXml.getValue("height", null);
/* Every value except the description (which defaults to "") is checked for null. */
293 if ((imageId == null) || (imageCreationTimeString == null) || (imageKey == null) || (imageTitle == null) || (imageWidthString == null) || (imageHeightString == null)) {
294 logger.log(Level.WARNING, String.format("Downloaded Sone %s contains invalid images!", sone));
/*
 * NOTE(review): presumably the NumberParsers helpers return the given fallback (0) for
 * non-numeric text - confirm. A width or height of 0 is then caught by the check below.
 */
297 long creationTime = parseLong(imageCreationTimeString, 0L);
298 int imageWidth = parseInt(imageWidthString, 0);
299 int imageHeight = parseInt(imageHeightString, 0);
300 if ((imageWidth < 1) || (imageHeight < 1)) {
301 logger.log(Level.WARNING, String.format("Downloaded Sone %s contains image %s with invalid dimensions (%s, %s)!", sone, imageId, imageWidthString, imageHeightString));
/* The image is populated in three modify/update rounds: owner, key and time; title and description; dimensions. */
304 Image image = database.newImageBuilder().withId(imageId).build().modify().setSone(sone).setKey(imageKey).setCreationTime(creationTime).update();
305 image = image.modify().setTitle(imageTitle).setDescription(imageDescription).update();
306 image = image.modify().setWidth(imageWidth).setHeight(imageHeight).update();
307 album.addImage(image);
308 allImages.put(imageId, image);
314 /* process avatar. */
/* The map lookup yields null for an ID that matches no parsed image; that null is passed on to setAvatar. */
315 if (avatarId != null) {
316 profile.setAvatar(allImages.get(avatarId));
319 /* okay, apparently everything was parsed correctly. Now import. */
320 sone.setProfile(profile);
321 sone.setPosts(posts);
322 sone.setReplies(replies);
323 sone.setLikePostIds(likedPostIds);
324 sone.setLikeReplyIds(likedReplyIds);
325 for (Album album : topLevelAlbums) {
326 sone.getRootAlbum().addAlbum(album);
329 // record the duration
331 soneParsingDurationHistogram.update(stopwatch.elapsed(MICROSECONDS));
/* The log message is passed as a lambda rather than as a pre-built string. */
332 logger.fine(() -> "Parsed " + originalSone.getIdentity().getId() + "@" + originalSone.getLatestEdition() + " in " + stopwatch.elapsed(MICROSECONDS) + "μs.");