Don’t show line breaks in post excerpts.
[Sone.git] / src / main / java / net / pterodactylus / sone / web / SearchPage.java
1 /*
2  * Sone - SearchPage.java - Copyright © 2010 David Roden
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
16  */
17
18 package net.pterodactylus.sone.web;
19
20 import java.util.ArrayList;
21 import java.util.Collection;
22 import java.util.Collections;
23 import java.util.Comparator;
24 import java.util.HashSet;
25 import java.util.List;
26 import java.util.Set;
27 import java.util.logging.Level;
28 import java.util.logging.Logger;
29
30 import net.pterodactylus.sone.data.Post;
31 import net.pterodactylus.sone.data.Profile;
32 import net.pterodactylus.sone.data.Profile.Field;
33 import net.pterodactylus.sone.data.Reply;
34 import net.pterodactylus.sone.data.Sone;
35 import net.pterodactylus.util.cache.Cache;
36 import net.pterodactylus.util.cache.CacheException;
37 import net.pterodactylus.util.cache.CacheItem;
38 import net.pterodactylus.util.cache.DefaultCacheItem;
39 import net.pterodactylus.util.cache.MemoryCache;
40 import net.pterodactylus.util.cache.ValueRetriever;
41 import net.pterodactylus.util.collection.Mapper;
42 import net.pterodactylus.util.collection.Mappers;
43 import net.pterodactylus.util.collection.Pagination;
44 import net.pterodactylus.util.collection.TimedMap;
45 import net.pterodactylus.util.filter.Filter;
46 import net.pterodactylus.util.filter.Filters;
47 import net.pterodactylus.util.logging.Logging;
48 import net.pterodactylus.util.number.Numbers;
49 import net.pterodactylus.util.template.Template;
50 import net.pterodactylus.util.template.TemplateContext;
51 import net.pterodactylus.util.text.StringEscaper;
52 import net.pterodactylus.util.text.TextException;
53
54 /**
55  * This page lets the user search for posts and replies that contain certain
56  * words.
57  *
58  * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
59  */
60 public class SearchPage extends SoneTemplatePage {
61
62         /** The logger. */
63         private static final Logger logger = Logging.getLogger(SearchPage.class);
64
65         /** Short-term cache. */
66         private final Cache<List<Phrase>, Set<Hit<Post>>> hitCache = new MemoryCache<List<Phrase>, Set<Hit<Post>>>(new ValueRetriever<List<Phrase>, Set<Hit<Post>>>() {
67
68                 @SuppressWarnings("synthetic-access")
69                 public CacheItem<Set<Hit<Post>>> retrieve(List<Phrase> phrases) throws CacheException {
70                         Set<Post> posts = new HashSet<Post>();
71                         for (Sone sone : webInterface.getCore().getSones()) {
72                                 posts.addAll(sone.getPosts());
73                         }
74                         return new DefaultCacheItem<Set<Hit<Post>>>(getHits(Filters.filteredSet(posts, Post.FUTURE_POSTS_FILTER), phrases, new PostStringGenerator()));
75                 }
76
77         }, new TimedMap<List<Phrase>, CacheItem<Set<Hit<Post>>>>(300000));
78
79         /**
80          * Creates a new search page.
81          *
82          * @param template
83          *            The template to render
84          * @param webInterface
85          *            The Sone web interface
86          */
87         public SearchPage(Template template, WebInterface webInterface) {
88                 super("search.html", template, "Page.Search.Title", webInterface);
89         }
90
91         //
92         // SONETEMPLATEPAGE METHODS
93         //
94
95         /**
96          * {@inheritDoc}
97          */
98         @Override
99         protected void processTemplate(Request request, TemplateContext templateContext) throws RedirectException {
100                 super.processTemplate(request, templateContext);
101                 String query = request.getHttpRequest().getParam("query").trim();
102                 if (query.length() == 0) {
103                         throw new RedirectException("index.html");
104                 }
105
106                 List<Phrase> phrases = parseSearchPhrases(query);
107                 if (phrases.isEmpty()) {
108                         throw new RedirectException("index.html");
109                 }
110
111                 Set<Sone> sones = webInterface.getCore().getSones();
112                 Set<Hit<Sone>> soneHits = getHits(sones, phrases, SoneStringGenerator.COMPLETE_GENERATOR);
113
114                 Set<Hit<Post>> postHits;
115                 try {
116                         postHits = hitCache.get(phrases);
117                 } catch (CacheException ce1) {
118                         /* should never happen. */
119                         logger.log(Level.SEVERE, "Could not get search results from cache!", ce1);
120                         postHits = Collections.emptySet();
121                 }
122
123                 /* now filter. */
124                 soneHits = Filters.filteredSet(soneHits, Hit.POSITIVE_FILTER);
125                 postHits = Filters.filteredSet(postHits, Hit.POSITIVE_FILTER);
126
127                 /* now sort. */
128                 List<Hit<Sone>> sortedSoneHits = new ArrayList<Hit<Sone>>(soneHits);
129                 Collections.sort(sortedSoneHits, Hit.DESCENDING_COMPARATOR);
130                 List<Hit<Post>> sortedPostHits = new ArrayList<Hit<Post>>(postHits);
131                 Collections.sort(sortedPostHits, Hit.DESCENDING_COMPARATOR);
132
133                 /* extract Sones and posts. */
134                 List<Sone> resultSones = Mappers.mappedList(sortedSoneHits, new HitMapper<Sone>());
135                 List<Post> resultPosts = Mappers.mappedList(sortedPostHits, new HitMapper<Post>());
136
137                 /* pagination. */
138                 Pagination<Sone> sonePagination = new Pagination<Sone>(resultSones, webInterface.getCore().getPreferences().getPostsPerPage()).setPage(Numbers.safeParseInteger(request.getHttpRequest().getParam("sonePage"), 0));
139                 Pagination<Post> postPagination = new Pagination<Post>(resultPosts, webInterface.getCore().getPreferences().getPostsPerPage()).setPage(Numbers.safeParseInteger(request.getHttpRequest().getParam("postPage"), 0));
140
141                 templateContext.set("sonePagination", sonePagination);
142                 templateContext.set("soneHits", sonePagination.getItems());
143                 templateContext.set("postPagination", postPagination);
144                 templateContext.set("postHits", postPagination.getItems());
145         }
146
147         //
148         // PRIVATE METHODS
149         //
150
151         /**
152          * Collects hit information for the given objects. The objects are converted
153          * to a {@link String} using the given {@link StringGenerator}, and the
154          * {@link #calculateScore(List, String) calculated score} is stored together
155          * with the object in a {@link Hit}, and all resulting {@link Hit}s are then
156          * returned.
157          *
158          * @param <T>
159          *            The type of the objects
160          * @param objects
161          *            The objects to search over
162          * @param phrases
163          *            The phrases to search for
164          * @param stringGenerator
165          *            The string generator for the objects
166          * @return The hits for the given phrases
167          */
168         private <T> Set<Hit<T>> getHits(Collection<T> objects, List<Phrase> phrases, StringGenerator<T> stringGenerator) {
169                 Set<Hit<T>> hits = new HashSet<Hit<T>>();
170                 for (T object : objects) {
171                         String objectString = stringGenerator.generateString(object);
172                         double score = calculateScore(phrases, objectString);
173                         hits.add(new Hit<T>(object, score));
174                 }
175                 return hits;
176         }
177
178         /**
179          * Parses the given query into search phrases. The query is split on
180          * whitespace while allowing to group words using single or double quotes.
181          * Isolated phrases starting with a “+” are
182          * {@link Phrase.Optionality#REQUIRED}, phrases with a “-” are
183          * {@link Phrase.Optionality#FORBIDDEN}.
184          *
185          * @param query
186          *            The query to parse
187          * @return The parsed phrases
188          */
189         private List<Phrase> parseSearchPhrases(String query) {
190                 List<String> parsedPhrases = null;
191                 try {
192                         parsedPhrases = StringEscaper.parseLine(query);
193                 } catch (TextException te1) {
194                         /* invalid query. */
195                         return Collections.emptyList();
196                 }
197
198                 List<Phrase> phrases = new ArrayList<Phrase>();
199                 for (String phrase : parsedPhrases) {
200                         if (phrase.startsWith("+")) {
201                                 if (phrase.length() > 1) {
202                                         phrases.add(new Phrase(phrase.substring(1), Phrase.Optionality.REQUIRED));
203                                 } else {
204                                         phrases.add(new Phrase("+", Phrase.Optionality.OPTIONAL));
205                                 }
206                         } else if (phrase.startsWith("-")) {
207                                 if (phrase.length() > 1) {
208                                         phrases.add(new Phrase(phrase.substring(1), Phrase.Optionality.FORBIDDEN));
209                                 } else {
210                                         phrases.add(new Phrase("-", Phrase.Optionality.OPTIONAL));
211                                 }
212                         } else {
213                                 phrases.add(new Phrase(phrase, Phrase.Optionality.OPTIONAL));
214                         }
215                 }
216                 return phrases;
217         }
218
219         /**
220          * Calculates the score for the given expression when using the given
221          * phrases.
222          *
223          * @param phrases
224          *            The phrases to search for
225          * @param expression
226          *            The expression to search
227          * @return The score of the expression
228          */
229         private double calculateScore(List<Phrase> phrases, String expression) {
230                 logger.log(Level.FINEST, "Calculating Score for “%s”…", expression);
231                 double optionalHits = 0;
232                 double requiredHits = 0;
233                 int forbiddenHits = 0;
234                 int requiredPhrases = 0;
235                 for (Phrase phrase : phrases) {
236                         String phraseString = phrase.getPhrase().toLowerCase();
237                         if (phrase.getOptionality() == Phrase.Optionality.REQUIRED) {
238                                 ++requiredPhrases;
239                         }
240                         int matches = 0;
241                         int index = 0;
242                         double score = 0;
243                         while (index < expression.length()) {
244                                 int position = expression.toLowerCase().indexOf(phraseString, index);
245                                 if (position == -1) {
246                                         break;
247                                 }
248                                 score += Math.pow(1 - position / (double) expression.length(), 2);
249                                 index = position + phraseString.length();
250                                 logger.log(Level.FINEST, "Got hit at position %d.", position);
251                                 ++matches;
252                         }
253                         logger.log(Level.FINEST, "Score: %f", score);
254                         if (matches == 0) {
255                                 continue;
256                         }
257                         if (phrase.getOptionality() == Phrase.Optionality.REQUIRED) {
258                                 requiredHits += score;
259                         }
260                         if (phrase.getOptionality() == Phrase.Optionality.OPTIONAL) {
261                                 optionalHits += score;
262                         }
263                         if (phrase.getOptionality() == Phrase.Optionality.FORBIDDEN) {
264                                 forbiddenHits += matches;
265                         }
266                 }
267                 return requiredHits * 3 + optionalHits + (requiredHits - requiredPhrases) * 5 - (forbiddenHits * 2);
268         }
269
270         /**
271          * Converts a given object into a {@link String}.
272          *
273          * @param <T>
274          *            The type of the objects
275          * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
276          */
277         private static interface StringGenerator<T> {
278
279                 /**
280                  * Generates a {@link String} for the given object.
281                  *
282                  * @param object
283                  *            The object to generate the {@link String} for
284                  * @return The generated {@link String}
285                  */
286                 public String generateString(T object);
287
288         }
289
290         /**
291          * Generates a {@link String} from a {@link Sone}, concatenating the name of
292          * the Sone and all {@link Profile} {@link Field} values.
293          *
294          * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
295          */
296         private static class SoneStringGenerator implements StringGenerator<Sone> {
297
298                 /** A static instance of a complete Sone string generator. */
299                 public static final SoneStringGenerator COMPLETE_GENERATOR = new SoneStringGenerator(true);
300
301                 /**
302                  * A static instance of a Sone string generator that will only use the
303                  * name of the Sone.
304                  */
305                 public static final SoneStringGenerator NAME_GENERATOR = new SoneStringGenerator(false);
306
307                 /** Whether to generate a string from all data of a Sone. */
308                 private final boolean complete;
309
310                 /**
311                  * Creates a new Sone string generator.
312                  *
313                  * @param complete
314                  *            {@code true} to use the profile’s fields, {@code false} to
315                  *            not to use the profile‘s fields
316                  */
317                 private SoneStringGenerator(boolean complete) {
318                         this.complete = complete;
319                 }
320
321                 /**
322                  * {@inheritDoc}
323                  */
324                 @Override
325                 public String generateString(Sone sone) {
326                         StringBuilder soneString = new StringBuilder();
327                         soneString.append(sone.getName());
328                         Profile soneProfile = sone.getProfile();
329                         if (soneProfile.getFirstName() != null) {
330                                 soneString.append(' ').append(soneProfile.getFirstName());
331                         }
332                         if (soneProfile.getMiddleName() != null) {
333                                 soneString.append(' ').append(soneProfile.getMiddleName());
334                         }
335                         if (soneProfile.getLastName() != null) {
336                                 soneString.append(' ').append(soneProfile.getLastName());
337                         }
338                         if (complete) {
339                                 for (Field field : soneProfile.getFields()) {
340                                         soneString.append(' ').append(field.getValue());
341                                 }
342                         }
343                         return soneString.toString();
344                 }
345
346         }
347
348         /**
349          * Generates a {@link String} from a {@link Post}, concatenating the text of
350          * the post, the text of all {@link Reply}s, and the name of all
351          * {@link Sone}s that have replied.
352          *
353          * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
354          */
355         private class PostStringGenerator implements StringGenerator<Post> {
356
357                 /**
358                  * {@inheritDoc}
359                  */
360                 @Override
361                 public String generateString(Post post) {
362                         StringBuilder postString = new StringBuilder();
363                         postString.append(post.getText());
364                         if (post.getRecipient() != null) {
365                                 postString.append(' ').append(SoneStringGenerator.NAME_GENERATOR.generateString(post.getRecipient()));
366                         }
367                         for (Reply reply : Filters.filteredList(webInterface.getCore().getReplies(post), Reply.FUTURE_REPLIES_FILTER)) {
368                                 postString.append(' ').append(SoneStringGenerator.NAME_GENERATOR.generateString(reply.getSone()));
369                                 postString.append(' ').append(reply.getText());
370                         }
371                         return postString.toString();
372                 }
373
374         }
375
376         /**
377          * A search phrase.
378          *
379          * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
380          */
381         private static class Phrase {
382
383                 /**
384                  * The optionality of a search phrase.
385                  *
386                  * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’
387                  *         Roden</a>
388                  */
389                 public enum Optionality {
390
391                         /** The phrase is optional. */
392                         OPTIONAL,
393
394                         /** The phrase is required. */
395                         REQUIRED,
396
397                         /** The phrase is forbidden. */
398                         FORBIDDEN
399
400                 }
401
402                 /** The phrase to search for. */
403                 private final String phrase;
404
405                 /** The optionality of the phrase. */
406                 private final Optionality optionality;
407
408                 /**
409                  * Creates a new phrase.
410                  *
411                  * @param phrase
412                  *            The phrase to search for
413                  * @param optionality
414                  *            The optionality of the phrase
415                  */
416                 public Phrase(String phrase, Optionality optionality) {
417                         this.optionality = optionality;
418                         this.phrase = phrase;
419                 }
420
421                 /**
422                  * Returns the phrase to search for.
423                  *
424                  * @return The phrase to search for
425                  */
426                 public String getPhrase() {
427                         return phrase;
428                 }
429
430                 /**
431                  * Returns the optionality of the phrase.
432                  *
433                  * @return The optionality of the phrase
434                  */
435                 public Optionality getOptionality() {
436                         return optionality;
437                 }
438
439                 //
440                 // OBJECT METHODS
441                 //
442
443                 /**
444                  * {@inheritDoc}
445                  */
446                 @Override
447                 public int hashCode() {
448                         return phrase.hashCode() ^ ((optionality == Optionality.FORBIDDEN) ? (0xaaaaaaaa) : ((optionality == Optionality.REQUIRED) ? 0x55555555 : 0));
449                 }
450
451                 /**
452                  * {@inheritDoc}
453                  */
454                 @Override
455                 public boolean equals(Object object) {
456                         if (!(object instanceof Phrase)) {
457                                 return false;
458                         }
459                         Phrase phrase = (Phrase) object;
460                         return (this.optionality == phrase.optionality) && this.phrase.equals(phrase.phrase);
461                 }
462
463         }
464
465         /**
466          * A hit consists of a searched object and the score it got for the phrases
467          * of the search.
468          *
469          * @see SearchPage#calculateScore(List, String)
470          * @param <T>
471          *            The type of the searched object
472          * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
473          */
474         private static class Hit<T> {
475
476                 /** Filter for {@link Hit}s with a score of more than 0. */
477                 public static final Filter<Hit<?>> POSITIVE_FILTER = new Filter<Hit<?>>() {
478
479                         @Override
480                         public boolean filterObject(Hit<?> hit) {
481                                 return hit.getScore() > 0;
482                         }
483
484                 };
485
486                 /** Comparator that sorts {@link Hit}s descending by score. */
487                 public static final Comparator<Hit<?>> DESCENDING_COMPARATOR = new Comparator<Hit<?>>() {
488
489                         @Override
490                         public int compare(Hit<?> leftHit, Hit<?> rightHit) {
491                                 return (rightHit.getScore() < leftHit.getScore()) ? -1 : ((rightHit.getScore() > leftHit.getScore()) ? 1 : 0);
492                         }
493
494                 };
495
496                 /** The object that was searched. */
497                 private final T object;
498
499                 /** The score of the object. */
500                 private final double score;
501
502                 /**
503                  * Creates a new hit.
504                  *
505                  * @param object
506                  *            The object that was searched
507                  * @param score
508                  *            The score of the object
509                  */
510                 public Hit(T object, double score) {
511                         this.object = object;
512                         this.score = score;
513                 }
514
515                 /**
516                  * Returns the object that was searched.
517                  *
518                  * @return The object that was searched
519                  */
520                 public T getObject() {
521                         return object;
522                 }
523
524                 /**
525                  * Returns the score of the object.
526                  *
527                  * @return The score of the object
528                  */
529                 public double getScore() {
530                         return score;
531                 }
532
533         }
534
535         /**
536          * Extracts the object from a {@link Hit}.
537          *
538          * @param <T>
539          *            The type of the object to extract
540          * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
541          */
542         public static class HitMapper<T> implements Mapper<Hit<T>, T> {
543
544                 /**
545                  * {@inheritDoc}
546                  */
547                 @Override
548                 public T map(Hit<T> input) {
549                         return input.getObject();
550                 }
551
552         }
553
554 }