Use double instead of int for score calculation, weigh position of hit more.
[Sone.git] / src / main / java / net / pterodactylus / sone / web / SearchPage.java
1 /*
2  * Sone - SearchPage.java - Copyright © 2010 David Roden
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
16  */
17
18 package net.pterodactylus.sone.web;
19
20 import java.util.ArrayList;
21 import java.util.Collection;
22 import java.util.Collections;
23 import java.util.Comparator;
24 import java.util.HashSet;
25 import java.util.List;
26 import java.util.Set;
27
28 import net.pterodactylus.sone.data.Post;
29 import net.pterodactylus.sone.data.Profile;
30 import net.pterodactylus.sone.data.Profile.Field;
31 import net.pterodactylus.sone.data.Reply;
32 import net.pterodactylus.sone.data.Sone;
33 import net.pterodactylus.util.collection.Converter;
34 import net.pterodactylus.util.collection.Converters;
35 import net.pterodactylus.util.collection.Pagination;
36 import net.pterodactylus.util.filter.Filter;
37 import net.pterodactylus.util.filter.Filters;
38 import net.pterodactylus.util.logging.Logging;
39 import net.pterodactylus.util.number.Numbers;
40 import net.pterodactylus.util.template.Template;
41 import net.pterodactylus.util.template.TemplateContext;
42 import net.pterodactylus.util.text.StringEscaper;
43 import net.pterodactylus.util.text.TextException;
44
45 /**
46  * This page lets the user search for posts and replies that contain certain
47  * words.
48  *
49  * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
50  */
51 public class SearchPage extends SoneTemplatePage {
52
53         /**
54          * Creates a new search page.
55          *
56          * @param template
57          *            The template to render
58          * @param webInterface
59          *            The Sone web interface
60          */
61         public SearchPage(Template template, WebInterface webInterface) {
62                 super("search.html", template, "Page.Search.Title", webInterface);
63         }
64
65         //
66         // SONETEMPLATEPAGE METHODS
67         //
68
69         /**
70          * {@inheritDoc}
71          */
72         @Override
73         protected void processTemplate(Request request, TemplateContext templateContext) throws RedirectException {
74                 super.processTemplate(request, templateContext);
75                 String query = request.getHttpRequest().getParam("query").trim();
76                 if (query.length() == 0) {
77                         throw new RedirectException("index.html");
78                 }
79
80                 List<Phrase> phrases = parseSearchPhrases(query);
81
82                 Set<Sone> sones = webInterface.getCore().getSones();
83                 Set<Hit<Sone>> soneHits = getHits(sones, phrases, SoneStringGenerator.COMPLETE_GENERATOR);
84
85                 Set<Post> posts = new HashSet<Post>();
86                 for (Sone sone : sones) {
87                         posts.addAll(sone.getPosts());
88                 }
89                 @SuppressWarnings("synthetic-access")
90                 Set<Hit<Post>> postHits = getHits(Filters.filteredSet(posts, Post.FUTURE_POSTS_FILTER), phrases, new PostStringGenerator());
91
92                 /* now filter. */
93                 soneHits = Filters.filteredSet(soneHits, Hit.POSITIVE_FILTER);
94                 postHits = Filters.filteredSet(postHits, Hit.POSITIVE_FILTER);
95
96                 /* now sort. */
97                 List<Hit<Sone>> sortedSoneHits = new ArrayList<Hit<Sone>>(soneHits);
98                 Collections.sort(sortedSoneHits, Hit.DESCENDING_COMPARATOR);
99                 List<Hit<Post>> sortedPostHits = new ArrayList<Hit<Post>>(postHits);
100                 Collections.sort(sortedPostHits, Hit.DESCENDING_COMPARATOR);
101
102                 /* extract Sones and posts. */
103                 List<Sone> resultSones = Converters.convertList(sortedSoneHits, new HitConverter<Sone>());
104                 List<Post> resultPosts = Converters.convertList(sortedPostHits, new HitConverter<Post>());
105
106                 /* pagination. */
107                 Pagination<Sone> sonePagination = new Pagination<Sone>(resultSones, webInterface.getCore().getPreferences().getPostsPerPage()).setPage(Numbers.safeParseInteger(request.getHttpRequest().getParam("sonePage"), 0));
108                 Pagination<Post> postPagination = new Pagination<Post>(resultPosts, webInterface.getCore().getPreferences().getPostsPerPage()).setPage(Numbers.safeParseInteger(request.getHttpRequest().getParam("postPage"), 0));
109
110                 templateContext.set("sonePagination", sonePagination);
111                 templateContext.set("soneHits", sonePagination.getItems());
112                 templateContext.set("postPagination", postPagination);
113                 templateContext.set("postHits", postPagination.getItems());
114         }
115
116         //
117         // PRIVATE METHODS
118         //
119
120         /**
121          * Collects hit information for the given objects. The objects are converted
122          * to a {@link String} using the given {@link StringGenerator}, and the
123          * {@link #calculateScore(List, String) calculated score} is stored together
124          * with the object in a {@link Hit}, and all resulting {@link Hit}s are then
125          * returned.
126          *
127          * @param <T>
128          *            The type of the objects
129          * @param objects
130          *            The objects to search over
131          * @param phrases
132          *            The phrases to search for
133          * @param stringGenerator
134          *            The string generator for the objects
135          * @return The hits for the given phrases
136          */
137         private <T> Set<Hit<T>> getHits(Collection<T> objects, List<Phrase> phrases, StringGenerator<T> stringGenerator) {
138                 Set<Hit<T>> hits = new HashSet<Hit<T>>();
139                 for (T object : objects) {
140                         String objectString = stringGenerator.generateString(object);
141                         double score = calculateScore(phrases, objectString);
142                         hits.add(new Hit<T>(object, score));
143                 }
144                 return hits;
145         }
146
147         /**
148          * Parses the given query into search phrases. The query is split on
149          * whitespace while allowing to group words using single or double quotes.
150          * Isolated phrases starting with a “+” are
151          * {@link Phrase.Optionality#REQUIRED}, phrases with a “-” are
152          * {@link Phrase.Optionality#FORBIDDEN}.
153          *
154          * @param query
155          *            The query to parse
156          * @return The parsed phrases
157          */
158         private List<Phrase> parseSearchPhrases(String query) {
159                 List<String> parsedPhrases = null;
160                 try {
161                         parsedPhrases = StringEscaper.parseLine(query);
162                 } catch (TextException te1) {
163                         /* invalid query. */
164                         return Collections.emptyList();
165                 }
166
167                 List<Phrase> phrases = new ArrayList<Phrase>();
168                 for (String phrase : parsedPhrases) {
169                         if (phrase.startsWith("+")) {
170                                 phrases.add(new Phrase(phrase.substring(1), Phrase.Optionality.REQUIRED));
171                         } else if (phrase.startsWith("-")) {
172                                 phrases.add(new Phrase(phrase.substring(1), Phrase.Optionality.FORBIDDEN));
173                         }
174                         phrases.add(new Phrase(phrase, Phrase.Optionality.OPTIONAL));
175                 }
176                 return phrases;
177         }
178
179         /**
180          * Calculates the score for the given expression when using the given
181          * phrases.
182          *
183          * @param phrases
184          *            The phrases to search for
185          * @param expression
186          *            The expression to search
187          * @return The score of the expression
188          */
189         private double calculateScore(List<Phrase> phrases, String expression) {
190                 double optionalHits = 0;
191                 double requiredHits = 0;
192                 int forbiddenHits = 0;
193                 int requiredPhrases = 0;
194                 for (Phrase phrase : phrases) {
195                         String phraseString = phrase.getPhrase().toLowerCase();
196                         if (phrase.getOptionality() == Phrase.Optionality.REQUIRED) {
197                                 ++requiredPhrases;
198                         }
199                         int matches = 0;
200                         int index = 0;
201                         double score = 0;
202                         while (index < expression.length()) {
203                                 int position = expression.toLowerCase().indexOf(phraseString, index);
204                                 if (position == -1) {
205                                         break;
206                                 }
207                                 score += Math.pow(1 - position / (double) expression.length(), 2);
208                                 index = position + phraseString.length();
209                                 ++matches;
210                         }
211                         if (matches == 0) {
212                                 continue;
213                         }
214                         if (phrase.getOptionality() == Phrase.Optionality.REQUIRED) {
215                                 requiredHits += score;
216                         }
217                         if (phrase.getOptionality() == Phrase.Optionality.OPTIONAL) {
218                                 optionalHits += score;
219                         }
220                         if (phrase.getOptionality() == Phrase.Optionality.FORBIDDEN) {
221                                 forbiddenHits += matches;
222                         }
223                 }
224                 return requiredHits * 3 + optionalHits + (requiredHits - requiredPhrases) * 5 - (forbiddenHits * 2);
225         }
226
227         /**
228          * Converts a given object into a {@link String}.
229          *
230          * @param <T>
231          *            The type of the objects
232          * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
233          */
234         private static interface StringGenerator<T> {
235
236                 /**
237                  * Generates a {@link String} for the given object.
238                  *
239                  * @param object
240                  *            The object to generate the {@link String} for
241                  * @return The generated {@link String}
242                  */
243                 public String generateString(T object);
244
245         }
246
247         /**
248          * Generates a {@link String} from a {@link Sone}, concatenating the name of
249          * the Sone and all {@link Profile} {@link Field} values.
250          *
251          * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
252          */
253         private static class SoneStringGenerator implements StringGenerator<Sone> {
254
255                 /** A static instance of a complete Sone string generator. */
256                 public static final SoneStringGenerator COMPLETE_GENERATOR = new SoneStringGenerator(true);
257
258                 /**
259                  * A static instance of a Sone string generator that will only use the
260                  * name of the Sone.
261                  */
262                 public static final SoneStringGenerator NAME_GENERATOR = new SoneStringGenerator(false);
263
264                 /** Whether to generate a string from all data of a Sone. */
265                 private final boolean complete;
266
267                 /**
268                  * Creates a new Sone string generator.
269                  *
270                  * @param complete
271                  *            {@code true} to use the profile’s fields, {@code false} to
272                  *            not to use the profile‘s fields
273                  */
274                 private SoneStringGenerator(boolean complete) {
275                         this.complete = complete;
276                 }
277
278                 /**
279                  * {@inheritDoc}
280                  */
281                 @Override
282                 public String generateString(Sone sone) {
283                         StringBuilder soneString = new StringBuilder();
284                         soneString.append(sone.getName());
285                         Profile soneProfile = sone.getProfile();
286                         if (soneProfile.getFirstName() != null) {
287                                 soneString.append(' ').append(soneProfile.getFirstName());
288                         }
289                         if (soneProfile.getMiddleName() != null) {
290                                 soneString.append(' ').append(soneProfile.getMiddleName());
291                         }
292                         if (soneProfile.getLastName() != null) {
293                                 soneString.append(' ').append(soneProfile.getLastName());
294                         }
295                         if (complete) {
296                                 for (Field field : soneProfile.getFields()) {
297                                         soneString.append(' ').append(field.getValue());
298                                 }
299                         }
300                         return soneString.toString();
301                 }
302
303         }
304
305         /**
306          * Generates a {@link String} from a {@link Post}, concatenating the text of
307          * the post, the text of all {@link Reply}s, and the name of all
308          * {@link Sone}s that have replied.
309          *
310          * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
311          */
312         private class PostStringGenerator implements StringGenerator<Post> {
313
314                 /**
315                  * {@inheritDoc}
316                  */
317                 @Override
318                 public String generateString(Post post) {
319                         StringBuilder postString = new StringBuilder();
320                         postString.append(post.getText());
321                         if (post.getRecipient() != null) {
322                                 postString.append(' ').append(SoneStringGenerator.NAME_GENERATOR.generateString(post.getRecipient()));
323                         }
324                         for (Reply reply : Filters.filteredList(webInterface.getCore().getReplies(post), Reply.FUTURE_REPLIES_FILTER)) {
325                                 postString.append(' ').append(SoneStringGenerator.NAME_GENERATOR.generateString(reply.getSone()));
326                                 postString.append(' ').append(reply.getText());
327                         }
328                         return postString.toString();
329                 }
330
331         }
332
333         /**
334          * A search phrase.
335          *
336          * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
337          */
338         private static class Phrase {
339
340                 /**
341                  * The optionality of a search phrase.
342                  *
343                  * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’
344                  *         Roden</a>
345                  */
346                 public enum Optionality {
347
348                         /** The phrase is optional. */
349                         OPTIONAL,
350
351                         /** The phrase is required. */
352                         REQUIRED,
353
354                         /** The phrase is forbidden. */
355                         FORBIDDEN
356
357                 }
358
359                 /** The phrase to search for. */
360                 private final String phrase;
361
362                 /** The optionality of the phrase. */
363                 private final Optionality optionality;
364
365                 /**
366                  * Creates a new phrase.
367                  *
368                  * @param phrase
369                  *            The phrase to search for
370                  * @param optionality
371                  *            The optionality of the phrase
372                  */
373                 public Phrase(String phrase, Optionality optionality) {
374                         this.optionality = optionality;
375                         this.phrase = phrase;
376                 }
377
378                 /**
379                  * Returns the phrase to search for.
380                  *
381                  * @return The phrase to search for
382                  */
383                 public String getPhrase() {
384                         return phrase;
385                 }
386
387                 /**
388                  * Returns the optionality of the phrase.
389                  *
390                  * @return The optionality of the phrase
391                  */
392                 public Optionality getOptionality() {
393                         return optionality;
394                 }
395
396         }
397
398         /**
399          * A hit consists of a searched object and the score it got for the phrases
400          * of the search.
401          *
402          * @see SearchPage#calculateScore(List, String)
403          * @param <T>
404          *            The type of the searched object
405          * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
406          */
407         private static class Hit<T> {
408
409                 /** Filter for {@link Hit}s with a score of more than 0. */
410                 public static final Filter<Hit<?>> POSITIVE_FILTER = new Filter<Hit<?>>() {
411
412                         @Override
413                         public boolean filterObject(Hit<?> hit) {
414                                 return hit.getScore() > 0;
415                         }
416
417                 };
418
419                 /** Comparator that sorts {@link Hit}s descending by score. */
420                 public static final Comparator<Hit<?>> DESCENDING_COMPARATOR = new Comparator<Hit<?>>() {
421
422                         @Override
423                         public int compare(Hit<?> leftHit, Hit<?> rightHit) {
424                                 return (rightHit.getScore() < leftHit.getScore()) ? -1 : ((rightHit.getScore() > leftHit.getScore()) ? 1 : 0);
425                         }
426
427                 };
428
429                 /** The object that was searched. */
430                 private final T object;
431
432                 /** The score of the object. */
433                 private final double score;
434
435                 /**
436                  * Creates a new hit.
437                  *
438                  * @param object
439                  *            The object that was searched
440                  * @param score
441                  *            The score of the object
442                  */
443                 public Hit(T object, double score) {
444                         this.object = object;
445                         this.score = score;
446                 }
447
448                 /**
449                  * Returns the object that was searched.
450                  *
451                  * @return The object that was searched
452                  */
453                 public T getObject() {
454                         return object;
455                 }
456
457                 /**
458                  * Returns the score of the object.
459                  *
460                  * @return The score of the object
461                  */
462                 public double getScore() {
463                         return score;
464                 }
465
466         }
467
468         /**
469          * Extracts the object from a {@link Hit}.
470          *
471          * @param <T>
472          *            The type of the object to extract
473          * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
474          */
475         public static class HitConverter<T> implements Converter<Hit<T>, T> {
476
477                 /**
478                  * {@inheritDoc}
479                  */
480                 @Override
481                 public T convert(Hit<T> input) {
482                         return input.getObject();
483                 }
484
485         }
486
487 }