888e2df694718cf2455cb721b0fb0ac45435a6bd
[arachne.git] / src / net / pterodactylus / arachne / parser / HtmlEditorKitParser.java
1 /*
2  * © 2009 David ‘Bombe’ Roden
3  */
4 package net.pterodactylus.arachne.parser;
5
6 import java.io.BufferedReader;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10
11 import javax.swing.text.MutableAttributeSet;
12 import javax.swing.text.html.HTML;
13 import javax.swing.text.html.HTMLEditorKit;
14 import javax.swing.text.html.HTML.Tag;
15 import javax.swing.text.html.HTMLEditorKit.ParserCallback;
16 import javax.swing.text.html.parser.ParserDelegator;
17
18 import de.ina.util.io.Closer;
19
20 /**
21  * {@link Parser} implementation based on Swing’s {@link HTMLEditorKit}. This
22  * parser can be re-used but is not thread-safe!
23  *
24  * @author David ‘Bombe’ Roden <bombe@pterodactylus.net>
25  */
26 public class HtmlEditorKitParser extends HTMLEditorKit.ParserCallback implements Parser {
27
28         /** The parser listener. */
29         private ParserListener parserListener;
30
31         /** The current input stream. */
32         private InputStream inputStream;
33
34         /** Whether we’re currently parsing the title. */
35         private boolean inTitle;
36
37         /** The title text. */
38         private String titleText;
39
40         /** Whether we’re currently parsing a link. */
41         private boolean inLink;
42
43         /** The target of the link. */
44         private String linkTarget;
45
46         /** The title attribute of the link. */
47         private String linkTitle;
48
49         /** The text of the link. */
50         private StringBuilder linkText;
51
52         /**
53          * {@inheritdoc}
54          *
55          * @see net.pterodactylus.arachne.parser.Parser#parse(ParserListener,
56          *      InputStream, String)
57          */
58         public void parse(ParserListener parserListener, InputStream inputStream, String charset) throws IOException {
59                 this.parserListener = parserListener;
60                 this.inputStream = inputStream;
61                 InputStreamReader inputStreamReader = null;
62                 BufferedReader htmlReader = null;
63                 try {
64                         inputStreamReader = new InputStreamReader(inputStream, charset);
65                         htmlReader = new BufferedReader(inputStreamReader);
66                         new ParserDelegator().parse(htmlReader, this, true);
67                 } finally {
68                         Closer.close(htmlReader);
69                         Closer.close(inputStreamReader);
70                 }
71         }
72
73         //
74         // METHODS FROM ParserCallback
75         //
76
77         /**
78          * {@inheritDoc}
79          */
80         @Override
81         public void handleStartTag(Tag tag, MutableAttributeSet attributeSet, int position) {
82                 if (tag == Tag.TITLE) {
83                         if (!attributeSet.containsAttribute(ParserCallback.IMPLIED, Boolean.TRUE)) {
84                                 inTitle = true;
85                         }
86                 } else if (tag == Tag.A) {
87                         String href = (String) attributeSet.getAttribute(HTML.Attribute.HREF);
88                         if (href != null) {
89                                 linkTarget = href;
90                                 inLink = true;
91                                 linkText = new StringBuilder();
92                                 linkTitle = (String) attributeSet.getAttribute(HTML.Attribute.TITLE);
93                         }
94                 }
95                 if (inLink) {
96                         if (tag.breaksFlow()) {
97                                 linkText.append(' ');
98                         }
99                 }
100         }
101
102         /**
103          * {@inheritDoc}
104          */
105         @Override
106         public void handleText(char[] data, int pos) {
107                 if (inTitle) {
108                         titleText = new String(data);
109                 } else if (inLink) {
110                         linkText.append(data);
111                 }
112         }
113
114         /**
115          * {@inheritDoc}
116          */
117         @Override
118         public void handleEndTag(Tag tag, int position) {
119                 if (tag == Tag.TITLE) {
120                         inTitle = false;
121                         parserListener.parsedTitle(inputStream, titleText);
122                 } else if (tag == Tag.A) {
123                         inLink = false;
124                         parserListener.parsedLink(inputStream, linkTarget, linkTitle, linkText.toString());
125                 }
126         }
127
128 }