2 * © 2009 David ‘Bombe’ Roden
4 package net.pterodactylus.arachne.parser;
6 import java.io.BufferedReader;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
11 import javax.swing.text.MutableAttributeSet;
12 import javax.swing.text.html.HTML;
13 import javax.swing.text.html.HTMLEditorKit;
14 import javax.swing.text.html.HTML.Tag;
15 import javax.swing.text.html.HTMLEditorKit.ParserCallback;
16 import javax.swing.text.html.parser.ParserDelegator;
18 import de.ina.util.io.Closer;
/**
 * {@link Parser} implementation based on Swing’s {@link HTMLEditorKit}. An
 * instance of this parser can be re-used for several documents, one after
 * another, but it is not thread-safe!
 *
 * @author David ‘Bombe’ Roden &lt;bombe@pterodactylus.net&gt;
 */
26 public class HtmlEditorKitParser extends HTMLEditorKit.ParserCallback implements Parser {
28 /** The parser listener; assigned by parse() and notified of every parsed title and link. */
29 private ParserListener parserListener;
31 /** The current input stream; assigned by parse() and handed to the listener with every notification. */
32 private InputStream inputStream;
34 /** Whether we’re currently parsing the title. */
35 private boolean inTitle;
37 /** The title text; reported to the listener when the closing TITLE tag is handled. */
38 private String titleText;
40 /** Whether we’re currently parsing a link. */
41 private boolean inLink;
43 /** The target of the link; reported to the listener when the closing A tag is handled. */
44 private String linkTarget;
46 /** The title attribute of the link; read from the A tag’s TITLE attribute and may be null. */
47 private String linkTitle;
49 /** The text of the link; a fresh buffer is created for each opening A tag and text runs are appended to it. */
50 private StringBuilder linkText;
55 * @see net.pterodactylus.arachne.parser.Parser#parse(ParserListener,
56 * InputStream, String)
58 public void parse(ParserListener parserListener, InputStream inputStream, String charset) throws IOException {
59 this.parserListener = parserListener;
60 this.inputStream = inputStream;
61 InputStreamReader inputStreamReader = null;
62 BufferedReader htmlReader = null;
64 inputStreamReader = new InputStreamReader(inputStream, charset);
65 htmlReader = new BufferedReader(inputStreamReader);
66 new ParserDelegator().parse(htmlReader, this, true);
68 Closer.close(htmlReader);
69 Closer.close(inputStreamReader);
74 // METHODS FROM ParserCallback
// Handles an opening tag reported by the HTML parser. Three cases are visible here: a
// TITLE tag that does not carry the ParserCallback.IMPLIED attribute, an A tag (whose HREF
// and TITLE attributes are read and for which a fresh link text buffer is created), and
// any tag for which Tag.breaksFlow() returns true.
// NOTE(review): the statements inside several of these branches, including the complete
// body of the breaksFlow() check, are not visible in this view. Confirm against the
// complete file before changing this method.
81 public void handleStartTag(Tag tag, MutableAttributeSet attributeSet, int position) {
82 if (tag == Tag.TITLE) {
// only TITLE tags that are not flagged as IMPLIED (i.e. synthesised by the parser) get past this check.
83 if (!attributeSet.containsAttribute(ParserCallback.IMPLIED, Boolean.TRUE)) {
86 } else if (tag == Tag.A) {
// may be null when the anchor carries no HREF attribute.
87 String href = (String) attributeSet.getAttribute(HTML.Attribute.HREF);
// start collecting the text of this link in a new, empty buffer.
91 linkText = new StringBuilder();
// may be null when the anchor carries no TITLE attribute.
92 linkTitle = (String) attributeSet.getAttribute(HTML.Attribute.TITLE);
// NOTE(review): what happens for flow-breaking tags is not visible in this view.
96 if (tag.breaksFlow()) {
106 public void handleText(char[] data, int pos) {
108 titleText = new String(data);
110 linkText.append(data);
118 public void handleEndTag(Tag tag, int position) {
119 if (tag == Tag.TITLE) {
121 parserListener.parsedTitle(inputStream, titleText);
122 } else if (tag == Tag.A) {
124 parserListener.parsedLink(inputStream, linkTarget, linkTitle, linkText.toString());