*/
package net.pterodactylus.arachne.parser;
+import java.io.BufferedReader;
+import java.io.IOException;
import java.io.InputStream;
+import java.io.InputStreamReader;
+import javax.swing.text.MutableAttributeSet;
+import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
+import javax.swing.text.html.HTML.Tag;
+import javax.swing.text.html.HTMLEditorKit.ParserCallback;
+import javax.swing.text.html.parser.ParserDelegator;
+
+import de.ina.util.io.Closer;
/**
- * {@link Parser} implementation based on Swing’s {@link HTMLEditorKit}.
+ * {@link Parser} implementation based on Swing’s {@link HTMLEditorKit}. This
+ * parser can be re-used but is not thread-safe!
*
* @author David ‘Bombe’ Roden <bombe@pterodactylus.net>
*/
-public class HtmlEditorKitParser implements Parser {
+public class HtmlEditorKitParser extends HTMLEditorKit.ParserCallback implements Parser {
+
+ /** The listener that is notified about every title and link found while parsing. */
+ private ParserListener parserListener;
+
+ /** The input stream of the current parse; handed back to the listener with every event. */
+ private InputStream inputStream;
+
+ /** Whether we’re currently between an explicit (not implied) title start tag and its end tag. */
+ private boolean inTitle;
+
+ /** The most recent text seen inside a title. */
+ private String titleText;
+
+ /** Whether we’re currently parsing a link, i.e. an anchor that has an href attribute. */
+ private boolean inLink;
+
+ /** The target (the href attribute) of the current link. */
+ private String linkTarget;
+
+ /** The title attribute of the current link, or {@code null} if it has none. */
+ private String linkTitle;
+
+ /** The text collected inside the current link so far. */
+ private StringBuilder linkText;
/**
* {@inheritdoc}
* InputStream, String)
+ * <p>
+ * Decodes the given stream with the given charset and runs Swing’s
+ * {@link ParserDelegator} over it, using this object as the callback that
+ * forwards titles and links to the given listener. The title and link
+ * capture state is cleared first, so that leftovers from a previously
+ * parsed document (e.g. an unclosed title or link) can not leak into this
+ * run. Both readers are handed to {@link Closer} when parsing ends, whether
+ * it succeeded or not.
+ *
+ * @param parserListener
+ *            The listener to notify about parsed titles and links
+ * @param inputStream
+ *            The stream to read the HTML document from
+ * @param charset
+ *            The name of the charset used to decode the stream
+ * @throws IOException
+ *             if the charset is not supported or the document can not be
+ *             read
*/
public void parse(ParserListener parserListener, InputStream inputStream, String charset) throws IOException {
+     this.parserListener = parserListener;
+     this.inputStream = inputStream;
+     /*
+      * this instance may already have parsed a document that ended in the
+      * middle of a title or link, so start from a clean slate. the
+      * remaining link fields are re-initialized by every link start tag.
+      */
+     inTitle = false;
+     inLink = false;
+     titleText = null;
+     InputStreamReader inputStreamReader = null;
+     BufferedReader htmlReader = null;
+     try {
+         inputStreamReader = new InputStreamReader(inputStream, charset);
+         htmlReader = new BufferedReader(inputStreamReader);
+         /* true = ignore charset declarations in the document, the given charset wins. */
+         new ParserDelegator().parse(htmlReader, this, true);
+     } finally {
+         Closer.close(htmlReader);
+         Closer.close(inputStreamReader);
+     }
+ }
+
+ //
+ // METHODS FROM ParserCallback
+ //
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void handleStartTag(Tag tag, MutableAttributeSet attributeSet, int position) {
+ if (tag == Tag.TITLE) {
+ if (!attributeSet.containsAttribute(ParserCallback.IMPLIED, Boolean.TRUE)) {
+ inTitle = true;
+ }
+ } else if (tag == Tag.A) {
+ String href = (String) attributeSet.getAttribute(HTML.Attribute.HREF);
+ if (href != null) {
+ linkTarget = href;
+ inLink = true;
+ linkText = new StringBuilder();
+ linkTitle = null;
+ String title = (String) attributeSet.getAttribute(HTML.Attribute.TITLE);
+ if (title != null) {
+ linkTitle = title;
+ }
+ }
+ }
+ if (inLink) {
+ if (tag.breaksFlow()) {
+ linkText.append(' ');
+ }
+ }
+ }
+
+ /**
+  * {@inheritDoc}
+  * <p>
+  * Inside a title the text is stored as the title text, replacing whatever
+  * was stored before. Otherwise, inside a link, the text is appended to the
+  * link text. Text outside of both is ignored.
+  */
+ @Override
+ public void handleText(char[] data, int pos) {
+     if (inTitle) {
+         titleText = String.valueOf(data);
+         return;
+     }
+     if (inLink) {
+         linkText.append(data);
+     }
+ }
+
+ /**
+  * {@inheritDoc}
+  * <p>
+  * Reports a finished title or link to the parser listener. An end tag is
+  * only reported if the matching start tag began a capture: a title end tag
+  * whose start tag was implied, or an anchor end tag whose start tag had no
+  * href attribute, is ignored instead of reporting missing or outdated
+  * data.
+  */
+ @Override
+ public void handleEndTag(Tag tag, int position) {
+     if (tag == Tag.TITLE) {
+         if (inTitle) {
+             inTitle = false;
+             parserListener.parsedTitle(inputStream, titleText);
+             /* forget the text so that a later, empty title can not report it again. */
+             titleText = null;
+         }
+     } else if (tag == Tag.A) {
+         /*
+          * without this check an anchor without href would hit a null link
+          * text or report the previous link a second time.
+          */
+         if (inLink) {
+             inLink = false;
+             parserListener.parsedLink(inputStream, linkTarget, linkTitle, linkText.toString());
+         }
+     }
}
}