import java.util.logging.Level;
import java.util.logging.Logger;
-import net.pterodactylus.arachne.parser.HtmlEditorKitParser;
+import net.pterodactylus.arachne.parser.Parser;
+import net.pterodactylus.arachne.parser.ParserFactory;
import net.pterodactylus.arachne.parser.ParserListener;
import de.ina.util.io.MessageDigestInputStream;
import de.ina.util.validation.Validation;
/** The logger of this class, named after the {@link URLFetcher} class. */
private static final Logger logger = Logger.getLogger(URLFetcher.class.getName());
+ /** The parser factory that is asked for a parser matching the content type of the fetched URL. */
+ private final ParserFactory parserFactory;
+
/** The URL to fetch. Set once in the constructor. */
private final URL url;
/**
* Creates a new fetcher for the given URL.
+ * <p>
+ * Both arguments are validated to be non-<code>null</code> before they are
+ * stored, and a <code>SHA-256</code> {@link MessageDigest} is obtained and
+ * kept in this fetcher.
+ * <p>
+ * NOTE(review): the exception raised by the validation when an argument is
+ * <code>null</code> is defined by {@link Validation} and is not visible
+ * here; confirm it and document it with a <code>@throws</code> tag.
*
+ * @param parserFactory
+ * The parser factory that is used to create content-type
+ * specific parsers; must not be <code>null</code>
* @param url
* The URL to fetch; must not be <code>null</code>
* @throws NoSuchAlgorithmException
* if no {@link MessageDigest} instance with an
* <code>SHA-256</code> algorithm can be created
*/
- public URLFetcher(URL url) throws NoSuchAlgorithmException {
- Validation.begin().isNotNull("url", url).check();
+ public URLFetcher(ParserFactory parserFactory, URL url) throws NoSuchAlgorithmException {
+ Validation.begin().isNotNull("parserFactory", parserFactory).isNotNull("url", url).check();
+ this.parserFactory = parserFactory;
this.url = url;
messageDigest = MessageDigest.getInstance("SHA-256");
}
logger.log(Level.INFO, "Type is “" + contentType + "”, length is " + contentLength + ".");
urlInputStream = urlConnection.getInputStream();
hashInputStream = new MessageDigestInputStream(urlInputStream, messageDigest);
- HtmlEditorKitParser htmlEditorKitParser = new HtmlEditorKitParser();
- htmlEditorKitParser.parse(this, hashInputStream, "UTF-8");
+ Parser parser = parserFactory.getParser(contentType);
+ parser.parse(this, hashInputStream, "UTF-8");
hash = messageDigest.digest();
} catch (IOException ioe1) {
logger.log(Level.WARNING, "Could not fetch “" + url + "”.", ioe1);