import java.util.logging.Level;
import java.util.logging.Logger;
+import net.pterodactylus.arachne.parser.ParserFactory;
import de.ina.util.service.AbstractService;
import de.ina.util.thread.DumpingThreadFactory;
import de.ina.util.validation.Validation;
// INTERNAL MEMBERS
//
+ /** The parser factory. */
+ private ParserFactory parserFactory = new ParserFactory();
+
/** Thread pool for the URL fetches. */
private Executor urlFetcherExecutor = Executors.newFixedThreadPool(1, new DumpingThreadFactory("URLFetcher-"));
URLFetcher urlFetcher;
try {
logger.log(Level.INFO, "Fetching “" + nextURL + "”...");
- urlFetcher = new URLFetcher(nextURL);
+ urlFetcher = new URLFetcher(parserFactory, nextURL);
urlFetcherExecutor.execute(urlFetcher);
} catch (NoSuchAlgorithmException nsae1) {
logger.log(Level.SEVERE, "Could not get “SHA-256” message digest!", nsae1);
import java.util.logging.Level;
import java.util.logging.Logger;
-import net.pterodactylus.arachne.parser.HtmlEditorKitParser;
+import net.pterodactylus.arachne.parser.Parser;
+import net.pterodactylus.arachne.parser.ParserFactory;
import net.pterodactylus.arachne.parser.ParserListener;
import de.ina.util.io.MessageDigestInputStream;
import de.ina.util.validation.Validation;
/** The logger. */
private static final Logger logger = Logger.getLogger(URLFetcher.class.getName());
+ /** The parser factory. */
+ private final ParserFactory parserFactory;
+
/** The URL to fetch. */
private final URL url;
/**
* Creates a new fetcher for the given URL.
+ * <p>
+ * Both arguments are checked for <code>null</code> before they are stored,
+ * and the <code>SHA-256</code> message digest is obtained here, so a
+ * missing algorithm is reported at construction time.
+ * NOTE(review): what a failed <code>null</code> check raises is defined by
+ * <code>Validation</code> and is not visible here; confirm before relying on it.
*
+ * @param parserFactory
+ * The parser factory that is used to create content-type
+ * specific parsers
* @param url
* The URL to fetch
* @throws NoSuchAlgorithmException
* if no {@link MessageDigest} instance with an
* <code>SHA-256</code> algorithm can be created
*/
- public URLFetcher(URL url) throws NoSuchAlgorithmException {
- Validation.begin().isNotNull("url", url).check();
+ public URLFetcher(ParserFactory parserFactory, URL url) throws NoSuchAlgorithmException {
+ Validation.begin().isNotNull("parserFactory", parserFactory).isNotNull("url", url).check();
+ this.parserFactory = parserFactory;
this.url = url;
messageDigest = MessageDigest.getInstance("SHA-256");
}
logger.log(Level.INFO, "Type is “" + contentType + "”, length is " + contentLength + ".");
urlInputStream = urlConnection.getInputStream();
hashInputStream = new MessageDigestInputStream(urlInputStream, messageDigest);
- HtmlEditorKitParser htmlEditorKitParser = new HtmlEditorKitParser();
- htmlEditorKitParser.parse(this, hashInputStream, "UTF-8");
+ Parser parser = parserFactory.getParser(contentType);
+ parser.parse(this, hashInputStream, "UTF-8");
hash = messageDigest.digest();
} catch (IOException ioe1) {
logger.log(Level.WARNING, "Could not fetch “" + url + "”.", ioe1);