import java.net.URLConnection;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
/** The logger. */
private static final Logger logger = Logger.getLogger(URLFetcher.class.getName());
- /** The core. */
- private final Core core;
-
/** The URL to fetch. */
private final URL url;
/** The hash of the fetched URL. */
private byte[] hash;
+ /** The pages collected while parsing the URL. */
+ private final List<Page> collectedPages = new ArrayList<Page>();
+
/**
* Creates a new fetcher for the given URL.
*
- * @param core
- * The core new pages are queued in
* @param url
* The URL to fetch
* @throws NoSuchAlgorithmException
* if no {@link MessageDigest} instance with an
* <code>SHA-256</code> algorithm can be created
*/
- public URLFetcher(Core core, URL url) throws NoSuchAlgorithmException {
- Validation.begin().isNotNull("core", core).isNotNull("url", url).check();
- this.core = core;
+ public URLFetcher(URL url) throws NoSuchAlgorithmException {
+ // Validate the argument before any field is assigned. Presumably this
+ // rejects a null url; the failure mode is defined by Validation, which
+ // is not visible here -- TODO confirm.
+ Validation.begin().isNotNull("url", url).check();
this.url = url;
+ // This lookup is the source of the declared NoSuchAlgorithmException.
messageDigest = MessageDigest.getInstance("SHA-256");
}
//
/**
+ * Returns the pages collected while parsing this URL.
+ * <p>
+ * The returned list is a read-only view of this fetcher's internal list:
+ * pages collected later become visible through it, and any attempt to
+ * modify it throws an {@link UnsupportedOperationException}.
+ *
+ * @return An unmodifiable view of the collected pages
+ */
+ public List<Page> getCollectedPages() {
+ // Hand out a view instead of the backing list so callers cannot alter
+ // this fetcher's internal state.
+ return java.util.Collections.unmodifiableList(collectedPages);
+ }
+
+ /**
* Returns the hash of the content of the fetched URL. The returned value is
* only valid after {@link #run()} has been called.
*
URL newLink = null;
try {
newLink = new URL(url, linkTarget);
- core.addPage(newLink);
+ Page newPage = Page.fromURL(newLink);
+ if (newPage != null) {
+ collectedPages.add(newPage);
+ }
} catch (MalformedURLException mue1) {
logger.log(Level.WARNING, "Could not create URL from “" + url + "” and “" + linkTarget + "”.", mue1);
- } catch (IllegalArgumentException iae1) {
- logger.log(Level.WARNING, "Could not add “" + newLink + "” to core queue.", iae1);
}
}