Stub implementation of page fetching.
[arachne.git] / src / net / pterodactylus / arachne / core / Core.java
/*
 * © 2009 David ‘Bombe’ Roden
 */
package net.pterodactylus.arachne.core;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.logging.Level;
import java.util.logging.Logger;

import de.ina.util.service.AbstractService;
import de.ina.util.thread.DumpingThreadFactory;
import de.ina.util.validation.Validation;

/**
 * Arachne’s core.
 *
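 * <p>
 * A minimal usage sketch (illustrative only: it assumes the service has been
 * started through its {@link AbstractService} lifecycle and that a freenet
 * node is reachable on the configured host and port; the key, site name and
 * edition in the URL are placeholders):
 * </p>
 *
 * <pre>
 * Core core = new Core();
 * // start the service, then queue a page to crawl:
 * core.addPage("http://localhost:8888/USK@key/mysite/4/index.html");
 * </pre>
 *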
 * @author David ‘Bombe’ Roden <bombe@pterodactylus.net>
 */
public class Core extends AbstractService {

	/** The logger. */
	private static final Logger logger = Logger.getLogger(Core.class.getName());

	//
	// PROPERTIES
	//

	/** The host of the freenet node. */
	private String nodeHost = "localhost";

	/** The port of the freenet node. */
	private int nodePort = 8888;

	//
	// INTERNAL MEMBERS
	//

	/** Thread pool for the URL fetches. */
	private final Executor urlFetcherExecutor = Executors.newFixedThreadPool(1, new DumpingThreadFactory("URLFetcher-"));

	/** The current list of pages to crawl. */
	private final List<Page> pages = new ArrayList<Page>();

	//
	// ACTIONS
	//

	/**
	 * Adds the given URL to the list of pages to crawl.
	 *
	 * @param url
	 *            The URL to add
	 */
	public void addPage(URL url) {
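		/*
		 * Only URLs that point directly at the configured node are accepted.
		 * Note that URL#getPort() returns -1 when the URL does not name a port
		 * explicitly, so the port has to be part of the URL.
		 */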
		Validation.begin().isNotNull("url", url).check().isEqual("url.getHost()", url.getHost(), (Object) nodeHost).isEqual("url.getPort()", url.getPort(), nodePort).check();
		String path = url.getPath();
		if (path.length() == 0) {
			path = "/";
		}
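		/*
		 * The first path component is expected to carry the freenet key, e.g.
		 * “USK@<key>”; the key type decides how the rest of the path is parsed.
		 */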
		String[] pathComponents = path.split("/");
		if (pathComponents.length < 2) {
			throw new IllegalArgumentException("URL “" + url + "” is not a valid freenet page.");
		}
		String siteName = pathComponents[1];
		String[] siteComponents = siteName.split("@");
		if (siteComponents.length != 2) {
			throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page.");
		}
		if (!"USK".equals(siteComponents[0]) && !"SSK".equals(siteComponents[0]) && !"CHK".equals(siteComponents[0])) {
			throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page.");
		}
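		/* USK links are expected to look like “/USK@<key>/<sitename>/<edition>/<path>”. */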
		if ("USK".equals(siteComponents[0])) {
			if (pathComponents.length < 4) {
				throw new IllegalArgumentException("URL “" + url + "” is not a valid freenet page.");
			}
			Site site = new Site(siteComponents[1], pathComponents[2]);
			Edition edition = new Edition(site, Integer.parseInt(pathComponents[3]));
			Page page = new Page(edition, createPath(pathComponents, 4));
			addPage(page);
		}
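		/*
		 * SSK links are expected to look like “/SSK@<key>/<sitename>-<edition>/<path>”;
		 * the edition number follows the last dash.
		 */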
		if ("SSK".equals(siteComponents[0])) {
			if ((pathComponents.length < 3) || (pathComponents[2].lastIndexOf('-') < 1)) {
				throw new IllegalArgumentException("URL “" + url + "” is not a valid freenet page.");
			}
			int lastDash = pathComponents[2].lastIndexOf('-');
			String basename = pathComponents[2].substring(0, lastDash);
			int editionNumber = Integer.parseInt(pathComponents[2].substring(lastDash + 1));
			Site site = new Site(siteComponents[1], basename);
			Edition edition = new Edition(site, editionNumber);
			Page page = new Page(edition, createPath(pathComponents, 3));
			addPage(page);
		}
		/* TODO: handle CHK */
	}

	/**
	 * Adds the given URL to the list of pages to crawl.
	 *
	 * @param url
	 *            The URL of the page to crawl
	 * @throws MalformedURLException
	 *             if the URL is not a valid URL
	 */
	public void addPage(String url) throws MalformedURLException {
		Validation.begin().isNotNull("url", (Object) url).check();
		addPage(new URL(url));
	}

	/**
	 * Adds the given page to the list of pages to crawl.
	 *
	 * @param page
	 *            The page to add
	 */
	public void addPage(Page page) {
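		/*
		 * pages is also read and modified from serviceRun(), so additions are
		 * guarded by the same lock.
		 */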
		Validation.begin().isNotNull("page", page).check();
		synchronized (syncObject) {
			pages.add(page);
		}
		notifySyncObject();
	}

	//
	// SERVICE METHODS
	//

	/**
	 * {@inheritDoc}
	 *
	 * @see de.ina.util.service.AbstractService#serviceRun()
	 */
	@Override
	protected void serviceRun() {
		while (!shouldStop()) {
			Page nextPage = null;
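			/*
			 * Wait until a page has been queued or this service is told to
			 * stop, then take the first page off the queue.
			 */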
			synchronized (syncObject) {
				while (!shouldStop() && pages.isEmpty()) {
					try {
						syncObject.wait();
					} catch (InterruptedException ie1) {
						/* ignore. */
					}
				}
				if (!shouldStop()) {
					nextPage = pages.remove(0);
				}
			}
			if (shouldStop() || (nextPage == null)) {
				break;
			}
			URL nextURL = createURL(nextPage);
			if (nextURL == null) {
				logger.log(Level.INFO, "Skipping “" + nextPage + "”.");
				continue;
			}
			URLFetcher urlFetcher = new URLFetcher(this, nextURL);
			urlFetcherExecutor.execute(urlFetcher);
		}
	}

	//
	// PRIVATE METHODS
	//

	/**
	 * Creates a path from the given String array, starting at the given index.
	 * The path is created by joining all Strings from the array, separating
	 * them with a slash (‘/’).
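	 * For example, joining the components {"", "USK@key", "site", "4",
	 * "images", "logo.png"} from index 4 yields “images/logo.png”.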
	 *
	 * @param pathComponents
	 *            The array of path components
	 * @param index
	 *            The index of the first path component
	 * @return The joined path
	 */
	private String createPath(String[] pathComponents, int index) {
		Validation.begin().isNotNull("pathComponents", pathComponents).check().isLess("index", index, pathComponents.length + 1).check();
		StringBuilder path = new StringBuilder();
		for (int pathComponentIndex = index; pathComponentIndex < pathComponents.length; pathComponentIndex++) {
			if (path.length() > 0) {
				path.append('/');
			}
			path.append(pathComponents[pathComponentIndex]);
		}
		return path.toString();
	}

	/**
	 * Creates a URL from the given page.
	 *
	 * @param page
	 *            The page to create a URL from
	 * @return The created URL, or <code>null</code> if the URL could not be
	 *         created
	 */
	private URL createURL(Page page) {
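		/*
		 * Stub: the page itself is not used yet and only the node’s base URL
		 * is returned; presumably the real freenet URI for the given page will
		 * be assembled here later.
		 */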
		try {
			return new URL("http://" + nodeHost + ":" + nodePort + "/");
		} catch (MalformedURLException mue1) {
			/* nearly impossible. */
		}
		return null;
	}

}