Add method to set the node’s hostname.
[arachne.git] / src / net / pterodactylus / arachne / core / Core.java
1 /*
2  * © 2009 David ‘Bombe’ Roden
3  */
4 package net.pterodactylus.arachne.core;
5
6 import java.net.MalformedURLException;
7 import java.net.URL;
8 import java.util.ArrayList;
9 import java.util.List;
10 import java.util.concurrent.Executor;
11 import java.util.concurrent.Executors;
12 import java.util.logging.Level;
13 import java.util.logging.Logger;
14
15 import de.ina.util.service.AbstractService;
16 import de.ina.util.thread.DumpingThreadFactory;
17 import de.ina.util.validation.Validation;
18
19 /**
20  * Arachne’s core.
21  *
22  * @author David ‘Bombe’ Roden <bombe@pterodactylus.net>
23  */
24 public class Core extends AbstractService {
25
26         /** The logger. */
27         private static final Logger logger = Logger.getLogger(Core.class.getName());
28
29         //
30         // PROPERTIES
31         //
32
33         /** The host of the freenet node. */
34         private String nodeHost = "localhost";
35
36         /** The port of the freenet node. */
37         private int nodePort = 8888;
38
39         //
40         // INTERNAL MEMBERS
41         //
42
43         /** Thread pool for the URL fetches. */
44         private Executor urlFetcherExecutor = Executors.newFixedThreadPool(1, new DumpingThreadFactory("URLFetcher-"));
45
46         /** The current list of URLs to crawl. */
47         private final List<Page> pages = new ArrayList<Page>();
48
49         //
50         // ACCESSORS
51         //
52
53         /**
54          * Sets the host name of the node.
55          *
56          * @param nodeHost
57          *            The node’s host name
58          */
59         public void setNodeHost(String nodeHost) {
60                 this.nodeHost = nodeHost;
61         }
62
63         //
64         // ACTIONS
65         //
66
67         /**
68          * Adds the given URL to the list of pages to crawl.
69          *
70          * @param url
71          *            The URL to add
72          */
73         public void addPage(URL url) {
74                 Validation.begin().isNotNull("url", url).check().isEqual("url.getHost()", url.getHost(), (Object) nodeHost).isEqual("url.getPort()", url.getPort(), nodePort).check();
75                 String path = url.getPath();
76                 if (path.length() == 0) {
77                         path = "/";
78                 }
79                 String[] pathComponents = path.split("/");
80                 if (pathComponents.length < 2) {
81                         throw new IllegalArgumentException("URL “" + url + "” is not a valid freenet page.");
82                 }
83                 String siteName = pathComponents[1];
84                 String[] siteComponents = siteName.split("@");
85                 if (siteComponents.length != 2) {
86                         throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page.");
87                 }
88                 if (!"USK".equals(siteComponents[0]) && !"SSK".equals(siteComponents[0]) && !"CHK".equals(siteComponents[0])) {
89                         throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page.");
90                 }
91                 if ("USK".equals(siteComponents[0])) {
92                         Site site = new Site(siteComponents[1], pathComponents[2]);
93                         Edition edition = new Edition(site, Integer.parseInt(pathComponents[3]));
94                         Page page = new Page(edition, createPath(pathComponents, 4));
95                         addPage(page);
96                 }
97                 if ("SSK".equals(siteComponents[0])) {
98                         int lastDash = pathComponents[2].lastIndexOf('-');
99                         String basename = pathComponents[2].substring(0, lastDash);
100                         int editionNumber = Integer.parseInt(pathComponents[2].substring(lastDash + 1));
101                         Site site = new Site(siteComponents[1], basename);
102                         Edition edition = new Edition(site, editionNumber);
103                         Page page = new Page(edition, createPath(pathComponents, 3));
104                         addPage(page);
105                 }
106                 /* TODO: handle CHK */
107         }
108
109         /**
110          * Adds the given URL to the list of pages to crawl.
111          *
112          * @param url
113          *            The URL of the page to crawl
114          * @throws MalformedURLException
115          *             if the URL is not a valid URL
116          */
117         public void addPage(String url) throws MalformedURLException {
118                 Validation.begin().isNotNull("url", (Object) url).check();
119                 addPage(new URL(url));
120         }
121
122         /**
123          * Adds the given page to the list of pages to crawl.
124          *
125          * @param page
126          *            The page to add
127          */
128         public void addPage(Page page) {
129                 Validation.begin().isNotNull("page", page).check();
130                 pages.add(page);
131                 notifySyncObject();
132         }
133
134         //
135         // SERVICE METHODS
136         //
137
138         /**
139          * {@inheritdoc}
140          *
141          * @see de.ina.util.service.AbstractService#serviceRun()
142          */
143         @Override
144         protected void serviceRun() {
145                 while (!shouldStop()) {
146                         Page nextPage = null;
147                         synchronized (syncObject) {
148                                 while (!shouldStop() && pages.isEmpty()) {
149                                         try {
150                                                 syncObject.wait();
151                                         } catch (InterruptedException ie1) {
152                                                 /* ignore. */
153                                         }
154                                 }
155                                 if (!shouldStop()) {
156                                         nextPage = pages.remove(0);
157                                 }
158                         }
159                         if (shouldStop()) {
160                                 break;
161                         }
162                         URL nextURL = createURL(nextPage);
163                         if (nextURL == null) {
164                                 logger.log(Level.INFO, "Skipping “" + nextPage + "”.");
165                                 continue;
166                         }
167                         URLFetcher urlFetcher = new URLFetcher(this, nextURL);
168                         urlFetcherExecutor.execute(urlFetcher);
169                 }
170         }
171
172         //
173         // PRIVATE METHODS
174         //
175
176         /**
177          * Creates a path from the given String array, starting at the given index.
178          * The path is created by joining all Strings from the array, separating
179          * them with a slash (‘/’).
180          *
181          * @param pathComponents
182          *            The array of path components
183          * @param index
184          *            The index of the first path components
185          * @return The joined path
186          */
187         private String createPath(String[] pathComponents, int index) {
188                 Validation.begin().isNotNull("pathComponents", pathComponents).check().isLess("index", index, pathComponents.length).check();
189                 StringBuilder path = new StringBuilder();
190                 for (int pathComponentIndex = index; pathComponentIndex < pathComponents.length; pathComponentIndex++) {
191                         if (path.length() > 0) {
192                                 path.append('/');
193                         }
194                         path.append(pathComponents[pathComponentIndex]);
195                 }
196                 return path.toString();
197         }
198
199         /**
200          * Creates a URL from the given page.
201          *
202          * @param page
203          *            The page to create a URL from
204          * @return The created URL, or <code>null</code> if the URL could not be
205          *         created
206          */
207         private URL createURL(Page page) {
208                 try {
209                         return new URL("http://" + nodeHost + ":" + nodePort + "/");
210                 } catch (MalformedURLException mue1) {
211                         /* nearly impossible. */
212                 }
213                 return null;
214         }
215
216 }