- private String getAuthor(URL source, InputStream in) throws IOException {
- String author = getLine(in, "href=\"/post/show/", 0);
- if (author != null) {
- String key = "href=\"";
- int pos = author.indexOf(key);
- if (pos >= 0) {
- author = author.substring(pos + key.length());
- pos = author.indexOf("\"");
- if (pos >= 0) {
- author = author.substring(0, pos - 1);
- String page = source.getProtocol() + "://"
- + source.getHost() + author;
- try {
- InputStream pageIn = Instance.getCache().open(
- new URL(page), this, false);
- try {
- key = "class=\"tag-type-artist\"";
- author = getLine(pageIn, key, 0);
- if (author != null) {
- pos = author.indexOf("<a href=\"");
- if (pos >= 0) {
- author = author.substring(pos);
- pos = author.indexOf("</a>");
- if (pos >= 0) {
- author = author.substring(0, pos);
- return StringUtils.unhtml(author);
- }
- }
- }
- } finally {
- pageIn.close();
- }
- } catch (Exception e) {
- // No author found
+ @Override
+ protected URL getCanonicalUrl(URL source) {
+ // Convert search-pools into proper pools
+ if (source.getPath().equals("/posts") && source.getQuery() != null
+ && source.getQuery().startsWith("tags=pool%3A")) {
+ String poolNumber = source.getQuery()
+ .substring("tags=pool%3A".length());
+ try {
+ Integer.parseInt(poolNumber);
+ String base = source.getProtocol() + "://" + source.getHost();
+ if (source.getPort() != -1) {
+ base = base + ":" + source.getPort();
+ }
+ source = new URL(base + "/pools/" + poolNumber);
+ } catch (NumberFormatException e) {
+ // Not a simple pool, skip
+ } catch (MalformedURLException e) {
+ // Cannot happen
+ }
+ }
+
+ if (isSetOriginalUrl(source)) {
+ try {
+ Document doc = DataUtil.load(Instance.getInstance().getCache()
+ .open(source, this, false), "UTF-8", source.toString());
+ for (Element shortname : doc
+ .getElementsByClass("set-shortname")) {
+ for (Element el : shortname.getElementsByTag("a")) {
+ if (!el.attr("href").isEmpty())
+ return new URL(el.absUrl("href"));