fanfix/supported/MangaFox.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.MalformedURLException;
   6 import java.net.URL;
   7 import java.util.AbstractMap;
   8 import java.util.ArrayList;
   9 import java.util.Collections;
  10 import java.util.List;
  11 import java.util.Map.Entry;
  12 import java.util.SortedMap;
  13 import java.util.TreeMap;
  14
  15 import org.jsoup.helper.DataUtil;
  16 import org.jsoup.nodes.Element;
  17 import org.jsoup.select.Elements;
  18
  19 import be.nikiroo.fanfix.Instance;
  20 import be.nikiroo.fanfix.data.MetaData;
  21 import be.nikiroo.utils.Image;
  22 import be.nikiroo.utils.Progress;
  23 import be.nikiroo.utils.StringUtils;
  24
  25 class MangaFox extends BasicSupport {
  26         @Override
  27         protected boolean isHtml() {
  28                 return true;
  29         }
  30
  31         @Override
  32         public String getSourceName() {
  33                 return "MangaFox.me";
  34         }
  35
  36         @Override
  37         protected MetaData getMeta() throws IOException {
  38                 MetaData meta = new MetaData();
  39                 Element doc = getSourceNode();
  40
  41                 Element title = doc.getElementById("title");
  42                 Elements table = null;
  43                 if (title != null) {
  44                         table = title.getElementsByTag("table");
  45                 }
  46                 if (table != null) {
  47                         // Rows: header, data
  48                         Elements rows = table.first().getElementsByTag("tr");
  49                         if (rows.size() > 1) {
  50                                 table = rows.get(1).getElementsByTag("td");
  51                                 // Columns: Realeased, Authors, Artists, Genres
  52                                 if (table.size() < 4) {
  53                                         table = null;
  54                                 }
  55                         }
  56                 }
  57
  58                 meta.setTitle(getTitle());
  59                 if (table != null) {
  60                         meta.setAuthor(getAuthors(table.get(1).text() + ","
  61                                         + table.get(2).text()));
  62
  63                         meta.setDate(StringUtils.unhtml(table.get(0).text()).trim());
  64                         meta.setTags(explode(table.get(3).text()));
  65                 }
  66                 meta.setSource(getSourceName());
  67                 meta.setUrl(getSource().toString());
  68                 meta.setPublisher(getSourceName());
  69                 meta.setUuid(getSource().toString());
  70                 meta.setLuid("");
  71                 meta.setLang("en");
  72                 meta.setSubject("manga");
  73                 meta.setType(getType().toString());
  74                 meta.setImageDocument(true);
  75                 meta.setCover(getCover());
  76
  77                 return meta;
  78         }
  79
  80         private String getTitle() {
  81                 Element doc = getSourceNode();
  82
  83                 Element title = doc.getElementById("title");
  84                 Element h1 = title.getElementsByTag("h1").first();
  85                 if (h1 != null) {
  86                         return StringUtils.unhtml(h1.text()).trim();
  87                 }
  88
  89                 return null;
  90         }
  91
  92         private String getAuthors(String authorList) {
  93                 String author = "";
  94                 for (String auth : explode(authorList)) {
  95                         if (!author.isEmpty()) {
  96                                 author = author + ", ";
  97                         }
  98                         author += auth;
  99                 }
 100
 101                 return author;
 102         }
 103
 104         @Override
 105         protected String getDesc() {
 106                 Element doc = getSourceNode();
 107                 Element title = doc.getElementsByClass("summary").first();
 108                 if (title != null) {
 109                         StringUtils.unhtml(title.text()).trim();
 110                 }
 111
 112                 return null;
 113         }
 114
 115         private Image getCover() {
 116                 Element doc = getSourceNode();
 117                 Element cover = doc.getElementsByClass("cover").first();
 118                 if (cover != null) {
 119                         cover = cover.getElementsByTag("img").first();
 120                 }
 121
 122                 if (cover != null) {
 123                         String coverUrl = cover.absUrl("src");
 124
 125                         InputStream coverIn;
 126                         try {
 127                                 coverIn = openEx(coverUrl);
 128                                 try {
 129                                         return new Image(coverIn);
 130                                 } finally {
 131                                         coverIn.close();
 132                                 }
 133                         } catch (IOException e) {
 134                                 Instance.getTraceHandler().error(e);
 135                         }
 136                 }
 137
 138                 return null;
 139         }
 140
 141         @Override
 142         protected List<Entry<String, URL>> getChapters(Progress pg) {
 143                 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 144
 145                 String prefix = null; // each chapter starts with this prefix, then a
 146                                                                 // chapter number (including "x.5"), then name
 147
 148                 Element doc = getSourceNode();
 149                 for (Element li : doc.getElementsByTag("li")) {
 150                         Element el = li.getElementsByTag("h4").first();
 151                         if (el == null) {
 152                                 el = li.getElementsByTag("h3").first();
 153                         }
 154                         if (el != null) {
 155                                 Element a = el.getElementsByTag("a").first();
 156                                 if (a != null) {
 157                                         String title = StringUtils.unhtml(el.text()).trim();
 158                                         try {
 159                                                 String url = a.absUrl("href");
 160                                                 if (url.endsWith("1.html")) {
 161                                                         url = url.substring(0,
 162                                                                         url.length() - "1.html".length());
 163                                                 }
 164                                                 if (!url.endsWith("/")) {
 165                                                         url += "/";
 166                                                 }
 167
 168                                                 if (prefix == null || !prefix.isEmpty()) {
 169                                                         StringBuilder possiblePrefix = new StringBuilder(
 170                                                                         StringUtils.unhtml(a.text()).trim());
 171                                                         while (possiblePrefix.length() > 0) {
 172                                                                 char car = possiblePrefix.charAt(possiblePrefix
 173                                                                                 .length() - 1);
 174                                                                 boolean punctuation = (car == '.' || car == ' ');
 175                                                                 boolean digit = (car >= '0' && car <= '9');
 176                                                                 if (!punctuation && !digit) {
 177                                                                         break;
 178                                                                 }
 179
 180                                                                 possiblePrefix.setLength(possiblePrefix
 181                                                                                 .length() - 1);
 182                                                         }
 183
 184                                                         if (prefix == null) {
 185                                                                 prefix = possiblePrefix.toString();
 186                                                         }
 187
 188                                                         if (!prefix.equalsIgnoreCase(possiblePrefix
 189                                                                         .toString())) {
 190                                                                 prefix = ""; // prefix not ok
 191                                                         }
 192                                                 }
 193
 194                                                 urls.add(new AbstractMap.SimpleEntry<String, URL>(
 195                                                                 title, new URL(url)));
 196                                         } catch (Exception e) {
 197                                                 Instance.getTraceHandler().error(e);
 198                                         }
 199                                 }
 200                         }
 201                 }
 202
 203                 if (prefix != null && !prefix.isEmpty()) {
 204                         try {
 205                                 // We found a prefix, so everything should be sortable
 206                                 SortedMap<Double, Entry<String, URL>> map = new TreeMap<Double, Entry<String, URL>>();
 207                                 for (Entry<String, URL> entry : urls) {
 208                                         String num = entry.getKey().substring(prefix.length() + 1)
 209                                                         .trim();
 210                                         String name = "";
 211                                         int pos = num.indexOf(' ');
 212                                         if (pos >= 0) {
 213                                                 name = num.substring(pos).trim();
 214                                                 num = num.substring(0, pos).trim();
 215                                         }
 216
 217                                         if (!name.isEmpty()) {
 218                                                 name = "Tome " + num + ": " + name;
 219                                         } else {
 220                                                 name = "Tome " + num;
 221                                         }
 222
 223                                         double key = Double.parseDouble(num);
 224
 225                                         map.put(key, new AbstractMap.SimpleEntry<String, URL>(name,
 226                                                         entry.getValue()));
 227                                 }
 228                                 urls = new ArrayList<Entry<String, URL>>(map.values());
 229                         } catch (NumberFormatException e) {
 230                                 Instance.getTraceHandler()
 231                                                 .error(new IOException(
 232                                                                 "Cannot find a tome number, revert to default sorting",
 233                                                                 e));
 234                                 // by default, the chapters are in reversed order
 235                                 Collections.reverse(urls);
 236                         }
 237                 } else {
 238                         // by default, the chapters are in reversed order
 239                         Collections.reverse(urls);
 240                 }
 241
 242                 return urls;
 243         }
 244
 245         @Override
 246         protected String getChapterContent(URL chapUrl, int number, Progress pg)
 247                         throws IOException {
 248                 if (pg == null) {
 249                         pg = new Progress();
 250                 }
 251
 252                 StringBuilder builder = new StringBuilder();
 253
 254                 String url = chapUrl.toString();
 255                 InputStream imageIn = null;
 256                 Element imageDoc = null;
 257
 258                 // 1. find out how many images there are
 259                 int size;
 260                 try {
 261                         // note: when used, the base URL can be an ad-page
 262                         imageIn = openEx(url + "1.html");
 263                         imageDoc = DataUtil.load(imageIn, "UTF-8", url + "1.html");
 264                 } catch (IOException e) {
 265                         Instance.getTraceHandler().error(
 266                                         new IOException("Cannot get image " + 1 + " of manga", e));
 267                 } finally {
 268                         if (imageIn != null) {
 269                                 imageIn.close();
 270                         }
 271                 }
 272                 Element select = imageDoc.getElementsByClass("m").first();
 273                 Elements options = select.getElementsByTag("option");
 274                 size = options.size() - 1; // last is "Comments"
 275
 276                 pg.setMinMax(0, size);
 277
 278                 // 2. list them
 279                 for (int i = 1; i <= size; i++) {
 280                         if (i > 1) { // because first one was opened for size
 281                                 try {
 282                                         imageIn = openEx(url + i + ".html");
 283                                         imageDoc = DataUtil.load(imageIn, "UTF-8", url + i
 284                                                         + ".html");
 285
 286                                         String linkImage = imageDoc.getElementById("image").absUrl(
 287                                                         "src");
 288                                         if (linkImage != null) {
 289                                                 builder.append("[");
 290                                                 // to help with the retry and the originalUrl, part 1
 291                                                 builder.append(withoutQuery(linkImage));
 292                                                 builder.append("]<br/>");
 293                                         }
 294
 295                                         // to help with the retry and the originalUrl, part 2
 296                                         refresh(linkImage);
 297                                 } catch (IOException e) {
 298                                         Instance.getTraceHandler().error(
 299                                                         new IOException("Cannot get image " + i
 300                                                                         + " of manga", e));
 301                                 } finally {
 302                                         if (imageIn != null) {
 303                                                 imageIn.close();
 304                                         }
 305                                 }
 306                         }
 307                 }
 308
 309                 return builder.toString();
 310         }
 311
 312         /**
 313          * Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}.
 314          *
 315          * @param url
 316          *            the URL to refresh
 317          *
 318          * @return TRUE if it was refreshed
 319          */
 320         private boolean refresh(String url) {
 321                 try {
 322                         openEx(url).close();
 323                         return true;
 324                 } catch (Exception e) {
 325                         return false;
 326                 }
 327         }
 328
 329         /**
 330          * Open the URL through the cache, but: retry a second time after 100ms if
 331          * it fails, remove the query part of the {@link URL} before saving it to
 332          * the cache (so it can be recalled later).
 333          *
 334          * @param url
 335          *            the {@link URL}
 336          *
 337          * @return the resource
 338          *
 339          * @throws IOException
 340          *             in case of I/O error
 341          */
 342         private InputStream openEx(String url) throws IOException {
 343                 try {
 344                         return Instance.getCache().open(new URL(url), this, true,
 345                                         withoutQuery(url));
 346                 } catch (Exception e) {
 347                         // second chance
 348                         try {
 349                                 Thread.sleep(100);
 350                         } catch (InterruptedException ee) {
 351                         }
 352
 353                         return Instance.getCache().open(new URL(url), this, true,
 354                                         withoutQuery(url));
 355                 }
 356         }
 357
 358         /**
 359          * Return the same input {@link URL} but without the query part.
 360          *
 361          * @param url
 362          *            the inpiut {@link URL} as a {@link String}
 363          *
 364          * @return the input {@link URL} without query
 365          */
 366         private URL withoutQuery(String url) {
 367                 URL o = null;
 368                 try {
 369                         // Remove the query from o (originalUrl), so it can be cached
 370                         // correctly
 371                         o = new URL(url);
 372                         o = new URL(o.getProtocol() + "://" + o.getHost() + o.getPath());
 373
 374                         return o;
 375                 } catch (MalformedURLException e) {
 376                         return null;
 377                 }
 378         }
 379
 380         /**
 381          * Explode an HTML comma-separated list of values into a non-duplicate text
 382          * {@link List} .
 383          *
 384          * @param values
 385          *            the comma-separated values in HTML format
 386          *
 387          * @return the full list with no duplicate in text format
 388          */
 389         private List<String> explode(String values) {
 390                 List<String> list = new ArrayList<String>();
 391                 if (values != null && !values.isEmpty()) {
 392                         for (String auth : values.split(",")) {
 393                                 String a = StringUtils.unhtml(auth).trim();
 394                                 if (!a.isEmpty() && !list.contains(a.trim())) {
 395                                         list.add(a);
 396                                 }
 397                         }
 398                 }
 399
 400                 return list;
 401         }
 402
 403         @Override
 404         protected boolean supports(URL url) {
 405                 return "mangafox.me".equals(url.getHost())
 406                                 || "www.mangafox.me".equals(url.getHost())
 407                                 || "fanfox.net".equals(url.getHost())
 408                                 || "www.fanfox.net".equals(url.getHost());
 409         }
 410 }