From ba4d2e74cf5eaefa41ac2e6409ada8765ef96c61 Mon Sep 17 00:00:00 2001 From: riderkick Date: Sat, 9 May 2015 17:13:07 +0800 Subject: [PATCH] mangastream: rewrite all script --- .../MangaStream/chapter_page_number.inc | 28 ++++--- baseunits/includes/MangaStream/image_url.inc | 34 ++++---- .../MangaStream/manga_information.inc | 81 +++++++++---------- .../includes/MangaStream/names_and_links.inc | 51 +++++++----- baseunits/uBaseUnit.pas | 1 - 5 files changed, 95 insertions(+), 100 deletions(-) diff --git a/baseunits/includes/MangaStream/chapter_page_number.inc b/baseunits/includes/MangaStream/chapter_page_number.inc index 822e5a8f2..cfbe82149 100644 --- a/baseunits/includes/MangaStream/chapter_page_number.inc +++ b/baseunits/includes/MangaStream/chapter_page_number.inc @@ -1,33 +1,35 @@ function GetMangaStreamPageNumber: Boolean; var - s: String; - i: Cardinal; + i: Integer; l: TStringList; begin l := TStringList.Create; parse := TStringList.Create; - s := DecodeUrl(FillMangaSiteHost(MANGASTREAM_ID, URL) + '/1'); Result := GetPage(TObject(l), - s, + FillMangaSiteHost(MANGASTREAM_ID, URL) + '/1', manager.container.manager.retryConnect); + Parser := THTMLParser.Create(PChar(l.Text)); - Parser.OnFoundTag := OnTag; - Parser.OnFoundText := OnText; - Parser.Exec; - Parser.Free; + try + Parser.OnFoundTag := OnTag; + Parser.OnFoundText := OnText; + Parser.Exec; + finally + Parser.Free + end; + if parse.Count > 0 then begin manager.container.pageNumber := 0; for i := 0 to parse.Count - 1 do - begin - if (Pos('Last Page (', parse.Strings[i]) > 0) then + if Pos('Last Page (', parse[i]) > 0 then begin - manager.container.PageNumber := - StrToInt(TrimLeft(TrimRight(GetString(parse.Strings[i], 'Last Page (', ')')))); + manager.container.PageNumber := StrToIntDef(ReplaceRegExpr( + '^.*Last\s*Page\s*\((\d+)\).*$', parse[i], '$1', True), 1); Break; end; - end; end; + parse.Free; l.Free; end; diff --git a/baseunits/includes/MangaStream/image_url.inc b/baseunits/includes/MangaStream/image_url.inc index d8127d545..79ac0b8ce 100644 --- a/baseunits/includes/MangaStream/image_url.inc +++ b/baseunits/includes/MangaStream/image_url.inc @@ -1,36 +1,32 @@ function GetMangaStreamImageURL: Boolean; var s: String; - i: Cardinal; + i: Integer; l: TStringList; begin l := TStringList.Create; - s := DecodeUrl(URL + '/' + IntToStr(workCounter + 1)); - if not (Pos('http', LowerCase(s)) = 1) then + s := URL + '/' + IntToStr(workCounter + 1); + if Pos('http', LowerCase(s)) <> 1 then s := MANGASTREAM_ROOT2 + s; - Result := GetPage(TObject(l), - s, - manager.container.Manager.retryConnect); - if Self.Terminated then - begin - l.Free; - Exit; - end; + + Result := GetPage(TObject(l), s, manager.container.Manager.retryConnect); parse := TStringList.Create; - Parser := THTMLParser.Create(PChar(l.Text)); - Parser.OnFoundTag := OnTag; - Parser.OnFoundText := OnText; - Parser.Exec; - Parser.Free; + try + Parser := THTMLParser.Create(PChar(l.Text)); + Parser.OnFoundTag := OnTag; + Parser.OnFoundText := OnText; + Parser.Exec; + finally + Parser.Free; + end; if parse.Count > 0 then begin for i := 0 to parse.Count - 1 do - if (Pos('id="manga-page"', parse.Strings[i]) > 0) then + if (GetTagName(parse[i]) = 'img') and (GetVal(parse[i], 'id') = 'manga-page') then begin - manager.container.PageLinks.Strings[workCounter] := - GetAttributeValue(GetTagAttribute(parse.Strings[i], 'src=')); + manager.container.PageLinks.Strings[workCounter] := GetVal(parse[i], 'src'); Break; end; end; diff --git a/baseunits/includes/MangaStream/manga_information.inc b/baseunits/includes/MangaStream/manga_information.inc index 9161bc067..74266dbdb 100644 --- a/baseunits/includes/MangaStream/manga_information.inc +++ b/baseunits/includes/MangaStream/manga_information.inc @@ -1,9 +1,10 @@ function GetMangaStreamInfoFromURL: Byte; var + i: Integer; s: String; - isExtractChapter: Boolean = True; - i, j: Cardinal; + isExtractChapter: Boolean = False; begin + mangaInfo.website := WebsiteRoots[MANGASTREAM_ID, 0]; mangaInfo.url := FillMangaSiteHost(MANGASTREAM_ID, URL); if not GetPage(TObject(Source), mangaInfo.url, Reconnect) then begin @@ -13,60 +14,50 @@ end; // parsing the HTML source - parse.Clear; - Parser := THTMLParser.Create(PChar(Source.Text)); - Parser.OnFoundTag := OnTag; - Parser.OnFoundText := OnText; - Parser.Exec; - Parser.Free; + Parser := THTMLParser.Create(PChar(Source.Text)); + try + Parser.OnFoundTag := OnTag; + Parser.OnFoundText := OnText; + parse.Clear; + Parser.Exec; + finally + Parser.Free; + end; Source.Free; - mangaInfo.website := WebsiteRoots[MANGASTREAM_ID, 0]; - - mangaInfo.coverLink := ''; - mangaInfo.summary := ''; - mangaInfo.authors := ''; - mangaInfo.artists := ''; - mangaInfo.genres := ''; - mangaInfo.status := '1'; - // using parser (cover link, summary, chapter name and link) if parse.Count = 0 then Exit; + + mangaInfo.status := '1'; for i := 0 to parse.Count - 1 do begin - // do not allow to get chapter name and links - if (isExtractChapter) and - (Pos('', parse.Strings[i]) > 0) then - isExtractChapter := False; + //title + if mangaInfo.title = '' then + if GetTagName(parse[i]) = 'h1' then + mangaInfo.title := CommonStringFilter(parse[i + 1]); - // get chapter name and links - if (isExtractChapter) and - (Pos('', parse.Strings[i]) > 0) and - (Pos('', parse.Strings[i + 4]) > 0) then + //chapters + if (GetTagName(parse[i]) = 'table') and (GetVal(parse[i], 'class') = 'table table-striped') then + isExtractChapter := True; + if isExtractChapter then begin - Inc(mangaInfo.numChapter); - s := StringReplace(GetAttributeValue(GetTagAttribute(parse.Strings[i + 1], 'href=')), - MANGASTREAM_ROOT2, '', []); - Delete(s, Length(s) - 1, 2); - mangaInfo.chapterLinks.Add(s); - s := RemoveSymbols(TrimLeft(TrimRight(parse.Strings[i + 2]))); - mangaInfo.chapterName.Add(StringFilter(StringFilter(HTMLEntitiesFilter(s)))); + if GetTagName(parse[i]) = '/table' then + Break + else + if GetTagName(parse[i]) = 'a' then + begin + Inc(mangaInfo.numChapter); + mangaInfo.chapterName.Add(parse[i + 1]); + s := GetVal(parse[i], 'href'); + if Pos('http', LowerCase(s)) <> 1 then + s := MANGASTREAM_ROOT2 + s; + mangaInfo.chapterLinks.Add(s); + end; end; end; - // Since chapter name and link are inverted, we need to invert them - if mangainfo.ChapterLinks.Count > 1 then - begin - i := 0; - j := mangainfo.ChapterLinks.Count - 1; - while (i < j) do - begin - mangainfo.ChapterName.Exchange(i, j); - mangainfo.chapterLinks.Exchange(i, j); - Inc(i); - Dec(j); - end; - end; + // invert chapters + InvertStrings([mangaInfo.chapterName, mangaInfo.chapterLinks]); Result := NO_ERROR; end; diff --git a/baseunits/includes/MangaStream/names_and_links.inc b/baseunits/includes/MangaStream/names_and_links.inc index 082d12e11..d6c2602a9 100644 --- a/baseunits/includes/MangaStream/names_and_links.inc +++ b/baseunits/includes/MangaStream/names_and_links.inc @@ -1,36 +1,43 @@ function MangaStreamGetNamesAndLinks: Byte; var - i: Cardinal; - s: String; + i: Integer; + isExtractNames: Boolean = False; begin Result := INFORMATION_NOT_FOUND; - if not GetPage(TObject(Source), MANGASTREAM_ROOT + MANGASTREAM_BROWSER, 0) then + if not GetPage(TObject(Source), MANGASTREAM_ROOT + '/manga', 3) then begin Result := NET_PROBLEM; Source.Free; Exit; end; - parse.Clear; + Parser := THTMLParser.Create(PChar(Source.Text)); - Parser.OnFoundTag := OnTag; - Parser.OnFoundText := OnText; - Parser.Exec; - Parser.Free; - if parse.Count = 0 then - begin - Source.Free; - Exit; + try + Parser.OnFoundTag := OnTag; + Parser.OnFoundText := OnText; + parse.Clear; + Parser.Exec; + finally + Parser.Free; end; - for i := 0 to parse.Count - 1 do - begin - if (Pos('/manga/', parse.Strings[i]) > 0) then + + if parse.Count > 0 then + for i := 0 to parse.Count - 1 do begin - Result := NO_ERROR; - s := StringFilter(TrimLeft(TrimRight(parse.Strings[i + 1]))); - names.Add(HTMLEntitiesFilter(s)); - s := GetAttributeValue(GetTagAttribute(parse.Strings[i], 'href="')); - links.Add(StringReplace(s, MANGASTREAM_ROOT, '', [])); + if (GetTagName(parse[i]) = 'table') and (GetVal(parse[i], 'class') = 'table table-striped') then + isExtractNames := True; + if isExtractNames then + begin + if GetTagName(parse[i]) = '/table' then + Break + else + if GetTagName(parse[i]) = 'a' then + if GetVal(parse[i], 'class') <> 'chapter-link' then + begin + links.Add(GetVal(parse[i], 'href')); + names.Add(CommonStringFilter(parse[i + 1])); + end; + end; end; - end; Source.Free; - end; \ No newline at end of file + end; diff --git a/baseunits/uBaseUnit.pas b/baseunits/uBaseUnit.pas index a25853878..a786b5065 100644 --- a/baseunits/uBaseUnit.pas +++ b/baseunits/uBaseUnit.pas @@ -502,7 +502,6 @@ interface MANGASTREAM_ROOT: String = 'http://mangastream.com'; MANGASTREAM_ROOT2: String = 'http://readms.com'; - MANGASTREAM_BROWSER: String = '/manga'; MANGAEDEN_BROWSER: String = '/en-directory/'; MANGAEDEN_BROWSER_1: String = '/en-directory/';