Skip to content

Commit

Permalink
1.6.6. <=> Fixed imdb scraper + multi provider support
Browse files Browse the repository at this point in the history
  • Loading branch information
mynttt committed Mar 19, 2022
1 parent 688d77c commit 06d3fa9
Show file tree
Hide file tree
Showing 14 changed files with 195 additions and 104 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 1.6.6
- Fixed the broken ImdbScraper after changes to IMDB's website.
- Updated the IMDB resolution process to factor in both TVDB and TMDB instead of choosing just one of them. This prepares for (hopefully soon) supporting IMDB lookup for items that only have a TMDB ID from the new Plex agent.

## 1.6.5
- Updated ImdbScraper to handle new IMDB web design. The scraper will now work again instead of throwing tons of `appears to not be allowed to be rated by anyone` messages.
- Mitigation added to automatically reset the set of scraper blacklisted items for older versions once on start-up, so you don't need to wait for 30 days until the scraper picks up those possibly wrongly blacklisted items again for processing.
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.6.5
1.6.6
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ plugins {
id 'com.github.spotbugs' version '2.0.1'
}

version = '1.6.5'
version = '1.6.6'
sourceCompatibility = '11'

new File(projectDir, "VERSION").text = version;
Expand Down
18 changes: 18 additions & 0 deletions src/main/java/updatetool/Mitigations.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,27 @@ public static void executeMitigations() {
executeTypoSwitchCacheResetMitigation();
executeCacheParameterWrongOrderMitigation();
executeCacheResetForImdbScraperUpdateMitigation();
executeNewAgentMappingFormatReset();
MITIGATIONS.dump();
}

/**
 * One-time mitigation that resets the {@code new-agent-mapping.json} store located in
 * {@code Main.PWD}.
 *
 * The mitigation is skipped when {@code MITIGATIONS} already holds an entry under this
 * method's name; after a successful run that entry is written (with an empty value) so the
 * reset is not repeated on later start-ups.
 */
private static void executeNewAgentMappingFormatReset() {
    final String mitigationKey = "executeNewAgentMappingFormatReset";
    final boolean alreadyApplied = MITIGATIONS.lookup(mitigationKey) != null;

    if (!alreadyApplied) {
        Logger.info("One time mitigation executed: Reset new-agent-mapping.json for new storage format.");
        Logger.info("This mitigation will only be executed once.");

        // Presumably reset() clears the entries and dump() persists the now-empty store — confirm in KeyValueStore.
        var mappingStore = KeyValueStore.of(Main.PWD.resolve("new-agent-mapping.json"));
        mappingStore.reset();
        mappingStore.dump();

        Logger.info("Mitigation completed!");

        // Marker entry: only the presence of the key is checked above, the value is irrelevant.
        MITIGATIONS.cache(mitigationKey, "");
    }
}

private static void executeCacheResetForImdbScraperUpdateMitigation() {
String KEY = "executeCacheResetForImdbScraperUpdateMitigation";

Expand Down
4 changes: 4 additions & 0 deletions src/main/java/updatetool/common/DatabaseSupport.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ public DatabaseSupport(SqliteDatabaseProvider provider) {
this.provider = provider;
}

/**
 * Level of a TV library item within the show hierarchy, as used for items handled by the
 * new Plex agent.
 *
 * The declaration order (SERIES, SEASON, EPISODE) is kept stable.
 */
public enum NewAgentSeriesType {
    /** A whole show. */
    SERIES,
    /** A single season of a show. */
    SEASON,
    /** A single episode. */
    EPISODE
}

public enum LibraryType {
MOVIE(1),
SERIES(2);
Expand Down
11 changes: 10 additions & 1 deletion src/main/java/updatetool/common/HttpRunner.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.util.Objects;
import java.util.Optional;
import org.tinylog.Logger;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

public class HttpRunner<B, R, P> {
private final int maxTries;
Expand Down Expand Up @@ -35,6 +36,14 @@ public static <T> RunnerResult<T> ofFailure(T result) {
}
}

/**
 * Null-safe helper that converts an arbitrary object to its trimmed string form.
 *
 * @param o the object to render; may be {@code null}
 * @return {@code null} if {@code o} is {@code null} or its {@code toString()} returns
 *         {@code null}; otherwise the {@code toString()} result with leading and trailing
 *         whitespace removed via {@link String#trim()}
 */
@SuppressFBWarnings("RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
private static String trim(Object o) {
    // toString() is not guaranteed to be non-null for arbitrary implementations, hence the second check.
    final String text = (o != null) ? o.toString() : null;
    return (text != null) ? text.trim() : null;
}

public static class HttpCodeHandler<B, R, P> {
private final Map<Integer, Handler<B, R, P>> handlers = new HashMap<>();
private final Handler<B, R, P> defaultHandler;
Expand All @@ -49,7 +58,7 @@ private HttpCodeHandler(Map<Integer, Handler<B, R, P>> handlers, Handler<B, R, P
this.defaultHandler = defaultHandler == null
? (body, result, payload)
-> {
Logger.error("{} : Unhandled HTTP Error [response={} | payload={}]", identifier, body, body.body());
Logger.error("{} : Unhandled HTTP Error [response={} | payload={}]", identifier, trim(body), trim(body.body()));
return RunnerResult.ofFailure(result);
}
: defaultHandler;
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/updatetool/common/externalapis/TvdbApiV4.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import net.minidev.json.JSONArray;
import net.minidev.json.JSONObject;
import updatetool.common.DatabaseSupport.LibraryType;
import updatetool.common.DatabaseSupport.NewAgentSeriesType;
import updatetool.common.HttpRunner;
import updatetool.common.KeyValueStore;
import updatetool.common.Utility;
Expand Down Expand Up @@ -274,7 +275,7 @@ public void resolveImdbIdForItem(ImdbMetadataResult result) {
if(parts.length == 3) {
resolveLegacyLookup(parts, result);
} else {
runner.run(result.type == LibraryType.MOVIE ? () -> queryForMovie(result.extractedId) : result.hasEpisodeAgentFlag ? () -> queryForEpisode(result.extractedId) : () -> queryForSeries(result.extractedId), result);
runner.run(result.type == LibraryType.MOVIE ? () -> queryForMovie(result.extractedId) : result.seriesType == NewAgentSeriesType.EPISODE ? () -> queryForEpisode(result.extractedId) : () -> queryForSeries(result.extractedId), result);
}
}

Expand Down
128 changes: 66 additions & 62 deletions src/main/java/updatetool/imdb/ImdbDatabaseSupport.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import updatetool.Globals;
import updatetool.Main;
import updatetool.common.DatabaseSupport.LibraryType;
import updatetool.common.DatabaseSupport.NewAgentSeriesType;
import updatetool.common.KeyValueStore;
import updatetool.common.SqliteDatabaseProvider;
import updatetool.common.Utility;
Expand All @@ -36,6 +37,61 @@ public class ImdbDatabaseSupport {
private final KeyValueStore newAgentMapping;
private final ImdbPipelineConfiguration config;

/**
 * Mutable carrier for one metadata item read from the Plex database, plus the IMDB
 * resolution state ({@code imdbId}, {@code extractedId}, {@code resolved}) that is filled in
 * later by the pipeline.
 *
 * Identity is defined by the database {@code id} only: {@link #equals(Object)} and
 * {@link #hashCode()} both use that field, so an instance keeps its hash when the pipeline
 * later assigns {@code imdbId}.
 */
public static class ImdbMetadataResult {
    //Id will be resolved in the pipeline and not here
    public String imdbId, extractedId;
    public String title, hash;
    public Integer id, libraryId, index;
    public String extraData, guid;
    public Double rating, audienceRating;
    public boolean resolved;
    public LibraryType type;
    public NewAgentSeriesType seriesType;

    /** Creates an empty result; all reference fields start out as {@code null}. */
    public ImdbMetadataResult() {}

    /**
     * Populates a result from the current row of {@code rs}.
     *
     * Expected column order: id, library_section_id, guid, title, extra_data, hash, rating,
     * audience_rating, index.
     *
     * @param rs   result set positioned on the row to read
     * @param type library type the row belongs to
     * @throws SQLException if a column cannot be read
     */
    private ImdbMetadataResult(ResultSet rs, LibraryType type) throws SQLException {
        this.type = type;
        id = rs.getInt(1);
        libraryId = rs.getInt(2);
        guid = rs.getString(3);
        title = rs.getString(4);
        extraData = rs.getString(5);
        hash = rs.getString(6);
        // getObject keeps SQL NULL as null instead of collapsing it to 0.
        rating = (Double) rs.getObject(7);
        audienceRating = (Double) rs.getObject(8);
        index = (Integer) rs.getObject(9);
        seriesType = seriesTypeOf(guid);
    }

    /** Maps a new-agent guid prefix to its series level; any other prefix yields {@code null}. */
    private static NewAgentSeriesType seriesTypeOf(String guid) {
        if (guid.startsWith("plex://episode")) {
            return NewAgentSeriesType.EPISODE;
        }
        if (guid.startsWith("plex://season")) {
            return NewAgentSeriesType.SEASON;
        }
        if (guid.startsWith("plex://show")) {
            return NewAgentSeriesType.SERIES;
        }
        return null;
    }

    /**
     * Hashes the same field that {@link #equals(Object)} compares, so equal objects always
     * share a hash code.
     */
    @Override
    public int hashCode() {
        return Objects.hashCode(id);
    }

    /** Two results are equal when they are of the same class and have the same {@code id}. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        ImdbMetadataResult other = (ImdbMetadataResult) obj;
        return Objects.equals(id, other.id);
    }

    @Override
    public String toString() {
        return "ImdbMetadataResult [imdbId=" + imdbId + ", extractedId=" + extractedId + ", title=" + title
                + ", hash=" + hash + ", id=" + id + ", libraryId=" + libraryId + ", index=" + index + ", extraData="
                + extraData + ", guid=" + guid + ", rating=" + rating + ", audienceRating=" + audienceRating
                + ", resolved=" + resolved + ", type=" + type + ", seriesType=" + seriesType + "]";
    }
}

public ImdbDatabaseSupport(SqliteDatabaseProvider provider, KeyValueStore newAgentMapping, ImdbPipelineConfiguration config) {
this.provider = provider;
this.newAgentMapping = newAgentMapping;
Expand Down Expand Up @@ -100,68 +156,16 @@ private void testPlexSqliteBinaryVersion() {
}
}

public static class ImdbMetadataResult {
//Id will be resolved in the pipeline and not here
public String imdbId, extractedId;
public String title, hash;
public Integer id, libraryId;
public String extraData, guid;
public Double rating, audienceRating;
public boolean resolved;
public LibraryType type;
public boolean hasEpisodeAgentFlag;

public ImdbMetadataResult() {};

private ImdbMetadataResult(ResultSet rs, LibraryType type) throws SQLException {
this.type = type;
id = rs.getInt(1);
libraryId = rs.getInt(2);
guid = rs.getString(3);
title = rs.getString(4);
extraData = rs.getString(5);
hash = rs.getString(6);
rating = (Double) rs.getObject(7);
audienceRating = (Double) rs.getObject(8);
hasEpisodeAgentFlag = guid.startsWith("plex://episode");
}

@Override
public int hashCode() {
return Objects.hashCode(imdbId);
}

@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
ImdbMetadataResult other = (ImdbMetadataResult) obj;
return Objects.equals(id, other.id);
}

@Override
public String toString() {
return "ImdbMetadataResult [imdbId=" + imdbId + ", extractedId=" + extractedId + ", title=" + title
+ ", hash=" + hash + ", id=" + id + ", libraryId=" + libraryId + ", extraData=" + extraData
+ ", guid=" + guid + ", rating=" + rating + ", audienceRating=" + audienceRating + ", resolved="
+ resolved + ", type=" + type + "]";
}
}

public List<ImdbMetadataResult> requestEntries(long libraryId, LibraryType type) {
return requestMetadata("SELECT id, library_section_id, guid, title, extra_data, hash, rating, audience_rating from metadata_items WHERE media_item_count = 1 AND library_section_id = " + libraryId, type);
return requestMetadata("SELECT id, library_section_id, guid, title, extra_data, hash, rating, audience_rating, \"index\" from metadata_items WHERE media_item_count = 1 AND library_section_id = " + libraryId, type);
}

public List<ImdbMetadataResult> requestTvSeriesRoot(long libraryId) {
return requestMetadata("SELECT id, library_section_id, guid, title, extra_data, hash, rating, audience_rating from metadata_items WHERE media_item_count = 0 AND parent_id IS NULL AND library_section_id = " + libraryId, LibraryType.SERIES);
return requestMetadata("SELECT id, library_section_id, guid, title, extra_data, hash, rating, audience_rating, \"index\" from metadata_items WHERE media_item_count = 0 AND parent_id IS NULL AND library_section_id = " + libraryId, LibraryType.SERIES);
}

public List<ImdbMetadataResult> requestTvSeasonRoot(long libraryId) {
return requestMetadata("SELECT id, library_section_id, guid, title, extra_data, hash, rating, audience_rating from metadata_items WHERE media_item_count = 0 AND parent_id NOT NULL AND library_section_id = " + libraryId, LibraryType.SERIES);
return requestMetadata("SELECT id, library_section_id, guid, title, extra_data, hash, rating, audience_rating, \"index\" from metadata_items WHERE media_item_count = 0 AND parent_id NOT NULL AND library_section_id = " + libraryId, LibraryType.SERIES);
}

private List<ImdbMetadataResult> requestMetadata(String query, LibraryType type) {
Expand Down Expand Up @@ -193,21 +197,21 @@ private boolean updateNewAgentMetadataMapping(ImdbMetadataResult m) throws SQLEx
return false;

String v = newAgentMapping.lookup(m.guid);
if(v != null && v.startsWith("imdb://"))
if(v != null && v.contains("imdb://"))
return false;

String result = null;
StringBuilder sb = new StringBuilder();
try(var handle = provider.queryFor("SELECT t.tag FROM taggings tg LEFT JOIN tags t ON tg.tag_id = t.id AND t.tag_type = 314 WHERE tg.metadata_item_id = " + m.id + " AND t.tag NOT NULL ORDER BY t.tag ASC")) {
while(handle.result().next()) {
String id = handle.result().getString(1);
if(result == null || !result.startsWith("imdb://"))
result = id;
sb.append(handle.result().getString(1)).append("|");
}
}


if(sb.length() > 0) { sb.deleteCharAt(sb.length()-1); }
String result = sb.toString();
boolean returnV = false;

if(result != null) {
if(!result.trim().isBlank()) {
returnV = newAgentMapping.cache(m.guid, result);
if(returnV) {
Logger.info("Associated and cached {} with new movie/TV show agent guid {} ({}).", result, m.guid, m.title);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ public void bootstrap(Map<String, String> args) throws Exception {
}

Logger.info("Capabilities: " + capabilities.toString());

var dbLocation = getDatabaseLocation(plexdata, overrideDatabaseLocation).toAbsolutePath().toString();
var config = new ImdbPipelineConfiguration(apikeyTmdb, apiauthTvdb, plexdata.resolve("Metadata/Movies"), dbLocation, executeUpdatesOverPlexSqliteVersion, capabilities);
job = new ImdbBatchJob(Main.EXECUTOR, config, plexdata, caches, capabilities);
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/updatetool/imdb/ImdbPipeline.java
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,9 @@ public void analyseDatabase(ImdbJob job) throws Exception {
resolverTasks.stream().forEach(CompletableFuture::join);
Logger.info("Progress printing watchdog has been stopped. Cancelation status: {}", handle.cancel(true));

Logger.info("Save point: Persisting caches to keep queried look-up data in case of crashes or hang-ups.");
caches.forEach(KeyValueStore::dump);

int resolvedSize = resolved.size();
int itemsSize = items.size();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public void ensureAvailability() {
this.rating = scrapedRating;
}
} catch (Exception e) {
Logger.error(e.getClass().getSimpleName() + " exception encountered @ Screen Scraping");
Logger.error(e.getClass().getSimpleName() + " exception encountered @ Screen Scraping [imdb={}]", imdbId);
Logger.error("Please contact the maintainer of the application with the stacktrace below if you think this is unwanted behavior.");
Logger.error("========================================");
Logger.error(e);
Expand Down
15 changes: 11 additions & 4 deletions src/main/java/updatetool/imdb/ImdbScraper.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@
import updatetool.common.Capabilities;
import updatetool.common.KeyValueStore;

/*
* Test cases:
* System.out.println(scr.scrapeFallback("tt12850272", "test"));
System.out.println(scr.scrapeFallback("tt13846366", "test"));
System.out.println(scr.scrapeFallback("tt14001894", "test"));
*/

public class ImdbScraper implements Closeable {
private static final String RETURN_LONG_BLACKLIST = "BLS_L";
private static final int SCRAPE_EVERY_N_DAYS_IGNORE = 30;
Expand Down Expand Up @@ -77,12 +84,12 @@ private String scrape(String imdbId) throws Exception {
}

var doc = Jsoup.parse(response.body());
var ratingValue = doc.select("span[class*=AggregateRatingButton__RatingScore]");
var ratingValue = doc.select("div[data-testid*=hero-rating-bar__aggregate-rating__score]");
boolean blacklistShort = true;

if(ratingValue.size() == 0) {
var canBeRated = doc.select("div[class*=RatingBar__ButtonContainer]");
var children = canBeRated.get(0).childNodeSize();
var canBeRated = doc.select("div[data-testid*=hero-rating-bar__user-rating__unrated]");
var children = canBeRated.size();

if(children > 0) {
if(!ImdbDockerImplementation.checkCapability(Capabilities.IGNORE_SCRAPER_NO_RESULT_LOG)) {
Expand All @@ -106,7 +113,7 @@ private String scrape(String imdbId) throws Exception {
throw new RuntimeException(String.format("Something went wrong with screen scraping the IMDB page for id %s (MORE_THAN_ONE_RESULT). Please contact developer by creating a GitHub issue and add this data: '%s'", imdbId, s));
}

String result = ratingValue.get(0).text();
String result = ratingValue.get(0).getAllElements().get(1).text().replace(",", ".");

try {
return result;
Expand Down
Loading

0 comments on commit 06d3fa9

Please sign in to comment.