Skip to content

Commit

Permalink
fixed TemporaryIndex::updateEntry; ...
Browse files Browse the repository at this point in the history
- added test for TemporaryIndex::updateEntry
- renamed IndexEntry's (s)getLatestNtFile to (s)getLatestNtriplesFile
- refined composer.json and MergeInManuallyMaintainedMetadata
  • Loading branch information
k00ni committed May 20, 2024
1 parent 24266ad commit 62d1f6c
Show file tree
Hide file tree
Showing 11 changed files with 423 additions and 288 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ index:
prepare:
cd scripts && vendor/bin/php-cs-fixer fix
cd scripts && vendor/bin/phpstan
cd scripts && vendor/bin/phpunit
cd scripts && vendor/bin/phpunit --display-warnings
450 changes: 225 additions & 225 deletions index.csv

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions scripts/composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
"prefer-stable": true,
"require": {
"php": ">=8.2",
"ext-curl": "*",
"ext-json": "*",
"ext-zip": "*",
"php-curl-class/php-curl-class": "^9.19",
"sweetrdf/easyrdf": "^1.13.0",
"sweetrdf/in-memory-store-sqlite": "^1.1.0",
Expand Down
2 changes: 1 addition & 1 deletion scripts/phpunit.xml.dist
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/11.1/phpunit.xsd"
bootstrap="vendor/autoload.php"
bootstrap="bin/bootstrap.php"
cacheDirectory=".phpunit.cache"
executionOrder="depends,defects"
requireCoverageMetadata="false"
Expand Down
5 changes: 1 addition & 4 deletions scripts/src/Command/MergeInManuallyMaintainedMetadata.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,6 @@ public function run(): void
continue;
}

// check if ontology URI is already known
$entryData = $this->temporaryIndex->getEntryDataAsArray((string) $row[1]);

// setup IndexEntry instance
$entry = $this->getPreparedIndexEntry();
$entry->setOntologyTitle($row[0]);
Expand All @@ -65,7 +62,7 @@ public function run(): void
// related files
$entry->setLatestJsonLdFile($row[8]);
$entry->setLatestN3File($row[9]);
$entry->setLatestNtFile($row[10]);
$entry->setLatestNtriplesFile($row[10]);
$entry->setLatestRdfXmlFile($row[11]);
$entry->setLatestTurtleFile($row[12]);

Expand Down
2 changes: 1 addition & 1 deletion scripts/src/Extractor/BioPortal.php
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ public function run(): void

// set latest files based on format
if ('ntriples' == $format) {
$newEntry->setLatestNtFile($ontologyFile);
$newEntry->setLatestNtriplesFile($ontologyFile);
} elseif ('rdfxml' == $format) {
$newEntry->setLatestRdfXmlFile($ontologyFile);
} elseif ('turtle' == $format) {
Expand Down
12 changes: 6 additions & 6 deletions scripts/src/Extractor/DBpediaArchivo.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,20 @@ public function run(): void
echo '---------------------------------------------------------------------';
echo PHP_EOL;
echo 'Next: '.$indexEntry->getOntologyTitle();
echo ' >> '.$indexEntry->getLatestNtFile();
echo ' >> '.$indexEntry->getLatestNtriplesFile();

if (null === $indexEntry->getLatestNtFile() || isEmpty($indexEntry->getLatestNtFile())) {
if (null === $indexEntry->getLatestNtriplesFile() || isEmpty($indexEntry->getLatestNtriplesFile())) {
throw new Exception('No ntriples file path set!');
}

// fill remaining metadata by downloading RDF file to extract further meta data
try {
$fileHandle = $this->cache->getLocalFileResourceForFileUrl($indexEntry->getLatestNtFile());
$fileHandle = $this->cache->getLocalFileResourceForFileUrl($indexEntry->getLatestNtriplesFile());
if (false === is_resource($fileHandle)) {
throw new Exception('Could not open related file for '.$indexEntry->getLatestNtFile());
throw new Exception('Could not open related file for '.$indexEntry->getLatestNtriplesFile());
}

$localFilePath = $this->cache->getCachedFilePathForFileUrl($indexEntry->getLatestNtFile());
$localFilePath = $this->cache->getCachedFilePathForFileUrl($indexEntry->getLatestNtriplesFile());
$graph = $this->loadQuadsIntoGraph($fileHandle, $localFilePath, 'ntriples');
fclose($fileHandle);
} catch (Exception $e) {
Expand Down Expand Up @@ -151,7 +151,7 @@ public function getOntologiesToProcess(): array
* latest OWL,TTL,... file
*/
$iriUrlEncoded = urlencode((string) $newEntry->getOntologyIri());
$newEntry->setLatestNtFile('http://archivo.dbpedia.org/download?o='.$iriUrlEncoded.'&f=nt');
$newEntry->setLatestNtriplesFile('http://archivo.dbpedia.org/download?o='.$iriUrlEncoded.'&f=nt');
$newEntry->setLatestRdfXmlFile('http://archivo.dbpedia.org/download?o='.$iriUrlEncoded.'&f=owl');
$newEntry->setLatestTurtleFile('http://archivo.dbpedia.org/download?o='.$iriUrlEncoded.'&f=ttl');

Expand Down
2 changes: 1 addition & 1 deletion scripts/src/Extractor/OntologyLookupService.php
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ public function run(): void
if ('json' == $format) {
$newEntry->setLatestJsonLdFile($ontologyFileLocation);
} elseif ('ntriples' == $format) {
$newEntry->setLatestNtFile($ontologyFileLocation);
$newEntry->setLatestNtriplesFile($ontologyFileLocation);
} elseif ('rdfxml' == $format) {
$newEntry->setLatestRdfXmlFile($ontologyFileLocation);
} elseif ('turtle' == $format) {
Expand Down
18 changes: 9 additions & 9 deletions scripts/src/IndexEntry.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class IndexEntry

private string|null $latestJsonLdFile = null;
private string|null $latestN3File = null;
private string|null $latestNtFile = null;
private string|null $latestNtriplesFile = null;
private string|null $latestRdfXmlFile = null;
private string|null $latestTurtleFile = null;

Expand Down Expand Up @@ -174,22 +174,22 @@ public function setLatestN3File(string|null $latestN3File): self
}
}

/**
* Returns the current value of the latest N-Triples file property (null until a value was set).
*/
public function getLatestNtFile(): string|null
public function getLatestNtriplesFile(): string|null
{
return $this->latestNtFile;
return $this->latestNtriplesFile;
}

/**
* Sets the location of the latest N-Triples file.
*
* The value is accepted when isUrl() or isEmpty() returns true for it. An accepted value is
* cast to string and trimmed before it is stored, so null ends up as an empty string.
*
* @throws \Exception if latestNtFile is not a valid URL.
* @throws \Exception if latestNtriplesFile is not a valid URL.
*/
public function setLatestNtFile(string|null $latestNtFile): self
public function setLatestNtriplesFile(string|null $latestNtriplesFile): self
{
if (isUrl($latestNtFile) || isEmpty($latestNtFile)) {
$this->latestNtFile = trim((string) $latestNtFile);
if (isUrl($latestNtriplesFile) || isEmpty($latestNtriplesFile)) {
$this->latestNtriplesFile = trim((string) $latestNtriplesFile);

// returning $this allows setter calls to be chained
return $this;
} else {
throw new Exception($latestNtFile.' is not a valid URL');
throw new Exception($latestNtriplesFile.' is not a valid URL');
}
}

Expand Down Expand Up @@ -293,7 +293,7 @@ public function isValid(): bool
|| (
isEmpty($this->getLatestJsonLdFile())
&& isEmpty($this->getLatestN3File())
&& isEmpty($this->getLatestNtFile())
&& isEmpty($this->getLatestNtriplesFile())
&& isEmpty($this->getLatestRdfXmlFile())
&& isEmpty($this->getLatestTurtleFile())
)
Expand Down
145 changes: 105 additions & 40 deletions scripts/src/TemporaryIndex.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,11 @@ class TemporaryIndex
/**
* @throws \PDOException
*/
public function __construct()
public function __construct(string|null $customPath)
{
// create/open SQLite file with the temporary index
$this->temporaryIndexDb = new PDO('sqlite:'.SQLITE_FILE_PATH);
$this->temporaryIndexDb = new PDO('sqlite:'.$customPath);

$this->temporaryIndexDb->exec('CREATE TABLE IF NOT EXISTS entry (
id INTEGER PRIMARY KEY AUTOINCREMENT,
ontology_title TEXT,
Expand Down Expand Up @@ -134,7 +135,7 @@ public function storeEntries(array $temporaryIndex): void
// files
$indexEntry->getLatestJsonLdFile(),
$indexEntry->getLatestN3File(),
$indexEntry->getLatestNtFile(),
$indexEntry->getLatestNtriplesFile(),
$indexEntry->getLatestRdfXmlFile(),
$indexEntry->getLatestTurtleFile(),
// misc
Expand All @@ -159,56 +160,120 @@ public function storeEntries(array $temporaryIndex): void
}

/**
 * Fills in those fields of an existing DB entry that are still empty.
 *
 * The row is looked up by the ontology IRI of $entry. For each updatable column the
 * value from $entry is written only if the stored value is empty (according to
 * isEmpty()) and differs from the new value. Stored values that are not empty are
 * never overwritten. If no column qualifies, no UPDATE statement is sent.
 *
 * @throws \Exception if there is no row for the ontology IRI of $entry
 * @throws \PDOException
 */
public function updateEntry(IndexEntry $entry): void
{
    $stmt = $this->temporaryIndexDb->prepare('SELECT * FROM entry WHERE ontology_iri = ?');
    $stmt->execute([$entry->getOntologyIri()]);
    $row = $stmt->fetch(PDO::FETCH_ASSOC);

    if (false === is_array($row)) {
        throw new Exception('No entry found for '.$entry->getOntologyIri());
    }

    // Column name => candidate value from $entry. Keeping the column name in exactly one
    // place prevents a branch from writing its value into the wrong column.
    $candidates = [
        'ontology_title' => $entry->getOntologyTitle(),
        // general information
        'summary' => $entry->getSummary(),
        'license_information' => $entry->getLicenseInformation(),
        'authors' => $entry->getAuthors(),
        'contributors' => $entry->getContributors(),
        'project_page' => $entry->getProjectPage(),
        'source_page' => $entry->getSourcePage(),
        // files
        'latest_json_ld_file' => $entry->getLatestJsonLdFile(),
        'latest_n3_file' => $entry->getLatestN3File(),
        'latest_ntriples_file' => $entry->getLatestNtriplesFile(),
        'latest_rdfxml_file' => $entry->getLatestRdfXmlFile(),
        'latest_turtle_file' => $entry->getLatestTurtleFile(),
        // misc
        'modified' => $entry->getModified(),
        'version' => $entry->getVersion(),
    ];

    $setEntries = [];
    $params = [];

    foreach ($candidates as $column => $newValue) {
        // Loose comparison is intentional: an empty string in $entry and a NULL in the
        // DB count as "no change", so no pointless update is issued.
        if ($newValue != $row[$column] && isEmpty($row[$column])) {
            $setEntries[] = $column.' = ?';
            // NOTE(review): values are bound as parameters, so addslashes() is not needed for
            // SQL safety; kept to leave the stored format unchanged - confirm against storeEntries().
            $params[] = addslashes((string) $newValue);
        }
    }

    if (0 == count($setEntries)) {
        // nothing to fill in
        return;
    }

    // last placeholder belongs to the WHERE clause
    $params[] = $entry->getOntologyIri();

    $sql = 'UPDATE entry SET '.implode(', ', $setEntries).' WHERE ontology_iri = ?';

    $this->sendUpdateStmt($sql, $params);
}

/**
 * Prepares the given SQL statement and executes it with the given values.
 *
 * @param non-empty-string $sql statement containing the placeholders
 * @param array<string|int|float|null> $param values handed to execute()
 *
 * @throws \PDOException
 */
private function sendUpdateStmt(string $sql, array $param): void
{
    $this->temporaryIndexDb->prepare($sql)->execute($param);
}

/**
* @throws \PDOException
*/
Expand Down
70 changes: 70 additions & 0 deletions scripts/tests/TemporaryIndexTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<?php

namespace Tests;

use App\IndexEntry;
use App\TemporaryIndex;
use Test\TestCase;

class TemporaryIndexTest extends TestCase
{
    /**
     * Deletes a test database file left over from an earlier run.
     */
    protected function setUp(): void
    {
        parent::setUp();

        $dbFile = VAR_FOLDER_PATH.'test.db';
        if (file_exists($dbFile)) {
            unlink($dbFile);
        }
    }

    /**
     * Opens the temporary index on the test database file.
     */
    private function createIndex(): TemporaryIndex
    {
        return new TemporaryIndex(VAR_FOLDER_PATH.'test.db');
    }

    /**
     * Builds the entry both tests start from.
     */
    private function createBaseEntry(): IndexEntry
    {
        $entry = new IndexEntry('test1', 'test2');
        $entry->setLatestJsonLdFile('http://localhost/test.ttl');
        $entry->setOntologyIri('http://localhost/');
        $entry->setOntologyTitle('test onto');

        return $entry;
    }

    /**
     * A field that is empty in the DB gets filled by updateEntry.
     */
    public function testUpdateEntry(): void
    {
        $index = $this->createIndex();
        $entry = $this->createBaseEntry();

        $index->storeEntries([$entry]);

        // entry has to be in the DB before it can be updated
        $this->assertTrue($index->hasEntry($entry->getOntologyIri()));

        // change the entry after it was stored, then push the change to the DB
        $entry->setLicenseInformation('test license');
        $index->updateEntry($entry);

        $stored = $index->getEntryDataAsArray($entry->getOntologyIri());

        $this->assertEquals($entry->getLicenseInformation(), $stored['license_information']);
    }

    /**
     * A field that already has a value in the DB is left untouched by updateEntry.
     */
    public function testUpdateEntryKeepExistingValues(): void
    {
        $index = $this->createIndex();
        $entry = $this->createBaseEntry();
        $entry->setVersion('test version');

        $index->storeEntries([$entry]);

        // entry has to be in the DB before it can be updated
        $this->assertTrue($index->hasEntry($entry->getOntologyIri()));

        // change the entry after it was stored, then push the change to the DB
        $entry->setVersion('CHANGED version');
        $index->updateEntry($entry);

        $stored = $index->getEntryDataAsArray($entry->getOntologyIri());

        $this->assertEquals('test version', $stored['version']);
    }
}

0 comments on commit 62d1f6c

Please sign in to comment.