From 7d2b4296856fd10099f8e50f4be1698f9adbac2e Mon Sep 17 00:00:00 2001 From: Parthiv Date: Fri, 1 Sep 2023 20:43:23 +0530 Subject: [PATCH 1/6] Handle mul wikisource pages Modify Wikidata query to include mul wikisource and handle it in related methods as well Bug: T345325 --- src/Wikisource.php | 5 +++-- src/WikisourceApi.php | 15 +++++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/Wikisource.php b/src/Wikisource.php index 130ab45..6cf74a7 100644 --- a/src/Wikisource.php +++ b/src/Wikisource.php @@ -105,12 +105,13 @@ public function getLanguageName() { */ public function getDomainName(): string { $entity = $this->getWikisoureApi()->getWikdataEntity( $this->getWikidataId() ); - if ( isset( $entity['claims'][self::PROP_WEBSITE] ) ) { + // if it is mul wikisource, return base url without subdomain + if ( $this->getLanguageCode() !== 'mul' && isset( $entity['claims'][self::PROP_WEBSITE] ) ) { $website = $entity['claims'][self::PROP_WEBSITE][0]['mainsnak']['datavalue']['value']; $urlParts = parse_url( $website ); return $urlParts['host']; } - if ( $this->getLanguageCode() ) { + if ( $this->getLanguageCode() !== 'mul' && $this->getLanguageCode() ) { return $this->getLanguageCode() . '.wikisource.org'; } return 'wikisource.org'; diff --git a/src/WikisourceApi.php b/src/WikisourceApi.php index 15d3c7d..e1ec4ab 100644 --- a/src/WikisourceApi.php +++ b/src/WikisourceApi.php @@ -112,7 +112,7 @@ public function fetchWikisources( $cacheLifetime = null ) { // Language of work or name. . "?item wdt:P407 ?lang . " // RDF label of the language, in the language. - . "?lang rdfs:label ?langName . FILTER(LANG(?langName) = ?langCode) . " . "}"; + . "?lang rdfs:label ?langName . FILTER(LANG(?langName) = ?langCode || ?langCode = 'mul') . " . "}"; $wdQuery = new WikidataQuery( $query ); $data = $wdQuery->fetch(); if ( !is_numeric( $cacheLifetime ) ) { @@ -151,15 +151,22 @@ public function fetchWikisource( $langCode ) { * Get a Wikisource from a given URL. * @param string $url The Wikisource URL, with any path (or none). * @return Wikisource|bool The Wikisource requested, or false if the URL isn't a Wikisource - * URL (i.e. xxx.wikisource.org). + * URL (i.e. xxx.wikisource.org or wikisource.org). */ public function newWikisourceFromUrl( $url ) { - preg_match( '|//([a-z]{2,3}).wikisource.org|i', $url, $matches ); + // match wikisources with subdomain like xy.wikisource.org or xyz.wikisource.org + preg_match( '|//([a-z]{0,3})\.?wikisource.org|i', $url, $matches ); if ( !isset( $matches[1] ) ) { $this->logger->debug( "Unable to find Wikisource URL in: $url" ); return false; } - $langCode = $matches[1]; + // if wikisource.org, then set $langCode as mul + // indicating mul.wikisource.org + if ( $matches[1] == "" ) { + $langCode = "mul"; + } else { + $langCode = $matches[1]; + } $ws = new Wikisource( $this, $this->logger ); $ws->setLanguageCode( $langCode ); return $ws; From d7fc80dc52e8fdd6e556bda56ffdbc54bf48dc8f Mon Sep 17 00:00:00 2001 From: Parthiv Date: Fri, 1 Sep 2023 21:05:21 +0530 Subject: [PATCH 2/6] Modify if-clause operand order --- src/Wikisource.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Wikisource.php b/src/Wikisource.php index 6cf74a7..3db4db3 100644 --- a/src/Wikisource.php +++ b/src/Wikisource.php @@ -111,7 +111,7 @@ public function getDomainName(): string { $urlParts = parse_url( $website ); return $urlParts['host']; } - if ( $this->getLanguageCode() !== 'mul' && $this->getLanguageCode() ) { + if ( $this->getLanguageCode() && $this->getLanguageCode() !== 'mul' ) { return $this->getLanguageCode() . '.wikisource.org'; } return 'wikisource.org'; From 8b348b4b8062eb350f5c5d695868ff92f31c60b9 Mon Sep 17 00:00:00 2001 From: Parthiv Date: Mon, 11 Sep 2023 11:18:20 +0530 Subject: [PATCH 3/6] Modifying query to contain only one mulwikisource entry in the result --- src/WikisourceApi.php | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/WikisourceApi.php b/src/WikisourceApi.php index e1ec4ab..abe7687 100644 --- a/src/WikisourceApi.php +++ b/src/WikisourceApi.php @@ -104,7 +104,7 @@ public function fetchWikisources( $cacheLifetime = null ) { if ( $data === false ) { $this->logger->debug( "Requesting list of Wikisources from Wikidata" ); $query = - "SELECT ?item ?langCode ?langName WHERE { " + "SELECT ?item ?langCode ?langName WHERE { {" // Instance of Wikisource language edition. . "?item wdt:P31 wd:Q15156455 . " // Wikimedia language code. @@ -112,7 +112,16 @@ public function fetchWikisources( $cacheLifetime = null ) { // Language of work or name. . "?item wdt:P407 ?lang . " // RDF label of the language, in the language. - . "?lang rdfs:label ?langName . FILTER(LANG(?langName) = ?langCode || ?langCode = 'mul') . " . "}"; + . "?lang rdfs:label ?langName . FILTER(LANG(?langName) = ?langCode) ." + // perform a union of the results + . " } UNION { " + // Instance of Wikisource language edition. + . "?item wdt:P31 wd:Q15156455 ." + // Wikimedia language code. + . "?item wdt:P424 ?langCode ." + // language of work or name is multiple languages + . "?item wdt:P407 wd:Q20923490 . } }"; + echo $query; $wdQuery = new WikidataQuery( $query ); $data = $wdQuery->fetch(); if ( !is_numeric( $cacheLifetime ) ) { @@ -126,6 +135,9 @@ public function fetchWikisources( $cacheLifetime = null ) { $ws = new Wikisource( $this, $this->logger ); $ws->setLanguageCode( $langInfo['langCode'] ); $ws->setLanguageName( $langInfo['langName'] ); + if ( $langInfo['langCode'] == "mul" ) { + $ws->setLanguageName( "Multilingual Wikisource" ); + } $ws->setWikidataId( substr( $langInfo['item'], strlen( 'http://www.wikidata.org/entity/' ) ) ); $wikisources[] = $ws; } From a3fe141c29b23f35b0b735f3292851fb5b4b88bb Mon Sep 17 00:00:00 2001 From: Parthiv Date: Mon, 11 Sep 2023 12:21:17 +0530 Subject: [PATCH 4/6] Modify query and remove debug statement --- src/WikisourceApi.php | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/WikisourceApi.php b/src/WikisourceApi.php index abe7687..42fc8fb 100644 --- a/src/WikisourceApi.php +++ b/src/WikisourceApi.php @@ -120,8 +120,9 @@ public function fetchWikisources( $cacheLifetime = null ) { // Wikimedia language code. . "?item wdt:P424 ?langCode ." // language of work or name is multiple languages - . "?item wdt:P407 wd:Q20923490 . } }"; - echo $query; + . "?item wdt:P407 wd:Q20923490 . " + // hard code language name for mul + . "BIND( 'Multilingual Wikisource' AS ?langName ) } }"; $wdQuery = new WikidataQuery( $query ); $data = $wdQuery->fetch(); if ( !is_numeric( $cacheLifetime ) ) { @@ -135,9 +136,6 @@ public function fetchWikisources( $cacheLifetime = null ) { $ws = new Wikisource( $this, $this->logger ); $ws->setLanguageCode( $langInfo['langCode'] ); $ws->setLanguageName( $langInfo['langName'] ); - if ( $langInfo['langCode'] == "mul" ) { - $ws->setLanguageName( "Multilingual Wikisource" ); - } $ws->setWikidataId( substr( $langInfo['item'], strlen( 'http://www.wikidata.org/entity/' ) ) ); $wikisources[] = $ws; } From 8984293ca253c4bf3964bf9431dba30e75c10ad8 Mon Sep 17 00:00:00 2001 From: Parthiv Date: Mon, 11 Sep 2023 12:55:06 +0530 Subject: [PATCH 5/6] Modify query FILTER --- src/WikisourceApi.php | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/src/WikisourceApi.php b/src/WikisourceApi.php index 42fc8fb..7321391 100644 --- a/src/WikisourceApi.php +++ b/src/WikisourceApi.php @@ -104,7 +104,7 @@ public function fetchWikisources( $cacheLifetime = null ) { if ( $data === false ) { $this->logger->debug( "Requesting list of Wikisources from Wikidata" ); $query = - "SELECT ?item ?langCode ?langName WHERE { {" + "SELECT ?item ?langCode ?langName WHERE { " // Instance of Wikisource language edition. . "?item wdt:P31 wd:Q15156455 . " // Wikimedia language code. @@ -112,17 +112,8 @@ public function fetchWikisources( $cacheLifetime = null ) { // Language of work or name. . "?item wdt:P407 ?lang . " // RDF label of the language, in the language. - . "?lang rdfs:label ?langName . FILTER(LANG(?langName) = ?langCode) ." - // perform a union of the results - . " } UNION { " - // Instance of Wikisource language edition. - . "?item wdt:P31 wd:Q15156455 ." - // Wikimedia language code. - . "?item wdt:P424 ?langCode ." - // language of work or name is multiple languages - . "?item wdt:P407 wd:Q20923490 . " - // hard code language name for mul - . "BIND( 'Multilingual Wikisource' AS ?langName ) } }"; + // filter for mul wikisource + . "?lang rdfs:label ?langName . FILTER(LANG(?langName) = ?langCode || ( ?langCode = 'mul' && LANG(?langName) = 'en' )) . " . "}"; $wdQuery = new WikidataQuery( $query ); $data = $wdQuery->fetch(); if ( !is_numeric( $cacheLifetime ) ) { From a8536b1b5889debca1ffbb58313228aef1c269be Mon Sep 17 00:00:00 2001 From: Parthiv Date: Mon, 18 Sep 2023 10:14:49 +0530 Subject: [PATCH 6/6] Fix linting error --- src/WikisourceApi.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/WikisourceApi.php b/src/WikisourceApi.php index 7321391..cc7b799 100644 --- a/src/WikisourceApi.php +++ b/src/WikisourceApi.php @@ -113,7 +113,8 @@ public function fetchWikisources( $cacheLifetime = null ) { . "?item wdt:P407 ?lang . " // RDF label of the language, in the language. // filter for mul wikisource - . "?lang rdfs:label ?langName . FILTER(LANG(?langName) = ?langCode || ( ?langCode = 'mul' && LANG(?langName) = 'en' )) . " . "}"; + . "?lang rdfs:label ?langName . FILTER(LANG(?langName) = ?langCode || " + . "( ?langCode = 'mul' && LANG(?langName) = 'en' )) . " . "}"; $wdQuery = new WikidataQuery( $query ); $data = $wdQuery->fetch(); if ( !is_numeric( $cacheLifetime ) ) {