diff --git a/baker/GrapherBaker.tsx b/baker/GrapherBaker.tsx index 8b5e74794f4..63ad2b3dbed 100644 --- a/baker/GrapherBaker.tsx +++ b/baker/GrapherBaker.tsx @@ -45,7 +45,7 @@ import { import * as db from "../db/db.js" import { glob } from "glob" import { isPathRedirectedToExplorer } from "../explorerAdminServer/ExplorerRedirects.js" -import { getPostBySlug } from "../db/model/Post.js" +import { bySlug, getPostBySlug, parsePostAuthors } from "../db/model/Post.js" import { GrapherInterface } from "@ourworldindata/grapher" import workerpool from "workerpool" import ProgressBar from "progress" @@ -60,11 +60,14 @@ import { getDatapageJson, parseGdocContentFromAllowedLevelOneHeadings, } from "../datapage/Datapage.js" +import { slugify_topic } from "../site/DataPageV2Content.js" import { ExplorerProgram } from "../explorer/ExplorerProgram.js" import { Image } from "../db/model/Image.js" import { logErrorAndMaybeSendToBugsnag } from "../serverUtils/errorLog.js" import { parseFaqs } from "../db/model/Gdoc/rawToEnriched.js" +import { Gdoc } from "../db/model/Gdoc/Gdoc.js" +import { getShortPageCitation } from "../site/gdocs/utils.js" /** * @@ -322,6 +325,42 @@ export async function renderDataPageV2({ grapherConfigForVariable ?? {} ) + const firstTopicTag = datapageData.topicTagsLinks?.[0] + + if (firstTopicTag) { + const gdoc = await Gdoc.findOne({ + where: { + slug: slugify_topic(firstTopicTag), + }, + relations: ["tags"], + }) + if (gdoc) { + const citation = getShortPageCitation( + gdoc.content.authors, + gdoc.content.title ?? 
"", + gdoc?.publishedAt + ) + datapageData.primaryTopic = { + topicTag: firstTopicTag, + citation, + } + } else { + const post = await bySlug(slugify_topic(firstTopicTag)) + if (post) { + const authors = parsePostAuthors(post.authors) + const citation = getShortPageCitation( + authors, + post.title, + post.published_at + ) + datapageData.primaryTopic = { + topicTag: firstTopicTag, + citation, + } + } + } + } + // Get the charts this variable is being used in (aka "related charts") // and exclude the current chart to avoid duplicates datapageData.allCharts = await getRelatedChartsForVariable( diff --git a/datapage/Datapage.ts b/datapage/Datapage.ts index 173383d7b5b..1a9dbbe3977 100644 --- a/datapage/Datapage.ts +++ b/datapage/Datapage.ts @@ -16,9 +16,9 @@ import { DataPageDataV2, OwidVariableWithSource, dayjs, - getAttributionFromVariable, gdocIdRegex, getETLPathComponents, + getAttributionFragmentsFromVariable, } from "@ourworldindata/utils" import { ExplorerProgram } from "../explorer/ExplorerProgram.js" import { Gdoc } from "../db/model/Gdoc/Gdoc.js" @@ -29,7 +29,7 @@ export const getDatapageDataV2 = async ( partialGrapherConfig: GrapherInterface ): Promise => { { - const processingLevel = variableMetadata.processingLevel ?? "major" + const processingLevel = variableMetadata.processingLevel ?? "minor" const version = getETLPathComponents(variableMetadata.catalogPath ?? "")?.version ?? "" @@ -59,7 +59,7 @@ export const getDatapageDataV2 = async ( attributionShort: variableMetadata.presentation?.attributionShort, titleVariant: variableMetadata.presentation?.titleVariant, topicTagsLinks: variableMetadata.presentation?.topicTagsLinks ?? [], - attribution: getAttributionFromVariable(variableMetadata), + attributions: getAttributionFragmentsFromVariable(variableMetadata), faqs: [], descriptionKey: variableMetadata.descriptionKey ?? 
[], descriptionProcessing: variableMetadata.descriptionProcessing, diff --git a/db/migrateWpPostsToArchieMl.ts b/db/migrateWpPostsToArchieMl.ts index 1269b944d96..45969ff0e6d 100644 --- a/db/migrateWpPostsToArchieMl.ts +++ b/db/migrateWpPostsToArchieMl.ts @@ -4,7 +4,6 @@ import cheerio from "cheerio" import { OwidGdocPublicationContext, OwidGdocInterface, - sortBy, OwidArticleBackportingStatistics, OwidGdocType, RelatedChart, @@ -19,6 +18,7 @@ import { adjustHeadingLevels, } from "./model/Gdoc/htmlToEnriched.js" import { getRelatedCharts } from "./wpdb.js" +import { parsePostAuthors } from "./model/Post.js" // slugs from all the linear entries we want to migrate from @edomt const entries = new Set([ @@ -149,10 +149,6 @@ const migrate = async (): Promise => { ? post.published_at.toLocaleDateString("en-US", options) : "" - const authors: { author: string; order: number }[] = JSON.parse( - post.authors - ) - const archieMlFieldContent: OwidGdocInterface = { id: `wp-${post.id}`, slug: post.slug, @@ -162,9 +158,7 @@ const migrate = async (): Promise => { title: post.title, subtitle: post.excerpt, excerpt: post.excerpt, - authors: sortBy(authors, ["order"]).map( - (author) => author.author - ), + authors: parsePostAuthors(post.authors), dateline: dateline, // TODO: this discards block level elements - those might be needed? 
refs: undefined, diff --git a/db/model/Post.ts b/db/model/Post.ts index fbe0d958433..eebee925001 100644 --- a/db/model/Post.ts +++ b/db/model/Post.ts @@ -1,6 +1,6 @@ import * as db from "../db.js" import { Knex } from "knex" -import { PostRow } from "@ourworldindata/utils" +import { PostRow, sortBy } from "@ourworldindata/utils" export const postsTable = "posts" @@ -49,6 +49,14 @@ export const setTags = async ( export const bySlug = async (slug: string): Promise => (await db.knexTable("posts").where({ slug: slug }))[0] +/** The authors field in the posts table is a json column that contains an array of + { order: 1, author: "Max Mustermann" } like records. This function parses the + string and returns a simple string array of author names in the correct order */ +export const parsePostAuthors = (authorsJson: string): string[] => { + const authors = JSON.parse(authorsJson) + return sortBy(authors, ["order"]).map((author) => author.author) +} + export const setTagsForPost = async ( postId: number, tagIds: number[] diff --git a/packages/@ourworldindata/grapher/src/core/Grapher.tsx b/packages/@ourworldindata/grapher/src/core/Grapher.tsx index 12855174dd6..58c1433a245 100644 --- a/packages/@ourworldindata/grapher/src/core/Grapher.tsx +++ b/packages/@ourworldindata/grapher/src/core/Grapher.tsx @@ -1535,7 +1535,8 @@ export class Grapher const uniqueAttributions = uniq(compact(attributions)) - if (uniqueAttributions.length > 3) return "Multiple sources" + if (uniqueAttributions.length > 3) + return `${uniqueAttributions[0]} and other sources` return uniqueAttributions.join("; ") } diff --git a/packages/@ourworldindata/utils/src/Util.ts b/packages/@ourworldindata/utils/src/Util.ts index a6e596534a4..585f53fbe73 100644 --- a/packages/@ourworldindata/utils/src/Util.ts +++ b/packages/@ourworldindata/utils/src/Util.ts @@ -1743,16 +1743,22 @@ export function getOriginAttributionFragments( export function getAttributionFromVariable( variable: OwidVariableWithSource ): string { + 
return getAttributionFragmentsFromVariable(variable).join("; ") +} + +export function getAttributionFragmentsFromVariable( + variable: OwidVariableWithSource +): string[] { if ( variable.presentation?.attribution && variable.presentation?.attribution !== "" ) - return variable.presentation?.attribution + return [variable.presentation?.attribution] const originAttributionFragments = getOriginAttributionFragments( variable.origins ) const sourceName = variable.source?.name - return uniq(compact([sourceName, ...originAttributionFragments])).join("; ") + return uniq(compact([sourceName, ...originAttributionFragments])) } interface ETLPathComponents { @@ -1769,3 +1775,22 @@ export const getETLPathComponents = (path: string): ETLPathComponents => { path.split("/") return { channel, producer, version, dataset, table, indicator } } + +export const formatAuthors = ({ + authors, + requireMax, + forBibtex, +}: { + authors: string[] + requireMax?: boolean + forBibtex?: boolean +}): string => { + if (requireMax && !authors.includes("Max Roser")) + authors = [...authors, "Max Roser"] + + let authorsText = authors.slice(0, -1).join(forBibtex ? 
" and " : ", ") + if (authorsText.length === 0) authorsText = authors[0] ?? "" + else authorsText += ` and ${last(authors)}` + + return authorsText +} diff --git a/packages/@ourworldindata/utils/src/index.ts b/packages/@ourworldindata/utils/src/index.ts index 97a91a7cbf5..5cdf2bd106e 100644 --- a/packages/@ourworldindata/utils/src/index.ts +++ b/packages/@ourworldindata/utils/src/index.ts @@ -215,6 +215,7 @@ export { type DataPageDataV2, type DataPageRelatedData, type DataPageRelatedResearch, + type PrimaryTopic, type FaqLink, type FaqDictionary, type RawBlockResearchAndWritingRow, @@ -340,8 +341,10 @@ export { mergePartialGrapherConfigs, getOriginAttributionFragments, getAttributionFromVariable, + getAttributionFragmentsFromVariable, copyToClipboard, getETLPathComponents, + formatAuthors, } from "./Util.js" export { diff --git a/packages/@ourworldindata/utils/src/owidTypes.ts b/packages/@ourworldindata/utils/src/owidTypes.ts index a2a88d4d904..3079ed75ddc 100644 --- a/packages/@ourworldindata/utils/src/owidTypes.ts +++ b/packages/@ourworldindata/utils/src/owidTypes.ts @@ -1464,13 +1464,19 @@ export interface FaqLink { fragmentId: string } +export interface PrimaryTopic { + topicTag: string + citation: string +} + export interface DataPageDataV2 { status: "published" | "draft" title: string titleVariant?: string attributionShort?: string topicTagsLinks?: string[] - attribution: string + primaryTopic?: PrimaryTopic + attributions: string[] descriptionShort?: string descriptionFromProducer?: string faqs: FaqLink[] // Todo: resolve these at this level to the point where we can preview them @@ -1602,6 +1608,7 @@ export interface DataPageV2ContentFields { faqEntries: FaqEntryData | undefined // TODO: add gdocs for FAQs isPreviewing?: boolean + canonicalUrl: string } export interface UserCountryInformation { diff --git a/site/DataPageV2.tsx b/site/DataPageV2.tsx index 07124b73367..2c2d3a16afa 100644 --- a/site/DataPageV2.tsx +++ b/site/DataPageV2.tsx @@ -146,6 +146,7 @@ export 
const DataPageV2 = (props: { grapherConfig={grapherConfig} isPreviewing={isPreviewing} faqEntries={faqEntries} + canonicalUrl={canonicalUrl} /> diff --git a/site/DataPageV2Content.tsx b/site/DataPageV2Content.tsx index 7f70d940205..029ff2fd9b6 100644 --- a/site/DataPageV2Content.tsx +++ b/site/DataPageV2Content.tsx @@ -5,7 +5,6 @@ import { Grapher, GrapherInterface } from "@ourworldindata/grapher" import { ExpandableToggle } from "./ExpandableToggle.js" import ReactDOM from "react-dom" import { GrapherWithFallback } from "./GrapherWithFallback.js" -import { formatAuthors } from "./clientFormatting.js" import { ArticleBlocks } from "./gdocs/ArticleBlocks.js" import { RelatedCharts } from "./blocks/RelatedCharts.js" import { @@ -14,6 +13,10 @@ import { slugify, DATAPAGE_SOURCES_AND_PROCESSING_SECTION_ID, EnrichedBlockList, + uniq, + pick, + OwidOrigin, + formatAuthors, } from "@ourworldindata/utils" import { markdownToEnrichedTextBlock } from "@ourworldindata/components" import { AttachmentsContext, DocumentContext } from "./gdocs/OwidGdoc.js" @@ -73,24 +76,32 @@ const getDateRange = (dateRange: string): string | null => { return null } -const slugify_topic = (topic: string) => { +export const slugify_topic = (topic: string) => { // This is a heuristic to map from free form tag texts to topic page URLs. 
We'll // have to switch to explicitly stored URLs or explicit links between tags and topic pages // soon but for the time being this makes sure that "CO2 & Greenhouse Gas Emissions" can be automatically // linked to /co2-and-greenhouse-gas-emissions // Note that the heuristic fails for a few cases like "HIV/AIDS" or "Mpox (Monkeypox)" - const replaced = topic - .replace("&", "-and-") - .replace("'", "") - .replace("+", "") + const replaced = topic.replace("&", "and").replace("'", "").replace("+", "") return slugify(replaced) } +type OriginSubset = Pick< + OwidOrigin, + | "producer" + | "descriptionSnapshot" + | "dateAccessed" + | "urlMain" + | "description" + | "citationFull" +> + export const DataPageV2Content = ({ datapageData, grapherConfig, isPreviewing = false, faqEntries, + canonicalUrl = "{URL}", // when we bake pages to their proper url this will be set correctly but on preview pages we leave this undefined }: DataPageV2ContentFields & { grapherConfig: GrapherInterface }) => { @@ -141,29 +152,41 @@ export const DataPageV2Content = ({ : "related-data__category--columns span-cols-8 span-lg-cols-12" } ` - // TODO: this is missing the attribution field ATM and - // so assembles something only roughly similar to the citation described - // by Joe. Also, we need the dataset title. - const producers = datapageData.origins.map((o) => o.producer).join("; ") + const origins: OriginSubset[] = uniq( + datapageData.origins.map((item) => + pick(item, [ + "producer", + "descriptionSnapshot", + "dateAccessed", + "urlMain", + "description", + "citationFull", + ]) + ) + ) + const producers = uniq(datapageData.origins.map((o) => o.producer)) + + const attributionFragments = datapageData.attributions ?? producers + const attributionPotentiallyShortened = + attributionFragments.length > 3 + ? `${attributionFragments[0]} and other sources` + : attributionFragments.join(", ") const processedAdapted = - datapageData.owidProcessingLevel === "minor" ? 
`minor` : `major` + datapageData.owidProcessingLevel === "minor" + ? `minor processing` + : `major adaptations` const lastUpdated = dayjs(datapageData.lastUpdated, ["YYYY", "YYYY-MM-DD"]) const yearOfUpdate = lastUpdated.year() - const citationShort = `${producers} — with ${processedAdapted} processing by Our World In Data (${yearOfUpdate})` - const originsLong = datapageData.origins - .map((o) => `${o.producer}, ${o.title ?? o.titleSnapshot}`) - .join("; ") - const dateAccessed = - datapageData.origins && - datapageData.origins.length && - datapageData.origins[0].dateAccessed - ? dayjs(datapageData.origins[0].dateAccessed).format("MMMM D, YYYY") - : "" - const urlAccessed = - datapageData.origins && - datapageData.origins.length && - datapageData.origins[0].urlDownload - const citationLong = `${citationShort}. ${datapageData.title}. ${originsLong}, ${processedAdapted} by Our World In Data. Retrieved ${dateAccessed} from ${urlAccessed}` + const citationShort = `${attributionPotentiallyShortened} – with ${processedAdapted} by Our World In Data (${yearOfUpdate})` + const citationLonger = `${attributionPotentiallyShortened} – with ${processedAdapted} by Our World In Data (${yearOfUpdate})` + const originsLong = uniq( + datapageData.origins.map( + (o) => `${o.producer}, ${o.title ?? o.titleSnapshot}` + ) + ).join("; ") + const today = dayjs().format("MMMM D, YYYY") + const currentYear = dayjs().year() + const citationLong = `${citationLonger}. ${datapageData.title}. ${originsLong}. Retrieved ${today} from ${canonicalUrl}` const { linkedDocuments = {}, @@ -201,9 +224,31 @@ export const DataPageV2Content = ({ /> ) : null + const citationFullBlockFn = (source: OriginSubset) => { + return source.citationFull && ( +
+
Citation
+ This is the citation of the original data obtained from the + source, prior to any processing or adaptation by Our World in + Data. To cite data downloaded from this page, please use the + suggested citation given in{" "} + Reuse This Work below. + +
+ ) + } + const dateRange = getDateRange(datapageData.dateRange) - const citationDatapage = `Our World In Data (${yearOfUpdate}). Data Page: ${datapageData.title} – ${producers}. Retrieved from {url} [online resource]` + const citationDatapage = datapageData.primaryTopic + ? `“Data Page: ${datapageData.title}”, part of the following publication: ${datapageData.primaryTopic.citation}. Data adapted from ${producers}. Retrieved from ${canonicalUrl} [online resource]` + : `“Data Page: ${datapageData.title}”. Our World in Data (${currentYear}). Data adapted from ${producers}. Retrieved from ${canonicalUrl} [online resource]` + return ( Source -
{datapageData.attribution}
- {datapageData.owidProcessingLevel && ( -
- with{" "} - - {processedAdapted} - {" processing"} - {" "} - by Our World In Data -
- )} +
+ {datapageData.attributions?.join("; ")} – with{" "} + + {processedAdapted} + {" "} + by Our World In Data +
@@ -552,14 +593,14 @@ export const DataPageV2Content = ({ > Sources and processing - {datapageData.origins.length > 0 && ( + {origins.length > 0 && (

This data is based on the following sources

- {datapageData.origins.map( + {origins.map( ( source, idx: number, @@ -596,7 +637,7 @@ export const DataPageV2Content = ({ /> )} {(source.dateAccessed || - source.urlDownload) && ( + source.urlMain) && (
)} - {source.urlDownload && ( + {source.urlMain && (
Retrieved @@ -626,84 +667,20 @@ export const DataPageV2Content = ({
)} - {source.citationFull && ( -
-
- Citation -
- This - is - the - citation - of - the - original - data - obtained - from - the - source, - prior - to - any - processing - or - adaptation - by - Our - World - in - Data. - To - cite - data - downloaded - from - this - page, - please - use - the - suggested - citation - given - in{" "} - - Reuse - This - Work - {" "} - below. - -
+ {citationFullBlockFn( + source )}
)} @@ -888,7 +865,8 @@ export const DataPageV2Content = ({

To cite this page overall, including any - descriptions of the data + descriptions, FAQs or + explanations of the data authored by Our World in Data, please use the following citation: diff --git a/site/ExpandableToggle.scss b/site/ExpandableToggle.scss index 568150f2585..c83d9ea151a 100644 --- a/site/ExpandableToggle.scss +++ b/site/ExpandableToggle.scss @@ -53,7 +53,7 @@ } .ExpandableToggle__content--teaser { - height: 48px; + height: 96px; -webkit-mask-image: linear-gradient(180deg, #000 0%, transparent); } diff --git a/site/gdocs/OwidGdoc.tsx b/site/gdocs/OwidGdoc.tsx index a89a47472f4..990e734e17a 100644 --- a/site/gdocs/OwidGdoc.tsx +++ b/site/gdocs/OwidGdoc.tsx @@ -13,14 +13,14 @@ import { LICENSE_ID, isEmpty, OwidGdocType, + formatAuthors, } from "@ourworldindata/utils" import { CodeSnippet } from "../blocks/CodeSnippet.js" import { BAKED_BASE_URL } from "../../settings/clientSettings.js" -import { formatAuthors } from "../clientFormatting.js" import { DebugProvider } from "./DebugContext.js" import { OwidGdocHeader } from "./OwidGdocHeader.js" import StickyNav from "../blocks/StickyNav.js" - +import { getShortPageCitation } from "./utils.js" export const AttachmentsContext = createContext<{ linkedCharts: Record linkedDocuments: Record @@ -64,11 +64,12 @@ export function OwidGdoc({ }: OwidGdocProps) { const citationDescription = citationDescriptionsByArticleType[content.type ?? OwidGdocType.Article] - const citationText = `${formatAuthors({ - authors: content.authors, - })} (${publishedAt?.getFullYear()}) - "${ - content.title - }". Published online at OurWorldInData.org. Retrieved from: '${`${BAKED_BASE_URL}/${slug}`}' [Online Resource]` + const shortPageCitation = getShortPageCitation( + content.authors, + content.title ?? "", + publishedAt + ) + const citationText = `${shortPageCitation} Published online at OurWorldInData.org. 
Retrieved from: '${`${BAKED_BASE_URL}/${slug}`}' [Online Resource]` const bibtex = `@article{owid-${slug.replace(/\//g, "-")}, author = {${formatAuthors({ diff --git a/site/gdocs/utils.tsx b/site/gdocs/utils.tsx index 54494923019..124a81d309a 100644 --- a/site/gdocs/utils.tsx +++ b/site/gdocs/utils.tsx @@ -7,8 +7,9 @@ import { OwidGdocInterface, ImageMetadata, LinkedChart, - OwidGdocContent, Url, + OwidGdocContent, + formatAuthors, } from "@ourworldindata/utils" import { match } from "ts-pattern" import { AttachmentsContext } from "./OwidGdoc.js" @@ -198,3 +199,13 @@ export function renderSpan( export function renderSpans(spans: Span[]): JSX.Element[] { return spans.map(renderSpan) } + +export function getShortPageCitation( + authors: string[], + title: string, + publishedAt: Date | null +) { + return `${formatAuthors({ + authors: authors, + })} (${publishedAt?.getFullYear()}) - “${title}”` +}