Skip to content

Commit

Permalink
add: introduce title status schema for enhanced title analysis in lin…
Browse files Browse the repository at this point in the history
…guistic processing
  • Loading branch information
simon-clematide committed Jan 2, 2025
1 parent c7fcd35 commit 1813659
Showing 1 changed file with 36 additions and 0 deletions.
36 changes: 36 additions & 0 deletions json/linguistic_annotation/lingproc.v2.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,42 @@
"title": "The Max Chars Schema",
"description": "Maximum number of characters for a valid content item (including title and full text)",
"examples": [50000]
},
"title_status": {
"type": "object",
"title": "The Title Status Schema",
"description": "Status of the title with respect to full text content based on various criteria",
"properties": {
"exact_prefix": {
"type": "boolean",
"description": "True if title is an exact case-sensitive prefix of full text"
},
"ellipsis": {
"type": "boolean",
"description": "Is there an ellipsis (...) added to the title that cannot be found in the text?"
},
"alnum_prefix": {
"type": "boolean",
"description": "When reducing title and full text to alphanumeric characters, is the title a prefix of the full text?"
},
"alnum_infix": {
"type": "boolean",
"description": "When reducing title and full text to alphanumeric characters, is the title an infix of the full text? Sometimes the real title comes second (because of page numbers or secondary titles that come first)"
},
"unknown": {
"type": "boolean",
"description": "Is there an artificial title (UNKNOWN, UNTITLED) that indicates that there is no title?"
},
"title_longer": {
"type": "boolean",

"description": "True if title is longer than full text, which indicates a problematic situation."
},
"advertisement": {
"type": "boolean",
"description": "For some newspapers, advertisements have an artificial title consisting of the serial number of an advertisement and the page. This is not a real title. Example: Adv.7 Page 4"
}
}
}
}
}

0 comments on commit 1813659

Please sign in to comment.