From ec6543c1229b6d69003b04be3290875384c2e388 Mon Sep 17 00:00:00 2001 From: Matteo Romanello Date: Mon, 12 Aug 2019 14:26:28 +0200 Subject: [PATCH] added local notebook --- notebooks/compress_canonical.ipynb | 1930 ++++++++++++++++++++++++++++ 1 file changed, 1930 insertions(+) create mode 100644 notebooks/compress_canonical.ipynb diff --git a/notebooks/compress_canonical.ipynb b/notebooks/compress_canonical.ipynb new file mode 100644 index 00000000..ea3d00b5 --- /dev/null +++ b/notebooks/compress_canonical.ipynb @@ -0,0 +1,1930 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Rationale**: code to re-package the canonical data in a more compressed format; see [impresso-text-acquisition issue \\#26](https://github.com/impresso/impresso-text-acquisition/issues/26)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from impresso_commons.path.path_fs import (KNOWN_JOURNALS,\n", + " detect_canonical_issues)\n", + "import dask.bag as db\n", + "import jsonlines\n", + "from dask.distributed import Client, progress\n", + "import os\n", + "from smart_open import smart_open" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "from impresso_commons.text.rebuilder import upload" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "upload?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "input_dir = \"/scratch/matteo/ingested/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "local_issues = detect_canonical_issues(\n", + " input_dir,\n", + " KNOWN_JOURNALS\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "100251" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(local_issues)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "IssueDirectory(journal='GDL', date=datetime.date(1964, 7, 16), edition='a', path='/scratch/matteo/ingested/GDL/1964/07/16/a')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "local_issues[100]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "dask_client = Client('iccluster036.iccluster.epfl.ch:8786')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

Client

\n", + "\n", + "
\n", + "

Cluster

\n", + "
    \n", + "
  • Workers: 36
  • \n", + "
  • Cores: 36
  • \n", + "
  • Memory: 360.00 GB
  • \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dask_client" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "issue_bag = db.from_sequence(local_issues)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "grouped_bag = issue_bag.groupby(lambda issue: f'{issue.journal}-{issue.date.year}')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def find_issue_files(key, issues):\n", + " issue_files = []\n", + " for issue in issues:\n", + " basedir = issue.path\n", + " try:\n", + " filename = [\n", + " file \n", + " for file in os.listdir(basedir) \n", + " if 'issue.json' in file\n", + " ][0]\n", + " issue_files.append(os.path.join(basedir, filename))\n", + " except:\n", + " pass\n", + " return (key, issue_files)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "def find_page_files(key, issues):\n", + " page_files = []\n", + " for issue in issues:\n", + " basedir = issue.path\n", + " try:\n", + " page_filenames = [\n", + " os.path.join(basedir, file)\n", + " for file in os.listdir(basedir) \n", + " if '-p' in file\n", + " ]\n", + " page_files += page_filenames\n", + " except:\n", + " pass\n", + " return (key, page_files)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: move this function to the codebase, it's generic enough!\n", + "def compress(key, json_files, output_dir, prefix=\"\"):\n", + " \"\"\"Merges a set of JSON line files into a single compressed archive.\n", + "\n", + " :param key: signature of the newspaper issue (e.g. 
GDL-1900)\n", + " :type key: str\n", + " :param json_files: input JSON line files\n", + " :type json_files: list\n", + " :param output_dir: directory where to write the output file\n", + " :type output_dir: str\n", + " :return: a tuple with: sorting key [0] and path to serialized file [1].\n", + " :rtype: tuple\n", + "\n", + " .. note::\n", + "\n", + " `key` is expected to be the concatenation of newspaper ID and year\n", + " (e.g. GDL-1900).\n", + " \"\"\"\n", + "\n", + " \n", + " newspaper, year = key.split('-')\n", + " prefix_string = \"\" if prefix == \"\" else f\"-{prefix}\"\n", + " filename = f'{newspaper}-{year}{prefix_string}.jsonl.bz2'\n", + " filepath = os.path.join(output_dir, filename)\n", + " print(f'Compressing {len(json_files)} JSON files into {filepath}')\n", + "\n", + " with smart_open(filepath, 'wb') as fout:\n", + " writer = jsonlines.Writer(fout)\n", + "\n", + " for json_file in json_files:\n", + " with open(json_file, 'r') as inpf:\n", + " reader = jsonlines.Reader(inpf)\n", + " items = list(reader)\n", + " writer.write_all(items)\n", + " print(\n", + " f'Written {len(items)} docs from {json_file} to {filepath}'\n", + " )\n", + "\n", + " writer.close()\n", + "\n", + " return (key, filepath)\n", + " print(len(json_files))" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "issue_bag = grouped_bag.starmap(find_issue_files)\\\n", + " .starmap(compress, prefix=\"issues\", output_dir='/scratch/matteo/impresso-canonical-compressed/issues/')\\\n", + " .persist()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bacbef3505e2445bbd87b2c9f2993f41", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "progress(issue_bag)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5bcc9b4a717c4bd5ba429f604771cbf2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "progress(pages_bag)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'running'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dask_client.status" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "pages_bag = grouped_bag.starmap(find_page_files)\\\n", + " .starmap(compress, prefix=\"pages\", output_dir='/scratch/matteo/impresso-canonical-compressed/pages/')\\\n", + " .persist()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(('GDL-1908',\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1908-pages.jsonl.bz2'),)" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pages_bag.take(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(('GDL-1908',\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1908-issues.jsonl.bz2'),)" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "issue_bag.take(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1908-issues.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/issues/JDG-1997-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1862-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1890-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1901-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1900-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1870-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1965-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1880-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1839-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1860-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1836-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1921-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1799-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1884-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1908-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1984-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1879-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1835-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1977-issues.jsonl.bz2'),\n", + " 
(True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1813-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1819-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1883-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1817-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1985-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1901-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1869-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1939-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1868-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1881-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1907-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1906-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1894-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1843-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1976-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1962-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1926-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1947-issues.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/issues/JDG-1970-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1856-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1898-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1994-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1979-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1955-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1903-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1888-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1886-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1971-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1821-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1964-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1921-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1839-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1895-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1844-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1937-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1861-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1950-issues.jsonl.bz2'),\n", + " 
(True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1941-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1905-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1832-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1903-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1938-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1872-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1883-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1930-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1958-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1858-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1884-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1815-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1871-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1974-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1913-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1914-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1837-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1934-issues.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/issues/JDG-1833-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1994-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1811-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1809-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1886-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1890-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1889-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1942-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1948-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1855-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1833-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1951-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1984-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1900-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1847-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1968-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1942-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1983-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1944-issues.jsonl.bz2'),\n", + " 
(True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1916-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1943-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1917-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1808-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1947-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1830-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1825-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1966-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1961-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1963-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1936-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1986-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1912-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1894-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1863-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1853-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1973-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1924-issues.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/issues/GDL-1831-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1980-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1798-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1920-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1952-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1911-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1852-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1820-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1919-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1896-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1962-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1854-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1922-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1946-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1893-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1829-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1874-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1804-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1904-issues.jsonl.bz2'),\n", + " 
(True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1863-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1957-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1909-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1997-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1944-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1853-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1910-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1879-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1928-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1878-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1807-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1848-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1851-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1876-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1914-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1996-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1929-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1915-issues.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/issues/JDG-1985-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1850-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1934-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1847-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1887-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1986-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1978-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1931-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1845-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1875-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1975-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1836-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1838-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1961-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1950-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1824-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1989-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1830-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1932-issues.jsonl.bz2'),\n", + " 
(True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1992-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1912-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1881-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1842-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1928-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1855-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1902-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1956-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1978-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1983-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1849-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1864-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1904-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1992-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1991-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1869-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1911-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1892-issues.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/issues/JDG-1907-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1875-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1922-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1856-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1872-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1954-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1967-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1995-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1873-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1926-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1834-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1866-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1941-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1960-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1864-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1945-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1867-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1965-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1859-issues.jsonl.bz2'),\n", + " 
(True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1935-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1899-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1851-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1909-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1844-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1831-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1827-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1953-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1860-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1826-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1966-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1888-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1834-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1969-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1990-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1828-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1805-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1951-issues.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/issues/GDL-1990-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1957-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1974-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1988-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1866-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1938-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1953-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1972-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1861-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1828-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1874-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1973-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1838-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1981-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1998-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1988-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1940-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1848-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1998-issues.jsonl.bz2'),\n", + " 
(True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1993-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1873-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1850-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1895-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1932-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1816-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1960-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1969-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1840-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1882-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1956-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1946-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1916-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1948-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1871-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1892-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1870-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1937-issues.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/issues/JDG-1845-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1975-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1949-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1822-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1849-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1987-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1832-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1982-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1991-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1977-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1933-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1910-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1996-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1971-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1893-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1945-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1923-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1896-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1905-issues.jsonl.bz2'),\n", + " 
(True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1918-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1968-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1843-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1857-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1959-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1987-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1862-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1980-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1927-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1952-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1925-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1826-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1859-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1902-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1857-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1981-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1959-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1899-issues.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/issues/JDG-1925-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1972-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1898-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1867-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1906-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1976-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1840-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1915-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1993-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1829-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1979-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1982-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1877-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1967-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1823-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1846-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1835-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1989-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1943-issues.jsonl.bz2'),\n", + " 
(True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1812-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1841-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1810-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1939-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1880-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1931-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1846-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1885-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1818-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1814-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1897-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1913-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1949-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1936-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1865-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1930-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1854-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1955-issues.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/issues/GDL-1889-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1852-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1877-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1954-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1868-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1964-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1935-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1970-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1891-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1878-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1958-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1897-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1885-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1876-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1920-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1882-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1887-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1929-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1842-issues.jsonl.bz2'),\n", + " 
(True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1806-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1865-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1891-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1858-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1995-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1827-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1963-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1940-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1924-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1841-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1923-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/GDL-1933-issues.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/issues/JDG-1927-issues.jsonl.bz2')]" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "issue_bag.starmap(upload, bucket_name='original-canonical-compressed').compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1908-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1997-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/GDL-1862-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1890-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1901-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1900-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1870-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1965-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1880-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1839-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1860-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1836-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1921-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1799-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1884-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1908-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1984-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1879-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1835-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1977-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1813-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/GDL-1819-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1883-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1817-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1985-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1901-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1869-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1939-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1868-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1881-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1907-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1906-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1894-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1843-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1976-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1962-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1926-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1947-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1970-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1856-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/GDL-1898-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1994-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1979-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1955-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1903-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1888-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1886-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1971-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1821-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1964-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1921-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1839-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1895-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1844-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1937-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1861-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1950-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1941-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1905-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/GDL-1832-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1903-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1938-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1872-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1883-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1930-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1958-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1858-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1884-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1815-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1871-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1974-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1913-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1914-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1837-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1934-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1833-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1994-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1811-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/GDL-1809-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1886-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1890-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1889-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1942-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1948-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1855-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1833-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1951-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1984-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1900-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1847-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1968-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1942-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1983-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1944-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1916-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1943-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1917-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/GDL-1808-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1947-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1830-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1825-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1966-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1961-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1963-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1936-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1986-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1912-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1894-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1863-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1853-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1973-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1924-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1831-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1980-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1798-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1920-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/JDG-1952-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1911-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1852-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1820-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1919-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1896-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1962-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1854-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1922-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1946-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1893-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1829-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1874-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1804-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1904-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1863-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1957-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1909-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1997-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/GDL-1944-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1853-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1910-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1879-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1928-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1878-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1807-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1848-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1851-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1876-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1914-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1996-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1929-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1915-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1985-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1850-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1934-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1847-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1887-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/GDL-1986-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1978-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1931-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1845-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1875-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1975-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1836-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1838-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1961-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1950-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1824-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1989-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1830-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1932-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1992-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1912-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1881-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1842-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1928-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/JDG-1855-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1902-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1956-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1978-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1983-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1849-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1864-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1904-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1992-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1991-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1869-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1911-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1892-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1907-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1875-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1922-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1856-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1872-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1954-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/JDG-1967-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1995-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1873-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1926-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1834-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1866-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1941-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1960-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1864-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1945-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1867-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1965-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1859-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1935-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1899-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1851-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1909-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1844-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1831-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/GDL-1827-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1953-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1860-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1826-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1966-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1888-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1834-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1969-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1990-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1828-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1805-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1951-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1990-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1957-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1974-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1988-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1866-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1938-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1953-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/JDG-1972-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1861-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1828-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1874-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1973-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1838-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1981-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1998-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1988-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1940-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1848-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1998-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1993-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1873-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1850-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1895-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1932-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1816-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1960-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/JDG-1969-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1840-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1882-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1956-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1946-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1916-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1948-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1871-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1892-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1870-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1937-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1845-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1975-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1949-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1822-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1849-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1987-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1832-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1982-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/JDG-1991-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1977-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1933-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1910-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1996-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1971-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1893-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1945-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1923-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1896-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1905-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1918-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1968-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1843-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1857-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1959-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1987-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1862-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1980-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/GDL-1927-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1952-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1925-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1826-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1859-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1902-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1857-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1981-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1959-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1899-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1925-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1972-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1898-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1867-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1906-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1976-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1840-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1915-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1993-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/JDG-1829-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1979-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1982-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1877-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1967-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1823-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1846-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1835-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1989-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1943-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1812-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1841-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1810-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1939-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1880-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1931-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1846-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1885-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1818-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/GDL-1814-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1897-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1913-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1949-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1936-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1865-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1930-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1854-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1955-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1889-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1852-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1877-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1954-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1868-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1964-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1935-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1970-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1891-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1878-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/JDG-1958-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1897-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1885-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1876-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1920-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1882-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1887-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1929-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1842-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1806-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1865-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1891-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1858-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1995-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1827-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1963-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1940-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1924-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1841-pages.jsonl.bz2'),\n", + " (True,\n", + " 
'/scratch/matteo/impresso-canonical-compressed/pages/GDL-1923-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/GDL-1933-pages.jsonl.bz2'),\n", + " (True,\n", + " '/scratch/matteo/impresso-canonical-compressed/pages/JDG-1927-pages.jsonl.bz2')]" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pages_bag.starmap(upload, bucket_name='original-canonical-compressed').compute()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}