From 382a828851e3ccda060befea7179f7703e0c4705 Mon Sep 17 00:00:00 2001 From: Isaiah Odhner Date: Sat, 3 Aug 2024 01:48:10 -0400 Subject: [PATCH] Improve websim git export script - Automate aggregating views of the version history by recursively clicking earliest version, reopening the versions list, and waiting for it to load. - Prompt user to pick elements to build query selectors from, instead of hard-coding fragile selectors. (I've mostly re-used my code from the os-gui.js kitchen sink here, but I've converted it to use vanilla JS instead of jQuery, and to use a promise instead of callback.) - Add output dialog where you can copy JSON, or an LLM prompt for better commit summaries. --- scripts/dl-websim-versions.js | 543 ++++++++++++++++++++++++++++++++-- 1 file changed, 519 insertions(+), 24 deletions(-) diff --git a/scripts/dl-websim-versions.js b/scripts/dl-websim-versions.js index 6390fa0..4633081 100644 --- a/scripts/dl-websim-versions.js +++ b/scripts/dl-websim-versions.js @@ -1,34 +1,527 @@ // This is a script to download all the versions of a websim.ai site to bring it into local version control. -// First, run this function in the console of the latest version, then jump to the oldest in the list and repeat, collecting the results in the correct order. -// They're using unsemantic HTML, with only presentational classes. -// I'll just use a temporary global for the list element (which isn't a list element, it's just a div, of course), -// by right clicking in the DOM inspector and selecting "Store as global variable" on the div that contains the list of versions. +// First, copy and paste this script into the browser console to collect versions, +// then paste the result into the script as the versions array below, and run the script in Node.js to download the versions. + +// Optionally, before running the script in Node.js, you can edit the commit summaries (and add notes) in the versions array. 
+// You can add commit summaries with an LLM (like ChatGPT) with a prompt like "Add short one-line commitSummary fields to these, based on the prompts." +// or use GitHub Copilot to autocomplete the commit summaries interactively, or just enter them manually. +// One could also edit commit messages later, after running the script, possibly in one big `rebase -i`. +// Recommended: if using a chatbot-style LLM interface, run a diff between the original input and the output to check for unexpected changes. +// It may incidentally try to fix typos in your prompts, for instance, or simply mess up the JSON syntax. +// An autocompletion-style LLM interface (like GitHub Copilot) would avoid this issue. +// Also: note that the LLM doesn't know what changes were successfully made by the other LLM (powering websim). +// Summaries like "Fix X" may be more accurately written as "Try to fix X" :) + +// ------------------------------ + +// WebSim is using non-semantic HTML, with only presentational classes, unfortunately, +// so I can't just find the list of versions with a selector like `ul.versions`. +// Instead I'm prompting the user to select the element with the list of versions. + // By the way, the nodes in this (non-semantic) list are presented in the reverse order from how they are in the DOM, as of 2024-07-27. -// Each capture should overlap by one version, so you can check that the duplicate versions are next to each other to ensure the correct order, -// then remove the duplicates and concatenate the lists. 
-function collectVersions(versionListDivElement) { - const linkUrls = [...versionListDivElement.querySelectorAll(`a[href^='https://websim.ai/c/']`)].map(a => a.href); - const prompts = [...versionListDivElement.querySelectorAll(`div.text-black.whitespace-pre-wrap.flex.flex-col.items-start.flex-1 > div > div > div > div > span > span`)].map((el) => el.textContent); - // const associated = prompts.map((prompt, i) => ({ prompt, linkUrl: linkUrls[i], dlUrl: linkUrls[i].replace('websim.ai/c/', 'party.websim.ai/api/v1/sites/').replace(/\/$/, '') + '/html?raw=true' })); - const associated = prompts.map((prompt, i) => { - const id = linkUrls[i].match(/https:\/\/websim.ai\/c\/([^/]+)/)[1]; - return { prompt, id, /*linkUrl: linkUrls[i], dlUrl: `https://party.websim.ai/api/v1/sites/${id}/html?raw=true`*/ }; +// The first node is the earliest version, shown at the bottom of the list. + +// (Hm, I guess I could select based on the class .flex-col-reverse, since if that's removed the script is likely to break anyway... +// and then also check that the selected element contains version links - filter based on this, and then assert that there's only one element matching the filter. +// That would take care of the first interaction... as for finding a selector for the prompt text, that seems trickier, +// but maybe I could get the prompt from the "address bar" and then find an element in the list (deepest in the DOM) that contains that text.) + +// TODO: Make automation easier to cancel. To hit Esc after pasting the script in the console, you have to focus the page, +// but clicking will select an element, so you have to press the mouse button down and then hit Esc before releasing it. +// Also there's no way to abort once it starts collecting versions, so it should be possible to cancel that too. +// Could add a cancel button. Could add a start button too, so the page is likely focused when you try to press Esc. 
+// Could also move the overlay to the bottom of the screen since the version list is near the top. + +async function collectAllVersions(versionListDivSelector, promptSelector) { + const aggregatedResults = []; + + function collectVisibleVersions() { + // Don't move this querySelector outside the function; apparently the whole browser UI is recreated when clicking the link + const versionListDivElement = document.querySelector(versionListDivSelector); + if (!versionListDivElement) { + return []; + } + const linkUrls = [...versionListDivElement.querySelectorAll(`a[href^='https://websim.ai/c/']`)].map(a => a.href); + const prompts = [...versionListDivElement.querySelectorAll(promptSelector)].map((el) => el.textContent); + const associated = prompts.map((prompt, i) => { + const id = linkUrls[i].match(/https:\/\/websim.ai\/c\/([^/]+)/)[1]; + return { prompt, id }; + }); + return associated; + } + + function waitFor(condition, { timeout = 10000, interval = 100 } = {}) { + return new Promise((resolve, reject) => { + const timer = setInterval(() => { + if (condition()) { + clearInterval(timer); + resolve(); + } + }, interval); + setTimeout(() => { + clearInterval(timer); + reject("Timed out waiting for condition."); + }, timeout); + }); + } + + function waitForVersionListToChange(oldVisibleVersions) { + return waitFor(() => { + const visibleVersions = collectVisibleVersions(); + console.log("Waiting for version list to change, old:", oldVisibleVersions, "new:", visibleVersions); + if (visibleVersions.length === 0) { + openVersionList(); + return false; + } + // return visibleVersions[0].id !== oldVisibleVersions[0].id; + // In the case that we're moving to the view that has just the earliest item, + // the earliest item will be the same as in the last snapshot, + // so we need to check the latest items instead. 
+ return visibleVersions[visibleVersions.length - 1].id !== oldVisibleVersions[oldVisibleVersions.length - 1].id; + }).then(() => { + // Wait for the version list to stabilize (finish loading) + let baseline = collectVisibleVersions(); + return waitFor(() => { + const visibleVersions = collectVisibleVersions(); + console.log("Waiting for version list to stabilize (finish loading), loaded already:", baseline.length, "loaded now:", visibleVersions.length); + const finishedLoading = visibleVersions.length === baseline.length; + baseline = visibleVersions; // must be updated after comparison + return finishedLoading; + }, { interval: 2000 }); + }); + } + + async function collectAndClickEarliest() { + const visibleVersions = collectVisibleVersions(); + if (aggregatedResults.length > 0) { + // Sanity check: each capture should overlap by one version + const earliestRecordedVersion = aggregatedResults[0]; + const latestVisibleVersion = visibleVersions[visibleVersions.length - 1]; + if (earliestRecordedVersion.id !== latestVisibleVersion.id) { + alert("Warning: The first node in the visible list is not the same as the last node in the previous capture. The order of versions may be incorrect."); + } + // There should be no other duplicates + const duplicate = visibleVersions.slice(0, -1).find(({ id }) => aggregatedResults.some((item) => item.id === id)); + if (duplicate) { + alert("Warning: Duplicate versions found in the visible list compared to the previous capture."); + } + // Add all but the last version, which is already in the previous capture + aggregatedResults.unshift(...visibleVersions.slice(0, -1)); + } else { + aggregatedResults.unshift(...visibleVersions); + } + + if (visibleVersions.length === 0) { + alert("Websim version links not found."); + return; + } + if (visibleVersions.length === 1) { + // Done - No more versions to collect. 
+ return; + } + + // In parallel, wait for the version list to change and click the earliest version link + let versionListDivElement = document.querySelector(versionListDivSelector); + const earliestVersionLink = versionListDivElement.querySelector(`a[href^='https://websim.ai/c/']`); + await Promise.all([ + waitForVersionListToChange(visibleVersions).catch((err) => { + alert("Timed out waiting for the version list to change."); + }), + new Promise((resolve) => { + earliestVersionLink.click(); + resolve(); + }), + ]); + + await collectAndClickEarliest(); + } + + await collectAndClickEarliest(); + + return aggregatedResults; +} + +function openVersionList() { + // mouseup is what actually does it, but don't tell anyone + const addressBar = document.querySelector("[name='url']"); + addressBar.dispatchEvent(new MouseEvent('mousedown', { bubbles: true })); + addressBar.dispatchEvent(new MouseEvent('pointerdown', { bubbles: true })); + addressBar.dispatchEvent(new MouseEvent('mouseup', { bubbles: true })); + addressBar.dispatchEvent(new MouseEvent('pointerup', { bubbles: true })); + addressBar.dispatchEvent(new MouseEvent('click', { bubbles: true })); +} + + +// Based on https://jsfiddle.net/Sillvva/qof6h0up/ +// found via https://stackoverflow.com/questions/8588301/how-to-generate-unique-css-selector-for-dom-element#comment115592481_49663134 +function buildQuerySelector(elem, relativeToParent = document.body) { + let path = []; + let parent; + while (parent = elem.parentNode) { + let tag = elem.tagName; + let siblings; + // Avoiding invalid CSS selectors from certain class names like "max-h-[calc(100vh-8rem)]" + // Could use escaping but this is simpler, and these layout framework classes are unlikely to be useful in selectors + // Also "body.__classname_36bd41" is valid as a selector, but not useful, not sure where it comes from + let classes = Array.from(elem.classList.values()).filter(c => /^[a-z][a-z0-9_\-]*$/i.test(c)); + let classStr = classes.length ? 
`.${classes.join('.')}` : ''; + path.unshift( + elem.id ? `#${elem.id}` : ( + siblings = parent.children, + [].filter.call(siblings, sibling => + sibling.tagName === tag && + JSON.stringify(classes.sort()) === JSON.stringify( + Array.from(sibling.classList.values()).sort() + ) + ).length === 1 ? + `${tag}${classStr}` : + `${tag}${classStr}:nth-child(${1 + [].indexOf.call(siblings, elem)})` + ) + ); + if (elem === relativeToParent) break; + elem = parent; + }; + return `${path.join(' > ')}`.toLowerCase(); +}; + + +// Add commit summaries (can be improved with ChatGPT or manual editing before committing) +function addCommitSummaries(results) { + return results.map(({ prompt, id }) => { + let commitSummary = prompt; + const maxLength = 50; + if (prompt.length > maxLength) { + let cutOff = maxLength - '...'.length; + if (prompt.includes("\n")) { + cutOff = Math.min(cutOff, prompt.indexOf("\n")); + } + commitSummary = prompt.substring(0, cutOff) + '...'; + } + return { prompt, id, commitSummary }; }); - return associated; } -// JSON.stringify(collectVersions(temp1), null, "\t") -// Note: I think it uses history.pushState/replacesState to change the URL without reloading the page, so I could have automated this further, -// by clicking on the earliest version, then running the function to collect the versions, aggregating the results automatically, -// (optionally) ensuring the order is correct via the duplicates, and removing the duplicates. +/** + * Prompts the user to pick an element matching a selector. + * @param {string | (Element => boolean)} elementFilter A CSS selector or a function that returns `true` for the desired elements. + * @param {string} [message] The message to display to the user. + * @param {string} [subMessage] Extra text to show below the main message. + * @returns {Promise} The selected element, or `null` if no element was selected. May never resolve if the user cancels. 
+ */ +async function pickElement(elementFilter, message = "Select an element.", subMessage = "") { + const overlayMessage = document.createElement('div'); + overlayMessage.textContent = message; + Object.assign(overlayMessage.style, { + position: 'fixed', + top: '0', + left: '0', + width: '100%', + textAlign: 'center', + fontSize: '2em', + color: 'white', + backgroundColor: 'rgba(0,0,0,0.5)', + padding: '1em', + pointerEvents: 'none', + zIndex: '9999999999' + }); + + const smallText = document.createElement('small'); + smallText.style.display = 'block'; + smallText.style.fontSize = '0.6em'; + smallText.innerHTML = 'Press Esc to cancel.'; + if (subMessage) { + smallText.prepend(subMessage, document.createElement('br')); + } + overlayMessage.appendChild(smallText); + + const targetOverlay = document.createElement('div'); + targetOverlay.classList.add('target-overlay'); + Object.assign(targetOverlay.style, { + position: 'fixed', + boxSizing: 'border-box', + outline: '2px dashed black', + boxShadow: '0 0 0 2px white, 0 0 0 3px red, 0 0 0 1px red inset', + zIndex: '9999999999', + cursor: 'pointer', + display: 'none' + }); + document.body.appendChild(targetOverlay); + + /** @type {Element | null} */ + let currentEl = null; + + const cleanup = () => { + document.body.removeChild(overlayMessage); + document.body.removeChild(targetOverlay); + removeEventListener('keydown', keydown, true); + removeEventListener('pointermove', pointermove, true); + removeEventListener('pointerdown', pointerdown, true); + }; + + const promise = new Promise((resolve) => { + targetOverlay.addEventListener('click', () => { + cleanup(); + resolve(currentEl); + }); + }); + + const keydown = (/** @type {KeyboardEvent} */ e) => { + if (e.key === 'Escape') { + cleanup(); + e.preventDefault(); + e.stopImmediatePropagation(); + } + }; + + const pointermove = (/** @type {PointerEvent} */ e) => { + const matchedEl = document.elementsFromPoint(e.clientX, e.clientY) + .find((el) => + 
(!el.matches('.target-overlay')) && + (typeof elementFilter === 'function' ? elementFilter(el) : el.matches(elementFilter) + )); + if (matchedEl) { + currentEl = matchedEl; + const rect = matchedEl.getBoundingClientRect(); + Object.assign(targetOverlay.style, { + top: `${rect.top}px`, + left: `${rect.left}px`, + width: `${rect.width}px`, + height: `${rect.height}px`, + display: 'block' + }); + } else { + targetOverlay.style.display = 'none'; + } + }; + + const pointerdown = (/** @type {PointerEvent} */ e) => { + e.preventDefault(); // prevent focus change + }; -// Can add commit summaries with ChatGPT, with prompt: -// > Add short one-line `commitSummary` fields to these, based on the prompts. They can be the same as the prompt if it's short enough. -// but GitHub Copilot is probably more helpful. -// What I've done is actually prompt ChatGPT twice, copy both results into the objects using multi-cursor editing, -// so I can pick one/edit it manually, and delete the other. -// One could also just edit commit messages later, possibly in one big `rebase -i`. 
+ addEventListener('keydown', keydown, true); + addEventListener('pointermove', pointermove, true); + addEventListener('pointerdown', pointerdown, true); + + document.body.appendChild(overlayMessage); + + return promise; +} + +async function collectVersionsInteractively() { + openVersionList(); + const hasLinks = (el) => el.querySelectorAll(`a[href^='https://websim.ai/c/']`).length > 0; + const versionListDivElement = await pickElement(hasLinks, "Select the element containing the list of versions.", "(Click in the space between two items.)"); + const versionListDivSelector = buildQuerySelector(versionListDivElement); + console.log("Generated version list selector:", versionListDivSelector); + // Sanity check: the selector should match exactly the one element we picked + if (document.querySelectorAll(versionListDivSelector).length !== 1) { + alert("Error: The generated version list selector does not match exactly one element."); + return; + } + if (document.querySelector(versionListDivSelector) !== versionListDivElement) { + alert("Error: The generated version list selector matched a different element from the one picked."); + return; + } + const mightBePrompt = (el) => el.textContent.length > 8 && el.closest(versionListDivSelector) === versionListDivElement && !hasLinks(el); + const promptElement = await pickElement(mightBePrompt, "Select the prompt text from of the versions in the list.", "(Click directly on the text of a prompt.)"); + let promptSelector = buildQuerySelector(promptElement, versionListDivElement); + console.log("Initially generated prompt selector:", promptSelector); + // remove first :nth-child(), so that it matches multiple items, not the specific list item + promptSelector = promptSelector.replace(/:nth-child\(\d+\)/, ''); + console.log("Adjusted generated prompt selector:", promptSelector); + // Sanity check: the selector should match the element we picked (among others) + if (document.querySelectorAll(promptSelector).length === 0) { + 
alert("Error: The generated prompt selector does not match any elements."); + return; + } + if (!promptElement.matches(promptSelector)) { + alert("Error: The picked prompt element does not match the generated selector."); + return; + } + const allVersions = await collectAllVersions(versionListDivSelector, promptSelector); + const versionsWithCommitSummaries = addCommitSummaries(allVersions); + const json = JSON.stringify(versionsWithCommitSummaries, null, "\t"); + console.log(json); + const llmPrompt = json.replace(/"commitSummary": "([^"]*)"/g, '"commitSummary": ""') + "\n\n\nAdd short one-line commitSummary fields to these, based on the prompts."; + showOutputDialog([ + { outputText: json, noun: "JSON", label: "JSON", default: true }, + { outputText: llmPrompt, noun: "LLM prompt", label: "LLM prompt (for automatic commit summaries)" }, + ]); +} + +function showOutputDialog(options) { + // Remove existing stylesheet if it exists + const existingStyle = document.getElementById('websim-exporter-dialog-style'); + if (existingStyle) { + existingStyle.remove(); + } + + // Create a new stylesheet + const style = document.createElement('style'); + style.id = 'websim-exporter-dialog-style'; + style.textContent = ` + .websim-exporter-dialog { + font-family: Arial, sans-serif; + background-color: #f9f9f9; + border: 1px solid #ccc; + padding: 20px; + position: fixed; + box-shadow: 0 4px 8px rgba(0,0,0,0.1); + border-radius: 4px; + z-index: 1000; + display: flex; + flex-direction: column; + align-items: center; + } + .websim-exporter-dialog label { + margin-bottom: 10px; + } + .websim-exporter-dialog .output-preview { + border: 1px solid #ccc; + padding: 10px; + width: 70vw; + height: 70vh; + white-space: pre-wrap; + overflow-wrap: break-word; + overflow-y: auto; + margin-bottom: 20px; + } + .websim-exporter-dialog .buttons { + margin-top: 10px; + } + .websim-exporter-dialog .buttons button { + margin: 0 5px; + padding: 8px 16px; + cursor: pointer; + border: none; + 
background-color: #007bff; + color: white; + border-radius: 4px; + outline: none; + } + .websim-exporter-dialog .buttons button:hover { + background-color: #0056b3; + } + .websim-exporter-toast { + position: fixed; + bottom: 30px; + right: 30px; + background-color: rgba(0, 0, 0, 0.8); + color: white; + padding: 10px 20px; + border-radius: 4px; + z-index: 1100; + } + .websim-exporter-toast.error { + background-color: #dc3545; + } + .websim-exporter-toast.success { + background-color: #28a745; + } + `; + + document.head.appendChild(style); + + // Create dialog element + const dialog = document.createElement('dialog'); + dialog.classList.add('websim-exporter-dialog'); + + // Create radio group and output preview + const radioGroup = document.createElement('div'); + options.forEach((opt, index) => { + const radioInput = document.createElement('input'); + radioInput.type = 'radio'; + radioInput.id = `option${index}`; + radioInput.name = 'outputOption'; + radioInput.value = index.toString(); + radioInput.addEventListener('change', () => { + previewOutput(opt.outputText, opt.noun); + }); + if (opt.default) { + radioInput.checked = true; + // previewOutput(opt.outputText, opt.noun); called after outputPreview is created + // could reorder things to simplify this a bit + } + + const radioLabel = document.createElement('label'); + radioLabel.setAttribute('for', `option${index}`); + radioLabel.textContent = opt.label; + + radioGroup.appendChild(radioInput); + radioGroup.appendChild(radioLabel); + radioGroup.appendChild(document.createElement('br')); + }); + + const outputPreview = document.createElement('pre'); + outputPreview.classList.add('output-preview'); + + dialog.appendChild(radioGroup); + dialog.appendChild(outputPreview); + + // Create close button + const closeButton = document.createElement('button'); + closeButton.textContent = 'Close'; + closeButton.addEventListener('click', () => { + dialog.remove(); + }); + + // Create copy to clipboard button + const 
copyButton = document.createElement('button'); + copyButton.textContent = 'Copy to Clipboard'; + copyButton.addEventListener('click', () => { + const selectedOption = document.querySelector('input[name="outputOption"]:checked'); + if (selectedOption) { + const index = parseInt(selectedOption.value); + const selectedOpt = options[index]; + + // Copy to clipboard logic + navigator.clipboard.writeText(selectedOpt.outputText) + .then(() => { + showToast(`Copied ${selectedOpt.noun} to clipboard.`, 'success'); + }) + .catch((err) => { + showToast(`Failed to copy ${selectedOpt.noun} to clipboard: ${err}`, 'error'); + }); + } + }); + + const buttonContainer = document.createElement('div'); + buttonContainer.classList.add('buttons'); + buttonContainer.appendChild(closeButton); + buttonContainer.appendChild(copyButton); + dialog.appendChild(buttonContainer); + + // Handle default selection preview + options.forEach((opt, index) => { + if (opt.default) { + previewOutput(opt.outputText, opt.noun); + } + }); + + // Show dialog + document.body.appendChild(dialog); + dialog.showModal(); + + // Function to preview selected output text + function previewOutput(outputText, noun) { + outputPreview.textContent = outputText; + } + + // Function to show toast message + function showToast(message, extraClass = '') { + const toast = document.createElement('div'); + toast.classList.add('websim-exporter-toast', extraClass); + toast.textContent = message; + // document.body.appendChild(toast); // would go behind modal dialog + dialog.append(toast); + + // Remove toast after 3 seconds + setTimeout(() => { + toast.remove(); + }, 3000); + } +} const versions = [ { @@ -232,4 +725,6 @@ ${prompt}`; } if (typeof window === 'undefined') { downloadVersions(versions, 'websim-version', 'oregano.html'); +} else { + collectVersionsInteractively(); }