docs: fix imports of cheerio in examples (#664)
Related: #661
B4nan authored Aug 8, 2023
1 parent 845b9d7 commit bf1af7b
Showing 12 changed files with 34 additions and 37 deletions.
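
The fix applied across all twelve files is the same mechanical change: the examples stop relying on cheerio's default export and switch to a namespace import, which works regardless of whether the installed cheerio version still ships a default export. A minimal before/after sketch (the sample HTML is illustrative):

```js
// Before — relies on a default export that newer cheerio versions
// no longer provide:
// import cheerio from 'cheerio';

// After — the namespace import used throughout this commit:
import * as cheerio from 'cheerio';

const $ = cheerio.load('<h1 class="title">Hello</h1>'); // illustrative HTML
console.log($('h1.title').text()); // prints: Hello
```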

---

@@ -144,7 +144,6 @@ So, we've gotta scroll down the page to load these images. Luckily, because we'r

```js
import { PuppeteerCrawler, utils, Dataset } from 'crawlee';
-import cheerio from 'cheerio';

const BASE_URL = 'https://demo-webstore.apify.org';

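
The hunk above only shows the imports, but the surrounding lesson scrolls the page so that lazily loaded images appear before extraction. A hedged sketch of that flow, assuming the `utils.puppeteer.infiniteScroll()` helper that the `utils` import suggests (the image extraction and start URL are illustrative, not the lesson's actual code):

```js
import { PuppeteerCrawler, utils, Dataset } from 'crawlee';

const BASE_URL = 'https://demo-webstore.apify.org';

const crawler = new PuppeteerCrawler({
    requestHandler: async ({ page, request }) => {
        // Scroll to the bottom of the page so lazily loaded images get fetched
        await utils.puppeteer.infiniteScroll(page);

        // Illustrative extraction — collect the source URL of every image
        const images = await page.$$eval('img', (imgs) => imgs.map((img) => img.src));
        await Dataset.pushData({ url: request.url, images });
    },
});

await crawler.run([BASE_URL]);
```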

---

@@ -16,7 +16,7 @@ import TabItem from '@theme/TabItem';

If you're trying to [collect data](../executing_scripts/extracting_data.md) on a website that has millions, thousands, or even just hundreds of results, it is very likely that they are paginating their results to reduce strain on their backend as well as on the users loading and rendering the content.

-![Amazon pagination](https://apify-docs.s3.amazonaws.com/master/docs/assets/tutorials/images/pagination.jpg)
+![Amazon pagination](../../advanced_web_scraping/images/pagination.png)

Attempting to scrape thousands to tens of thousands of results using a headless browser on a website that only shows 30 results at a time might be daunting at first, but rest assured that by the end of this lesson you'll feel confident when faced with this use case.

@@ -53,7 +53,6 @@ Let's grab this number now with a little bit of code:

```javascript
import { chromium } from 'playwright';
-import { load } from 'cheerio';

const repositories = [];

@@ -79,7 +78,6 @@ await browser.close();

```javascript
import puppeteer from 'puppeteer';
-import { load } from 'cheerio';

const repositories = [];

@@ -118,7 +116,7 @@ And since we're already on the first page, we'll go ahead and scrape the repos f

```javascript
import { chromium } from 'playwright';
-import { load } from 'cheerio';
+import * as cheerio from 'cheerio';

const repositories = [];

@@ -127,7 +125,7 @@ const REPOSITORIES_URL = `${BASE_URL}/orgs/facebook/repositories`;

// Create a function which grabs all repos from a page
const scrapeRepos = async (page) => {
-const $ = load(await page.content());
+const $ = cheerio.load(await page.content());

return [...$('li.Box-row')].map((item) => {
const elem = $(item);
@@ -163,7 +161,7 @@ await browser.close();

```javascript
import puppeteer from 'puppeteer';
-import { load } from 'cheerio';
+import * as cheerio from 'cheerio';

const repositories = [];

@@ -172,7 +170,7 @@ const REPOSITORIES_URL = `${BASE_URL}/orgs/facebook/repositories`;

// Create a function which grabs all repos from a page
const scrapeRepos = async (page) => {
-const $ = load(await page.content());
+const $ = cheerio.load(await page.content());

return [...$('li.Box-row')].map((item) => {
const elem = $(item);
@@ -260,15 +258,15 @@ After all is said and done, here's what our final code looks like:

```javascript
import { chromium } from 'playwright';
-import { load } from 'cheerio';
+import * as cheerio from 'cheerio';

const repositories = [];

const BASE_URL = 'https://github.com';
const REPOSITORIES_URL = `${BASE_URL}/orgs/facebook/repositories`;

const scrapeRepos = async (page) => {
-const $ = load(await page.content());
+const $ = cheerio.load(await page.content());

return [...$('li.Box-row')].map((item) => {
const elem = $(item);
@@ -321,7 +319,7 @@ await browser.close();

```javascript
import puppeteer from 'puppeteer';
-import { load } from 'cheerio';
+import * as cheerio from 'cheerio';

const repositories = [];

@@ -330,7 +328,7 @@ const REPOSITORIES_URL = `${BASE_URL}/orgs/facebook/repositories`;

// Create a function which grabs all repos from a page
const scrapeRepos = async (page) => {
-const $ = load(await page.content());
+const $ = cheerio.load(await page.content());

return [...$('li.Box-row')].map((item) => {
const elem = $(item);
@@ -402,7 +400,6 @@ We're going to scrape the brand and price from the first 75 results on the **Abo

```javascript
import { chromium } from 'playwright';
-import { load } from 'cheerio';

// Create an array where all scraped products will
// be pushed to
@@ -421,7 +418,6 @@ await browser.close();

```javascript
import puppeteer from 'puppeteer';
-import { load } from 'cheerio';

// Create an array where all scraped products will
// be pushed to
@@ -543,7 +539,9 @@ Now, the `while` loop will exit out if we've reached the bottom of the page.
Within the loop, we can grab hold of the total number of items on the page. To avoid extracting and pushing duplicate items to the **products** array, we can use the `.slice()` method to cut out the items we've already scraped.

```js
-const $ = load(await page.content());
+import * as cheerio from 'cheerio';
+
+const $ = cheerio.load(await page.content());

// Grab the newly loaded items
const items = [...$('a[data-testid*="productTile"]')].slice(products.length);
@@ -569,7 +567,7 @@ With everything completed, this is what we're left with:

```javascript
import { chromium } from 'playwright';
-import { load } from 'cheerio';
+import * as cheerio from 'cheerio';

const products = [];

@@ -592,7 +590,7 @@ while (products.length < 75) {
// Allow the products 1 second to load
await page.waitForTimeout(1000);

-const $ = load(await page.content());
+const $ = cheerio.load(await page.content());

// Grab the newly loaded items
const items = [...$('a[data-testid*="productTile"]')].slice(products.length);
@@ -628,7 +626,7 @@ await browser.close();

```javascript
import puppeteer from 'puppeteer';
-import { load } from 'cheerio';
+import * as cheerio from 'cheerio';

const products = [];

@@ -651,7 +649,7 @@ while (products.length < 75) {
// Allow the products 1 second to load
await page.waitForTimeout(1000);

-const $ = load(await page.content());
+const $ = cheerio.load(await page.content());

// Grab the newly loaded items
const items = [...$('a[data-testid*="productTile"]')].slice(products.length);
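
Since the hunks above only show fragments of the infinite-scroll loop, here is a condensed, hedged sketch of the whole pattern for the Playwright variant: scroll, wait, re-parse, then `.slice()` off the items that were already scraped. The `productTile` selector, the 1-second wait, and the 75-item cap come from the hunks; the shop URL and the brand/price selectors are assumptions:

```js
import { chromium } from 'playwright';
import * as cheerio from 'cheerio';

const products = [];

const browser = await chromium.launch({ headless: false });
const page = await browser.newPage();
await page.goto('https://en.aboutyou.com/your-shop'); // hypothetical URL

// Simplified: the lesson's loop also exits once the bottom of the page is reached
while (products.length < 75) {
    // Scroll down to trigger the next batch of results
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));

    // Allow the products 1 second to load
    await page.waitForTimeout(1000);

    const $ = cheerio.load(await page.content());

    // Grab only the newly loaded items
    const items = [...$('a[data-testid*="productTile"]')].slice(products.length);

    for (const item of items) {
        const elem = $(item);
        products.push({
            // Hypothetical selectors for brand and price
            brand: elem.find('p[data-testid*="brandName"]').first().text().trim(),
            price: elem.find('span[data-testid*="finalPrice"]').first().text().trim(),
        });
    }
}

await browser.close();
console.log(`Scraped ${products.length} products`);
```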

---

@@ -53,7 +53,7 @@ We'll start from a boilerplate that's very similar to the scraper we built in [B

```js title=crawler.js
import { gotScraping } from 'got-scraping';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';

const storeUrl = 'https://warehouse-theme-metal.myshopify.com/collections/sales';

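
For context, the boilerplate these imports belong to boils down to a short, runnable script: fetch the listing page, parse it with cheerio, and confirm the parse worked. The title check is our own illustration, not the lesson's code:

```js
import { gotScraping } from 'got-scraping';
import * as cheerio from 'cheerio';

const storeUrl = 'https://warehouse-theme-metal.myshopify.com/collections/sales';

// Fetch the listing page's HTML
const response = await gotScraping(storeUrl);
const html = response.body;

// Parse it so elements can be queried with CSS selectors
const $ = cheerio.load(html);
console.log($('title').text()); // sanity check that parsing worked
```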

---

@@ -21,7 +21,7 @@ In the previous lessons, we collected and filtered all the URLs pointing to indi

```js title=crawler.js
import { gotScraping } from 'got-scraping';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';

const WEBSITE_URL = 'https://warehouse-theme-metal.myshopify.com';
const storeUrl = `${WEBSITE_URL}/collections/sales`;
@@ -75,7 +75,7 @@ In programming, you handle errors by catching and handling them. Typically by pr
```js title=crawler.js
import { gotScraping } from 'got-scraping';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';

const WEBSITE_URL = 'https://warehouse-theme-metal.myshopify.com';
const storeUrl = `${WEBSITE_URL}/collections/sales`;
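
The lesson's advice — catch errors, print that they occurred, and/or retry — can be sketched as follows. The URL list is illustrative (the lesson collects product URLs by crawling), and the handling shown is the simple print-and-continue variant:

```js
import { gotScraping } from 'got-scraping';
import * as cheerio from 'cheerio';

const WEBSITE_URL = 'https://warehouse-theme-metal.myshopify.com';

// Illustrative list; the lesson gathers these by crawling the sales collection
const productUrls = [`${WEBSITE_URL}/collections/sales`];

for (const url of productUrls) {
    try {
        const response = await gotScraping(url);
        const $ = cheerio.load(response.body);
        console.log(`Scraped: ${$('title').text()}`);
    } catch (error) {
        // Print info that the error occurred, then continue with the next URL
        console.error(`Failed to process ${url}: ${error.message}`);
    }
}
```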

---

@@ -18,7 +18,7 @@ We finished off the [first section](../data_extraction/index.md) of the _Web Scr
// download, extract, and convert the data we wanted
import { writeFileSync } from 'fs';
import { gotScraping } from 'got-scraping';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';
import { parse } from 'json2csv';

// Here, we fetched the website's HTML and saved it to a new variable.
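
The imports in this hunk recap the full pipeline: fetch HTML with `got-scraping`, extract records with cheerio, convert them to CSV with `json2csv`, and save with `writeFileSync`. A hedged sketch of the CSV tail end of that pipeline, using dummy records in place of scraped data:

```js
import { writeFileSync } from 'fs';
import { parse } from 'json2csv';

// Dummy records standing in for the scraped data
const records = [
    { title: 'Sony SACS9 subwoofer', price: 158 },
    { title: 'Klipsch R-120SW subwoofer', price: 324 },
];

// Convert the array of objects to a CSV string
const csv = parse(records);

// Save the CSV next to the script
writeFileSync('products.csv', csv);
```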

---

@@ -35,7 +35,7 @@ Let's update the Node.js code from the [Finding links lesson](./finding_links.md

```js title=crawler.js
import { gotScraping } from 'got-scraping';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';

const storeUrl = 'https://warehouse-theme-metal.myshopify.com/collections/sales';

@@ -72,7 +72,7 @@ When we plug this into our crawler code, we will get the correct - absolute - UR

```js title=crawler.js
import { gotScraping } from 'got-scraping';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';

// Split the base URL from the category to use it later.
const WEBSITE_URL = 'https://warehouse-theme-metal.myshopify.com';
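
The "correct - absolute - URL" mentioned above comes from resolving each scraped `href` against the website's base URL, which is what the WHATWG `URL` constructor does. A small sketch under that assumption (the link selector is a guess at the theme's markup):

```js
import { gotScraping } from 'got-scraping';
import * as cheerio from 'cheerio';

const WEBSITE_URL = 'https://warehouse-theme-metal.myshopify.com';
const storeUrl = `${WEBSITE_URL}/collections/sales`;

const response = await gotScraping(storeUrl);
const $ = cheerio.load(response.body);

// Hypothetical selector for the product links on the listing page
const links = [...$('a.product-item__title')].map((el) => {
    const href = $(el).attr('href'); // relative, e.g. /products/some-product
    // Resolve the relative href against the base URL to get an absolute one
    return new URL(href, WEBSITE_URL).href;
});

console.log(links);
```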

---

@@ -21,7 +21,7 @@ Let's start writing a script that extracts data from this single PDP. We can use

```js title=product.js
import { gotScraping } from 'got-scraping';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';

const productUrl = 'https://warehouse-theme-metal.myshopify.com/products/denon-ah-c720-in-ear-headphones';
const response = await gotScraping(productUrl);
@@ -123,7 +123,7 @@ Let's compare the above data extraction example with the crawling code we wrote

```js title=crawler.js
import { gotScraping } from 'got-scraping';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';

const WEBSITE_URL = 'https://warehouse-theme-metal.myshopify.com';
const storeUrl = `${WEBSITE_URL}/collections/sales`;
@@ -171,7 +171,7 @@ We'll start by adding our imports and constants at the top of the file, no chang

```js title=final.js
import { gotScraping } from 'got-scraping';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';

const WEBSITE_URL = 'https://warehouse-theme-metal.myshopify.com';
```
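
As a companion to the hunks above, a hedged sketch of a single-PDP extraction script; the product URL comes from the lesson itself, while the selectors and fields are assumptions:

```js
import { gotScraping } from 'got-scraping';
import * as cheerio from 'cheerio';

const productUrl = 'https://warehouse-theme-metal.myshopify.com/products/denon-ah-c720-in-ear-headphones';

const response = await gotScraping(productUrl);
const $ = cheerio.load(response.body);

// Hypothetical selectors; the lesson's actual fields and markup may differ
const product = {
    title: $('h1').first().text().trim(),
    vendor: $('.product-meta__vendor').first().text().trim(),
};

console.log(product);
```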

---

@@ -38,7 +38,7 @@ Replace the code in your **main.js** with the following, and run it with `node m
```js
// main.js
import { gotScraping } from 'got-scraping';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';

const storeUrl = 'https://warehouse-theme-metal.myshopify.com/collections/sales';

@@ -110,7 +110,7 @@ The final scraper code looks like this. Replace the code in your **main.js** fil
```js
// main.js
import { gotScraping } from 'got-scraping';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';

const storeUrl = 'https://warehouse-theme-metal.myshopify.com/collections/sales';


---

@@ -43,7 +43,7 @@ To parse the HTML with the `cheerio` library. Replace the code in your **main.js
```js
// main.js
import { gotScraping } from 'got-scraping';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';

const storeUrl = 'https://warehouse-theme-metal.myshopify.com/collections/sales';


---

@@ -57,7 +57,7 @@ With the libraries installed, create a new file in the project's folder called *

```js
import gotScraping from 'got-scraping';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';

console.log('it works!');
```

---

@@ -40,7 +40,7 @@ The full code including the earlier scraping part now looks like this. Replace t
```js
// main.js
import { gotScraping } from 'got-scraping';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';
import { parse } from 'json2csv'; // <---- added a new import

const storeUrl = 'https://warehouse-theme-metal.myshopify.com/collections/sales';
@@ -105,7 +105,7 @@ When we complete the code, it looks like this. Replace the code in your **main.j
// main.js
import { writeFileSync } from 'fs'; // <---- added a new import
import { gotScraping } from 'got-scraping';
-import cheerio from 'cheerio';
+import * as cheerio from 'cheerio';
import { parse } from 'json2csv';

const storeUrl = 'https://warehouse-theme-metal.myshopify.com/collections/sales';

---

@@ -61,8 +61,8 @@ import TabItem from '@theme/TabItem';
> Note that the Actor `name`, `version`, `buildTag`, and `environmentVariables` are currently only used when you deploy your Actor using [Apify CLI](/cli) and not when deployed, for example, via GitHub integration. There they serve informative purposes only. This is expected to change in the future.

-| Property | Type | Description |
-| ---------------- | -------- |----------- |
+| Property | Type | Description |
+|------------------------| -------- |----------- |
| `actorSpecification` | Required | Currently `1` is the only available version, so this must be set to `1`. |
| `name` | Required | Name of the Actor. |
| `version` | Required | Actor version in the form `[Number].[Number]`, for example `0.0`, `0.1`, `2.3`, ... |
@@ -72,6 +72,6 @@ import TabItem from '@theme/TabItem';
| `dockerContextDir` | Optional | Specifies the path to the directory used as the Docker context when building the Actor. The path is relative to the location of the `actor.json` file. Useful for having a monorepo with multiple Actors. See [Actor monorepos](../deployment/source_types.md#actor-monorepos) for more details. |
| `readme` | Optional | If you specify the path to your README file under the `readme` field, the README at this path will be used on the platform. If not specified, the README at `.actor/README.md` or `README.md` will be used, in this order of preference. See our [Apify Academy article on writing quality README files](/academy/get-most-of-actors/actor-readme). |
| `input` | Optional | You can embed your [input schema](./input_schema/index.md) object directly in `actor.json` under the `input` field. Alternatively, you can provide a path to a custom input schema. If not provided, the input schema at `.actor/INPUT_SCHEMA.json` or `INPUT_SCHEMA.json` is used, in this order of preference. |
-`storages.dataset` | Optional | You can define the schema of the items in your dataset under the `storages.dataset` field. This can be either an embedded object or a path to a JSON schema file. [Read more](./output_schema.md#specification-version-1) about Actor output schemas. |
+| `storages.dataset` | Optional | You can define the schema of the items in your dataset under the `storages.dataset` field. This can be either an embedded object or a path to a JSON schema file. [Read more](./output_schema.md#specification-version-1) about Actor output schemas. |
| `minMemoryMbytes` | Optional | Specifies the minimum amount of memory in megabytes that an Actor requires to run. Requires an integer value. If both `minMemoryMbytes` and `maxMemoryMbytes` are set, then `minMemoryMbytes` must be the same or lower than `maxMemoryMbytes`. |
| `maxMemoryMbytes` | Optional | Specifies the maximum amount of memory in megabytes that an Actor requires to run. It can be used to control the cost of runs, especially when developing pay-per-result Actors. Requires an integer value. |
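
Putting the table together, a minimal `.actor/actor.json` might look like the following illustrative sketch (all values are made up; only `actorSpecification`, `name`, and `version` are required):

```json
{
    "actorSpecification": 1,
    "name": "my-scraper",
    "version": "0.1",
    "buildTag": "latest",
    "minMemoryMbytes": 256,
    "maxMemoryMbytes": 4096,
    "storages": {
        "dataset": "./dataset_schema.json"
    }
}
```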
