-
Notifications
You must be signed in to change notification settings - Fork 460
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1466878
commit b8e7c15
Showing
1 changed file
with
340 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,340 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "dbfe669a", | ||
"metadata": { | ||
"execution": { | ||
"iopub.execute_input": "2021-02-23T14:22:16.610471Z", | ||
"iopub.status.busy": "2021-02-23T14:22:16.610129Z", | ||
"iopub.status.idle": "2021-02-23T14:22:16.627784Z", | ||
"shell.execute_reply": "2021-02-23T14:22:16.626866Z", | ||
"shell.execute_reply.started": "2021-02-23T14:22:16.610384Z" | ||
}, | ||
"papermill": {}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"<img width=\"8%\" alt=\"Instagram.png\" src=\"https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/Instagram.png\" style=\"border-radius: 15%\">" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "5bbfcea2", | ||
"metadata": { | ||
"papermill": {}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"# Instagram - Explore API\n", | ||
"<a href=\"https://bit.ly/3JyWIk6\">Give Feedback</a> | <a href=\"https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=bug&template=bug_report.md&title=Instagram+-+Get+stats+from+posts:+Error+short+description\">Bug report</a>" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "394838ed", | ||
"metadata": { | ||
"papermill": {}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"**Tags:** #instagram #snippet #dataframe #content" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "370242e8", | ||
"metadata": { | ||
"papermill": {}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "fcdf88ea-b290-4dc9-8605-08c8724551fd", | ||
"metadata": { | ||
"papermill": {}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"**Last update:** 2024-07-03 (Created: 2024-07-02)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "naas-description", | ||
"metadata": { | ||
"papermill": {}, | ||
"tags": [ | ||
"description" | ||
] | ||
}, | ||
"source": [ | ||
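"**Description:** This notebook retrieves data from an Instagram profile using the Apify Instagram Profile Scraper and organizes it into DataFrames (posts, profile, places, hashtags, photos and comments)." | ||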
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "de96f02c", | ||
"metadata": { | ||
"papermill": {}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"## Input" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "93bc8174", | ||
"metadata": { | ||
"papermill": {}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"### Import libraries" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "f8cfa596-61fc-4135-a913-915cc5aab9e9", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import json\n", | ||
"import os\n", | ||
"import time\n", | ||
"\n", | ||
"import pandas as pd\n", | ||
"import requests" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "0957e2bc", | ||
"metadata": { | ||
"papermill": {}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"### Setup variables" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "a142e6f7-02d9-4fee-912a-2eb410a82b03", | ||
"metadata": {}, | ||
"source": [ | ||
"- `apify_token`: personal Apify API token used to authenticate the request\n", | ||
"- `instagram_profile_url`: URL of the Instagram profile to scrape" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "7f9f3fbb-f787-45d4-a7f0-e13662c0b736", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# Read the token from the environment so it is never committed with the notebook;\n", | ||
"# the fallback is a neutral placeholder to replace for a quick local run.\n", | ||
"apify_token = os.environ.get(\"APIFY_TOKEN\", \"YOUR_APIFY_TOKEN\")\n", | ||
"instagram_profile_url = \"https://www.instagram.com/naaslife/\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "9829569f", | ||
"metadata": { | ||
"papermill": {}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"## Model" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "40e3d074-6982-4381-bbb9-d5ee8c442c4a", | ||
"metadata": {}, | ||
"source": [ | ||
"### Scrape Instagram data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "94d07b97-00e6-4204-a40e-51f3515a9138", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"def get_instagram_data(apify_token, instagram_profile_url, timeout=360):\n", | ||
"    \"\"\"Fetch one Instagram profile record through the Apify Instagram Profile Scraper.\n", | ||
"\n", | ||
"    Args:\n", | ||
"        apify_token: Apify API token, sent as a Bearer token.\n", | ||
"        instagram_profile_url: Profile URL (or bare username). A trailing slash,\n", | ||
"            query string or fragment is tolerated.\n", | ||
"        timeout: Seconds to wait for the HTTP response before requests raises\n", | ||
"            a Timeout.\n", | ||
"\n", | ||
"    Returns:\n", | ||
"        dict: The first item of the dataset returned by the actor run.\n", | ||
"        str: A readable message when no username can be extracted, the API\n", | ||
"            answers with a non-2xx status, or the dataset is empty.\n", | ||
"    \"\"\"\n", | ||
"    # Drop any query string / fragment, then take the last path segment so the\n", | ||
"    # URL works with or without a trailing slash.\n", | ||
"    cleaned_url = instagram_profile_url.split('?')[0].split('#')[0].rstrip('/')\n", | ||
"    username = cleaned_url.split('/')[-1].lstrip('@')\n", | ||
"    if not username:\n", | ||
"        return f\"Error: could not extract a username from '{instagram_profile_url}'\"\n", | ||
"\n", | ||
"    # Define the Apify API URL for the Instagram Profile Scraper\n", | ||
"    api_url = \"https://api.apify.com/v2/acts/apify~instagram-profile-scraper/run-sync-get-dataset-items\"\n", | ||
"\n", | ||
"    # Define the payload with the necessary parameters\n", | ||
"    payload = {\n", | ||
"        \"usernames\": [username],  # Pass the username as a list\n", | ||
"        \"proxyConfig\": {\n", | ||
"            \"useApifyProxy\": True\n", | ||
"        }\n", | ||
"    }\n", | ||
"\n", | ||
"    # Define the headers with the Apify API token\n", | ||
"    headers = {\n", | ||
"        \"Authorization\": f\"Bearer {apify_token}\",\n", | ||
"        \"Content-Type\": \"application/json\"\n", | ||
"    }\n", | ||
"\n", | ||
"    # The call blocks until the response arrives; the timeout keeps the cell from hanging forever\n", | ||
"    response = requests.post(api_url, json=payload, headers=headers, timeout=timeout)\n", | ||
"\n", | ||
"    # Accept any 2xx status rather than only 200\n", | ||
"    # NOTE(review): Apify's sync-run endpoints appear to reply 201 on success - confirm against the API reference\n", | ||
"    if not response.ok:\n", | ||
"        return f\"Error: {response.status_code} - {response.text}\"\n", | ||
"\n", | ||
"    # The profile is taken from the first element of a JSON list\n", | ||
"    data = response.json()\n", | ||
"    if isinstance(data, list) and data:\n", | ||
"        return data[0]\n", | ||
"    return \"No profile data found.\"\n", | ||
"\n", | ||
"def process_instagram_data(data):\n", | ||
"    \"\"\"Split one scraped profile record into six pandas DataFrames.\n", | ||
"\n", | ||
"    Args:\n", | ||
"        data: dict describing a single profile.\n", | ||
"\n", | ||
"    Returns:\n", | ||
"        tuple: (df_posts, df_profiles, df_places, df_hashtags, df_photos, df_comments).\n", | ||
"    \"\"\"\n", | ||
"    # NOTE(review): the keys read below ('posts', 'user', 'location', 'hashtags',\n", | ||
"    # 'images', 'comments') should be confirmed against the actor's actual output.\n", | ||
"    # The `or` fallbacks cover keys that are present but null, not only missing ones.\n", | ||
"    posts = data.get('posts') or []\n", | ||
"    profiles = [data.get('user') or {}]\n", | ||
"    places = [post.get('location', {}) for post in posts if post.get('location')]\n", | ||
"    hashtags = [hashtag for post in posts for hashtag in (post.get('hashtags') or [])]\n", | ||
"    photos = [post.get('images') or [] for post in posts]\n", | ||
"    comments = [comment for post in posts for comment in (post.get('comments') or [])]\n", | ||
"\n", | ||
"    # Create DataFrames\n", | ||
"    df_posts = pd.DataFrame(posts)\n", | ||
"    df_profiles = pd.DataFrame(profiles)\n", | ||
"    df_places = pd.DataFrame(places)\n", | ||
"    df_hashtags = pd.DataFrame(hashtags, columns=['hashtag'])\n", | ||
"    df_photos = pd.DataFrame(photos)\n", | ||
"    df_comments = pd.DataFrame(comments)\n", | ||
"\n", | ||
"    return df_posts, df_profiles, df_places, df_hashtags, df_photos, df_comments" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "14696ed5", | ||
"metadata": { | ||
"execution": { | ||
"iopub.execute_input": "2021-07-02T23:32:10.789097Z", | ||
"iopub.status.busy": "2021-07-02T23:32:10.788829Z", | ||
"iopub.status.idle": "2021-07-02T23:32:10.796900Z", | ||
"shell.execute_reply": "2021-07-02T23:32:10.796358Z", | ||
"shell.execute_reply.started": "2021-07-02T23:32:10.789033Z" | ||
}, | ||
"papermill": {}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"## Output" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "72557a2c", | ||
"metadata": { | ||
"papermill": {}, | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"### Display result" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "1d3039cc-b820-4b02-8e01-24fd97527009", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"profile_data = get_instagram_data(apify_token, instagram_profile_url)\n", | ||
"\n", | ||
"if isinstance(profile_data, dict):\n", | ||
"    df_posts, df_profiles, df_places, df_hashtags, df_photos, df_comments = process_instagram_data(profile_data)\n", | ||
"\n", | ||
"    # One labelled entry per DataFrame, shown in the same order as they are returned\n", | ||
"    frames = {\n", | ||
"        \"Posts\": df_posts,\n", | ||
"        \"Profiles\": df_profiles,\n", | ||
"        \"Places\": df_places,\n", | ||
"        \"Hashtags\": df_hashtags,\n", | ||
"        \"Photos\": df_photos,\n", | ||
"        \"Comments\": df_comments,\n", | ||
"    }\n", | ||
"    for position, (label, frame) in enumerate(frames.items()):\n", | ||
"        # A blank line separates every heading from the previous table\n", | ||
"        separator = \"\" if position == 0 else \"\\n\"\n", | ||
"        print(f\"{separator}{label} DataFrame:\")\n", | ||
"        # Rich notebook rendering instead of the plain-text print of the frame\n", | ||
"        display(frame)\n", | ||
"else:\n", | ||
"    # Anything that is not a dict is treated as a message and printed as-is\n", | ||
"    print(profile_data)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "e76164f0-64be-4e45-98c8-d5be914b449d", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.6" | ||
}, | ||
"naas": { | ||
"notebook_id": "8c1d59ba9fc141ddf76ab615ec70620884b5be94f4cde842bd75126ac862db52", | ||
"notebook_path": "Instagram/Instagram_Get_stats_from_posts.ipynb" | ||
}, | ||
"papermill": { | ||
"default_parameters": {}, | ||
"environment_variables": {}, | ||
"parameters": {}, | ||
"version": "2.3.3" | ||
}, | ||
"widgets": { | ||
"application/vnd.jupyter.widget-state+json": { | ||
"state": {}, | ||
"version_major": 2, | ||
"version_minor": 0 | ||
} | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |