Front loading a wordpress blog from instagram
My fiancée, like many others, has become jaded with the current social media landscape, and after a bit of nudging from a professional nerd has decided to set up her own little corner of the internet. Part of her site will be a blog - ostensibly a log of her experiments with new techniques and materials in her studio practice.
The problem, as with any new project, was the blank page problem. She asked if it was possible to take the years of content she’s built up on her instagram and migrate it to her new blog. The following is a description of the bag of hacks I put together to achieve this.
Stage 1, grab the content
First we needed to grab all the content from her existing instagram profile. There are a bunch of tools that claim to do this, but the best I found was instaLoader, a command line tool that not only downloads the images from any public account, but also the captions, hashtags, comments etc.
pip3 install instaloader
instaloader profile YOURPROFILEHERE
Let this run for a few minutes (it runs at a reasonable rate so as not to get nerfed by any instagram rate limits), and you’ll soon have a directory with a bunch of jpegs and json files that describe each post.
Stage 2, get a wordpress access token
To use the API endpoints we’re going to hit, in order to automate the creation of the posts, we are going to need an OAuth access token.
MEDIA_ENDPOINT = f"https://public-api.wordpress.com/rest/v1.1/sites/{WORDPRESS_SITE}/media/new"
POSTS_ENDPOINT = f"https://public-api.wordpress.com/rest/v1.1/sites/{WORDPRESS_SITE}/posts/new"
In order to get a token (bearing in mind that as described above, this entire thing is a bag of hacks);
1. Go to https://developer.wordpress.com/apps/
2. Create an app and note:
   - client_id
   - client_secret
   - redirect_uri (you can use https://localhost for testing)
3. Construct a browser URL like this (replace YOUR_CLIENT_ID and YOUR_REDIRECT_URI):
   https://public-api.wordpress.com/oauth2/authorize?client_id=YOUR_CLIENT_ID&redirect_uri=YOUR_REDIRECT_URI&response_type=code
4. Open that URL in your browser, log in, approve the app, and it’ll redirect you to something like:
   https://localhost/?code=abcd1234
5. Copy that code value and exchange it for a token:
curl -X POST https://public-api.wordpress.com/oauth2/token \
  -d "client_id=YOUR_CLIENT_ID" \
  -d "client_secret=YOUR_CLIENT_SECRET" \
  -d "redirect_uri=YOUR_REDIRECT_URI" \
  -d "grant_type=authorization_code" \
  -d "code=abcd1234"
You’ll get JSON with your access token.
That token can then be reused for your upload and post-creation operations.
Step 3, assemble the wordpress posts
I’m using python here, but obviously use whatever you like:
import os, json, requests, traceback
from datetime import datetime
# OAuth2 bearer token (see Stage 2); replace the placeholder before running.
ACCESS_TOKEN = "YOUR_OAUTH_TOKEN"
# Domain of the target site; interpolated into the endpoint URLs below.
WORDPRESS_SITE = "your-site.wordpress.com"
# Directory holding the downloaded .json metadata files and .jpg images.
DATA_DIR = "./instagram_data"
# Authorization header sent with every API request.
HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}"}
# WordPress.com REST API v1.1 endpoints for uploading media and creating posts.
MEDIA_ENDPOINT = f"https://public-api.wordpress.com/rest/v1.1/sites/{WORDPRESS_SITE}/media/new"
POSTS_ENDPOINT = f"https://public-api.wordpress.com/rest/v1.1/sites/{WORDPRESS_SITE}/posts/new"
def get_image_path(json_filename, data_dir=None):
    """Return the image file(s) that belong to one post's JSON metadata file.

    The post's base name is the JSON filename with its trailing ``.json``
    removed. If ``<base>.jpg`` exists it is returned on its own; otherwise the
    numbered files ``<base>_1.jpg``, ``<base>_2.jpg``, ... are collected until
    the first missing number.

    Args:
        json_filename: Name (not path) of the metadata file, e.g. ``post.json``.
        data_dir: Directory to search. Defaults to the module-level
            ``DATA_DIR``.

    Returns:
        A list of image paths, or ``None`` when no matching image exists.
    """
    if data_dir is None:
        data_dir = DATA_DIR

    # Strip only the trailing extension; str.replace() would also remove a
    # ".json" that happens to appear in the middle of the name.
    suffix = ".json"
    if json_filename.endswith(suffix):
        base = json_filename[: -len(suffix)]
    else:
        base = json_filename

    single = os.path.join(data_dir, f"{base}.jpg")
    if os.path.exists(single):
        return [single]

    images = []
    n = 1
    while True:
        candidate = os.path.join(data_dir, f"{base}_{n}.jpg")
        if not os.path.exists(candidate):
            break
        images.append(candidate)
        n += 1
    return images or None
def upload_image(media_path):
    """Upload one image file to the WordPress media endpoint.

    Args:
        media_path: Path of the image file to upload.

    Returns:
        The URL of the uploaded media item, or ``None`` if the upload failed
        for any reason (the reason is printed).
    """
    try:
        with open(media_path, "rb") as f:
            # (connect, read) timeout: requests waits forever by default, so a
            # stalled connection would otherwise hang the whole migration.
            r = requests.post(
                MEDIA_ENDPOINT,
                headers=HEADERS,
                files={"media[]": f},
                timeout=(10, 120),
            )
        if r.status_code not in (200, 201):
            print(f"✗ Upload failed: {r.status_code} {r.text}")
            return None

        media = r.json().get("media", [])
        # Only report success when a usable URL actually came back; a media
        # entry without a URL is treated the same as no media entry at all.
        url = media[0].get("URL") if media else None
        if url:
            print(f"✓ Uploaded: {os.path.basename(media_path)} → {url}")
            return url
        print("✗ No media URL in response")
        return None
    except Exception as e:
        # Best-effort boundary: any failure (I/O, network, bad JSON) is
        # reported and turned into a None result so the caller can move on.
        print(f"✗ Error uploading {media_path}: {e}")
        return None
def create_post(title, content, media_urls, post_date):
    """Create a published WordPress post containing the images and caption.

    Args:
        title: Post title.
        content: Caption text; placed in a single ``<p>`` after the images.
        media_urls: URLs of already-uploaded images, one ``<img>`` per URL.
        post_date: Unix timestamp of the original post. It is converted with
            ``datetime.fromtimestamp`` (no tz argument), i.e. to naive local
            time, and sent as an ISO 8601 string.

    Returns:
        ``True`` if the API answered 200/201, ``False`` otherwise (the reason
        is printed).
    """
    from html import escape  # function-scope import; the file does not import html

    date_str = datetime.fromtimestamp(post_date).isoformat()
    # Captions are free text and may contain &, < or >; escape them (and the
    # URLs inside the attribute) so they cannot break the generated markup.
    images_html = "".join(f'<img src="{escape(str(u))}" />' for u in media_urls)
    body = f"{images_html}<p>{escape(str(content), quote=False)}</p>"
    payload = {"title": title, "content": body, "status": "publish", "date": date_str}
    try:
        # (connect, read) timeout: requests waits forever by default.
        r = requests.post(POSTS_ENDPOINT, headers=HEADERS, json=payload, timeout=(10, 60))
        if r.status_code in (200, 201):
            print(f"✓ Created post: {r.json().get('URL')}")
            return True
        print(f"✗ Failed to create post: {r.status_code} {r.text}")
        return False
    except Exception as e:
        # Best-effort boundary: report and signal failure to the caller.
        print(f"✗ Error creating post: {e}")
        return False
def process_json(json_file):
    """Turn one metadata file into a WordPress post.

    Reads the JSON file from ``DATA_DIR``, extracts the caption and date from
    its ``node`` entry, uploads every matching image and finally creates the
    post.

    Args:
        json_file: Name of the metadata file inside ``DATA_DIR``.

    Returns:
        ``True`` when the post was created; ``False`` when the date or images
        are missing, an upload fails, post creation fails, or any exception
        is raised (the exception and its traceback are printed).
    """
    fallback_caption = "Posted from Instagram"
    metadata_path = os.path.join(DATA_DIR, json_file)
    print(f"\n📸 Processing {json_file}")
    try:
        with open(metadata_path, "r", encoding="utf-8") as handle:
            node = json.load(handle).get("node", {})

        # The caption may be absent, a dict carrying a "text" field, or some
        # other value that is passed through unchanged.
        raw_caption = node.get("iphone_struct", {}).get("caption")
        if isinstance(raw_caption, dict):
            caption = raw_caption.get("text", fallback_caption)
        elif raw_caption is None:
            caption = fallback_caption
        else:
            caption = raw_caption

        timestamp = node.get("date")
        if not timestamp:
            print("✗ No date found")
            return False

        posted_at = datetime.fromtimestamp(timestamp)
        title = f"Instagram Post - {posted_at.strftime('%B %d, %Y')}"

        image_paths = get_image_path(json_file)
        if not image_paths:
            print("✗ No matching images found")
            return False

        uploaded_urls = []
        for image_path in image_paths:
            uploaded_url = upload_image(image_path)
            if not uploaded_url:
                # Give up on the whole post as soon as one image fails.
                return False
            uploaded_urls.append(uploaded_url)

        return create_post(title, caption, uploaded_urls, timestamp)
    except Exception as err:
        print(f"✗ Error: {err}")
        print(traceback.format_exc())
        return False
def main():
    """Process every ``.json`` file in ``DATA_DIR`` and print a summary."""
    json_files = sorted(
        name for name in os.listdir(DATA_DIR) if name.endswith(".json")
    )
    print(f"Found {len(json_files)} Instagram posts")

    # Files are handled one at a time, in sorted filename order.
    outcomes = [process_json(name) for name in json_files]
    success = sum(1 for ok in outcomes if ok)
    fail = len(outcomes) - success
    print(f"\nDone — Success: {success}, Failures: {fail}")


if __name__ == "__main__":
    main()
This works great, and over the course of about 5 minutes uploaded all the images to wordpress, and created blog posts with the correct timestamps, giving this brand new blog a catalog of posts going back over a number of years.
There was, however, one problem.
Step 4, dealing with Gutenberg Blocks
The script above creates a bunch of posts, but I forgot that wordpress uses Gutenberg blocks for layout these days. Because the script just creates posts with a bunch of raw <img> and <p> tags, the images looked awful, having been basically ignored by the wordpress theme styling. Rather than delete all the posts and update the original script, the script below crawls across an existing wordpress site and converts any <img> and <p> tags it finds to the block format. BeautifulSoup might be overkill for this task, but it works and it works well.
Note, the get_posts endpoint doesn’t like to return more than 100 posts at a time, so we process them in batches.
import requests, time, traceback, re, os
from bs4 import BeautifulSoup
# OAuth2 bearer token; replace the placeholder before running.
ACCESS_TOKEN = "YOUR_OAUTH_TOKEN"
# Domain of the target site; interpolated into the endpoint URL below.
WORDPRESS_SITE = "your-site.wordpress.com"
# Authorization header sent with every API request.
HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}"}
# Base posts URL: used with query parameters to list posts and with
# "/<post_id>" appended to update a single post.
POSTS_ENDPOINT = f"https://public-api.wordpress.com/rest/v1.1/sites/{WORDPRESS_SITE}/posts"
def get_all_posts():
    """Fetch every published post on the site, 100 posts per request.

    Pages are requested one after another until the API returns an empty
    ``posts`` list. If a request fails, the failure is printed and the posts
    collected so far are returned.

    Returns:
        A list of post dicts as returned by the API.
    """
    posts, page = [], 1
    while True:
        url = f"{POSTS_ENDPOINT}?number=100&status=publish&page={page}"
        try:
            # requests waits forever by default; bound the wait explicitly.
            r = requests.get(url, headers=HEADERS, timeout=(10, 60))
        except requests.RequestException as e:
            print(f" Error fetching page {page}: {e}")
            break
        if r.status_code != 200:
            # Report the failure instead of silently ending pagination, so a
            # truncated result is not mistaken for the complete post list.
            print(f" Failed to fetch page {page}: {r.status_code} {r.text}")
            break
        data = r.json().get("posts", [])
        if not data:
            break
        posts.extend(data)
        page += 1
        time.sleep(0.2)  # brief pause between page requests
    print(f" Total posts fetched: {len(posts)}")
    return posts
def convert_to_blocks(html):
    """Rewrite raw ``<img>``/``<p>`` markup as Gutenberg block markup.

    Every ``<img>`` with a ``src`` becomes a ``wp:image`` block and every
    non-empty ``<p>`` becomes a ``wp:paragraph`` block. All image blocks are
    emitted first, followed by all paragraph blocks; any other markup in the
    input is dropped.

    Args:
        html: The post's current HTML content (may be empty or ``None``).

    Returns:
        A ``(content, changed)`` tuple. ``changed`` is ``False`` and
        ``content`` is the unmodified input when there was nothing to convert.
    """
    if not html:
        # Nothing to parse; also avoids handing None to the HTML parser.
        return html, False

    # Function-scope import: the parameter name shadows the html module.
    from html import escape

    soup = BeautifulSoup(html, "html.parser")
    blocks = []
    for img in soup.find_all("img"):
        url = img.get("src")
        if url:
            # The parser hands back the decoded attribute value, so it has to
            # be escaped again before being written into new markup.
            blocks.append(
                "<!-- wp:image -->\n"
                f"<figure class='wp-block-image'><img src='{escape(url)}' alt=''/></figure>\n"
                "<!-- /wp:image -->"
            )
    for p in soup.find_all("p"):
        text = p.get_text(strip=True)
        if text:
            # get_text() decodes entities (e.g. "&amp;" -> "&"); re-escape so
            # captions containing &, < or > stay valid HTML.
            blocks.append(
                f"<!-- wp:paragraph -->\n<p>{escape(text, quote=False)}</p>\n<!-- /wp:paragraph -->"
            )
    if not blocks:
        return html, False
    return "\n\n".join(blocks), True
def update_post(post_id, title, content):
    """Replace the content of an existing post.

    Args:
        post_id: ID of the post to update.
        title: Post title; used only for the progress message.
        content: New post content.

    Returns:
        ``True`` if the API answered 200/201, ``False`` otherwise (the reason
        is printed).
    """
    url = f"{POSTS_ENDPOINT}/{post_id}"
    try:
        # requests waits forever by default; bound the wait explicitly.
        r = requests.post(url, headers=HEADERS, json={"content": content}, timeout=(10, 60))
    except requests.RequestException as e:
        # A network error on one post should not abort the whole run.
        print(f" Failed to update {post_id}: {e}")
        return False
    if r.status_code in (200, 201):
        # Guard against a missing title so the log line itself cannot crash.
        print(f" Updated post {post_id}: {(title or '')[:30]}...")
        return True
    print(f" Failed to update {post_id}: {r.status_code}")
    return False
def main():
    """Convert every fetched post to block markup and print a summary.

    Posts for which ``convert_to_blocks`` reports no change are skipped; all
    others are written back with ``update_post``, pausing after each update.
    """
    print("Converting posts to Gutenberg blocks...")
    all_posts = get_all_posts()
    total = len(all_posts)
    succeeded = failed = skipped = 0

    for index, post in enumerate(all_posts, start=1):
        post_id = post.get("ID")
        title = post.get("title")
        original_html = post.get("content")
        print(f"\nProcessing {index}/{total}: {title[:40]}...")

        block_html, was_changed = convert_to_blocks(original_html)
        if was_changed:
            if update_post(post_id, title, block_html):
                succeeded += 1
            else:
                failed += 1
            time.sleep(0.5)  # pause after each update request
        else:
            print("Skipped (no paragraphs/images)")
            skipped += 1

    print(f"\n Done. Success: {succeeded}, Skipped: {skipped}, Failed: {failed}")


if __name__ == "__main__":
    main()
Step 5, bask in glory and don’t forget to be your person’s first RSS subscriber
With her new blog up and running, I’m super excited to see what she gets up to over there. I added her RSS feed to netnewswire, and I can’t wait for the first new post to show up!