admin管理员组文章数量:1287820
I'm trying to query posts from Instagram by providing the hashtag and the time range (since and until dates). I use the recent tags endpoint.
https://api.instagram.com/v1/tags/{tag-name}/media/recent?access_token=ACCESS-TOKEN
My code is written in Node.js using the instagram-node
library (see the inline comments):
// Require the config file (read below for instagram.access_token)
var config = require('../config.js');
// Require and initialize the instagram instance
var ig = require('instagram-node').instagram();
// Set the access token used by every request made through `ig`
ig.use({ access_token: config.instagram.access_token });
// We export this function for public use
// hashtag: the hashtag to search for
// minDate: the since date
// maxDate: the until date
// callback: the callback function (err, posts)
module.exports = function (hashtag, minDate, maxDate, callback) {
// Create the posts array (will be concated with new posts from pagination responses)
var posts = [];
// Convert the date objects into timestamps (seconds)
var sinceTime = Math.floor(minDate.getTime() / 1000);
var untilTime = Math.floor(maxDate.getTime() / 1000);
// Fetch the IG posts page by page
ig.tag_media_recent(hashtag, { count: 50 }, function fetchPosts(err, medias, pagination, remaining, limit) {
// Handle error
if (err) {
return callback(err);
}
// Manually filter by time
var filteredByTime = medias.filter(function (currentPost) {
// Convert the created_time string into number (seconds timestamp)
var createdTime = +currentPost.created_time;
// Check if it's after since date and before until date
return createdTime >= sinceTime && createdTime <= untilTime;
});
// Get the last post on this page
var lastPost = medias[medias.length - 1] || {};
// ...and its timestamp
var lastPostTimeStamp = +(lastPost.created_time || -1);
// ...and its timestamp date object
var lastPostDate = new Date(lastPostTimeStamp * 1000);
// Concat the new [filtered] posts to the big array
posts = posts.concat(filteredByTime);
// Show some output
console.log('found ' + filteredByTime.length + ' new items total: ' + posts.length, lastPostDate);
// Check if the last post is BEFORE until date and there are no new posts in the provided range
if (filteredByTime.length === 0 && lastPostTimeStamp <= untilTime) {
// ...if so, we can callback!
return callback(null, posts);
}
// Navigate to the next page
pagination.next(fetchPosts);
});
};
This will start fetching the posts from the most recent to the least recent ones, and manually filter them by created_time.
This works, but it's very very inefficient because if we want, for example, to get the posts from one year ago, we have to iterate the pages until that time, and this will use a lot of requests (probably more than 5k / hour which is the rate limit).
Is there a better way to make this query? How to get the Instagram posts by providing the hashtag and the time range?
I'm trying to query posts from Instagram by providing the hashtag and the time range (since and until dates). I use the recent tags endpoint.
https://api.instagram.com/v1/tags/{tag-name}/media/recent?access_token=ACCESS-TOKEN
My code is written in Node.js using the instagram-node
library (see the inline comments):
// Require the config file (read below for instagram.access_token)
var config = require('../config.js');
// Require and initialize the instagram instance
var ig = require('instagram-node').instagram();
// Set the access token used by every request made through `ig`
ig.use({ access_token: config.instagram.access_token });
// We export this function for public use
// hashtag: the hashtag to search for
// minDate: the since date
// maxDate: the until date
// callback: the callback function (err, posts)
module.exports = function (hashtag, minDate, maxDate, callback) {
// Create the posts array (will be concated with new posts from pagination responses)
var posts = [];
// Convert the date objects into timestamps (seconds)
var sinceTime = Math.floor(minDate.getTime() / 1000);
var untilTime = Math.floor(maxDate.getTime() / 1000);
// Fetch the IG posts page by page
ig.tag_media_recent(hashtag, { count: 50 }, function fetchPosts(err, medias, pagination, remaining, limit) {
// Handle error
if (err) {
return callback(err);
}
// Manually filter by time
var filteredByTime = medias.filter(function (currentPost) {
// Convert the created_time string into number (seconds timestamp)
var createdTime = +currentPost.created_time;
// Check if it's after since date and before until date
return createdTime >= sinceTime && createdTime <= untilTime;
});
// Get the last post on this page
var lastPost = medias[medias.length - 1] || {};
// ...and its timestamp
var lastPostTimeStamp = +(lastPost.created_time || -1);
// ...and its timestamp date object
var lastPostDate = new Date(lastPostTimeStamp * 1000);
// Concat the new [filtered] posts to the big array
posts = posts.concat(filteredByTime);
// Show some output
console.log('found ' + filteredByTime.length + ' new items total: ' + posts.length, lastPostDate);
// Check if the last post is BEFORE until date and there are no new posts in the provided range
if (filteredByTime.length === 0 && lastPostTimeStamp <= untilTime) {
// ...if so, we can callback!
return callback(null, posts);
}
// Navigate to the next page
pagination.next(fetchPosts);
});
};
This will start fetching the posts from the most recent to the least recent ones, and manually filter them by created_time.
This works, but it's very very inefficient because if we want, for example, to get the posts from one year ago, we have to iterate the pages until that time, and this will use a lot of requests (probably more than 5k / hour which is the rate limit).
Is there a better way to make this query? How to get the Instagram posts by providing the hashtag and the time range?
Share Improve this question asked Nov 24, 2015 at 10:22 phizzyphizzy 9569 silver badges28 bronze badges 9-
Could you just increase the
count
to grab a significantly higher amount of photos at a time to reduce the amount of post fetches? Of course they would be much larger but would something like this be beneficial? – Nick Zuber Commented Dec 7, 2015 at 16:57 -
@NickZ We did try to do that (I was debugging this with the OP) and the max we got was 33 items / request. So, the
count
doesn't really help... :-( Any other ideas? I'm happy to award 50 points to someone who gives a good answer. :D – Ionică Bizău Commented Dec 8, 2015 at 8:54 - 1 Have you looked into using MIN_TAG_ID and MAX_TAG_ID to iterate quickly towards a target date? I can imagine a method which would query for only one post at a time to find a MAX_TAG_ID that is just before the date required. – sbozzie Commented Dec 8, 2015 at 16:40
- Seeing that there isn't any parameter referenced, you'll need to do this filtering based on the response. Would it be unacceptable to grab the data and drop what you don't need? – Matt Wagner Commented Dec 8, 2015 at 18:50
-
@sbozzie Yes, that's probably what the
pagination.next
does internally. But that's not related to the date (or is it?). If you find a relation between date and tag id, that would be great (e.g. 21 March 2013
to be converted into tag id which being used will fetch the posts starting with that date). I guess the tag id is just an internal id, but I'm not sure. – Ionică Bizău Commented Dec 9, 2015 at 8:02
3 Answers
Reset to default 6 +50 I think this is the basic idea you're looking for. I'm not overly familiar with Node.js, so this is all in plain JavaScript. You'll have to modify it to suit your needs and probably make a function out of it.
The idea is to convert an Instagram id (1116307519311125603 in this example) to a date and vice versa, to enable you to quickly grab a specific point in time rather than backtrack through all results until finding your desired timestamp. The portion of the id after the underscore '_' should be trimmed off as that refers, in some way, to the user IIRC. There are 4 functions in the example that I hope will help you out.
Happy hacking!
//static: handy durations expressed in seconds
var epoch_hour = 3600,
    epoch_day = 86400,
    epoch_month = 2592000,   // 30 days
    epoch_year = 31557600;   // 365.25 days
//you'll need to set this part up/integrate it with your code
// NOTE: the dataId literal is above Number.MAX_SAFE_INTEGER, so the stored
// value is rounded to the nearest double; only the low digits are affected.
var dataId = 1116307519311125603,
    range = 2 * epoch_hour,
    count = 1,
    tagName = 'cars',
    access = prompt('Enter access token:'),
    // Tag and token are escaped so that unusual characters cannot break the
    // query string
    baseUrl = 'https://api.instagram.com/v1/tags/' +
        encodeURIComponent(tagName) + '/media/recent?access_token=' +
        encodeURIComponent(access);
//date && id utilities
// Maps an Instagram id onto a Unix timestamp in seconds, rounded to the
// nearest whole second.
// n: the numeric id (without any "_suffix" part)
// Scale note: 0.008388608 * 1e12 equals 2^23 * 1000, i.e. the id units per
// second. The additive offset presumably encodes Instagram's custom epoch
// (approximation, verify before relying on sub-second accuracy).
function idToEpoch(n){
    var scaledId = n / 1000000000000;
    var shifted = scaledId + 11024476.5839159095;
    var seconds = shifted / 0.008388608;
    return Math.round(seconds);
}
// Maps a Unix timestamp in seconds onto the (approximate) Instagram id
// generated at that moment; the inverse of the id -> seconds conversion.
// n: epoch time in seconds
// The result is a plain Number well above 2^53, so only its leading digits
// are meaningful.
function epochToId(n){
    var scaledSeconds = n * 0.008388608;
    var relative = scaledSeconds - 11024476.5839159095;
    var rawId = relative * 1000000000000;
    return Math.round(rawId);
}
// Builds a Date that lies n seconds after the Unix epoch.
// n: epoch time in seconds (a fractional part is dropped by setUTCSeconds)
function newDateFromEpoch(n){
    // setUTCSeconds returns the updated time value in milliseconds, which is
    // handed straight to a fresh Date
    return new Date(new Date(0).setUTCSeconds(n));
}
// Returns the epoch time of a Date in whole seconds.
// d: the Date to convert
function dateToEpoch(d){
    // Strip the millisecond component first so the division yields an integer
    var wholeSecondMillis = d.getTime() - d.getMilliseconds();
    return wholeSecondMillis / 1000;
}
//derive the time window (epoch seconds) from the sample id, then the id
//window and the Date objects that correspond to both ends of it
var epoch_time = idToEpoch(dataId);
var minumumId = epochToId(epoch_time);
var maximumId = epochToId(epoch_time + range);
var minDate = newDateFromEpoch(epoch_time);
var maxDate = newDateFromEpoch(epoch_time + range);
//append the paging parameters to the base request url, one at a time
var newUrl = baseUrl;
newUrl += '&count=' + count;
newUrl += '&min_tag_id=' + minumumId;
newUrl += '&max_tag_id=' + maximumId;
//used for testing
/*alert('Start: ' + minDate + ' (' + epoch_time +
')\nEnd: ' + maxDate + ' (' + (epoch_time +
range) + ')');
window.location = newUrl;*/
To support this excellent answer, an instagram ID is generated via the plpgSQL function:
-- Generates the next id for shard 5.
-- Layout of the resulting bigint, from high to low bits:
--   bits 23 and up : milliseconds elapsed since our_epoch
--   bits 10..22    : shard id
--   bits 0..9      : per-shard sequence value modulo 1024
CREATE OR REPLACE FUNCTION insta5.next_id(OUT result bigint) AS $$
DECLARE
    our_epoch bigint := 1314220021721;  -- custom epoch, in milliseconds
    seq_id bigint;
    now_millis bigint;
    shard_id int := 5;
BEGIN
    -- Modulo is a single %; the doubled %% is not a PostgreSQL operator and
    -- makes the function fail when it is called
    SELECT nextval('insta5.table_id_seq') % 1024 INTO seq_id;
    -- Current wall-clock time in milliseconds
    SELECT FLOOR(EXTRACT(EPOCH FROM clock_timestamp()) * 1000) INTO now_millis;
    -- Assemble the id: timestamp first, then shard id, then sequence
    result := (now_millis - our_epoch) << 23;
    result := result | (shard_id << 10);
    result := result | (seq_id);
END;
$$ LANGUAGE PLPGSQL;
from Instagram's blog
Although the process of getting posts is similar, the Data365.co Instagram API, which I currently work on, seems to be more suitable and efficient. It does not have a limit of 5,000 posts per hour, and you can specify the time period for which you need posts in the request itself. Also, billing only takes into account posts from the indicated period, so you won't have to pay for data you don't need.
You can see below a task example to download posts by the hashtag bitcoins for the period from January 1, 2021, to January 10, 2021.
POST request: https://api.data365.co/v1.1/instagram/tag/bitcoins/update?max_posts_count=1000&from_date=2021-01-01&to_date=2021-01-10&access_token=TOKEN
A GET request example to get the corresponding list of posts: https://api.data365.co/v1.1/instagram/tag/bitcoins/posts?from_date=2021-01-01&to_date=2021-01-10&max_page_size=100&order_by=date_desc&access_token=TOKEN
For more details, see the API documentation at https://api.data365.co/v1.1/instagram/docs#tag/Instagram-hashtag-search
本文标签: javascriptQuery Instagram posts by hashtag and time rangeStack Overflow
版权声明:本文标题:javascript - Query Instagram posts by hashtag and time range - Stack Overflow 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.betaflare.com/web/1741309842a2371587.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论