admin管理员组

文章数量:1405509

I'm trying to improve my knowledge regarding memoization in javascript. I have created a memoization function(I think..)

I've got an array of changes(a change log) made to items. Each item in the array contains a reference-id(employeeId) to whom made the edit. Looking something like this.

const changeLog = [
  {
    id: 1,
    employeeId: 1,
    field: 'someField',
    oldValue: '0',
    newValue: '100',
  },
  {
    id: 2,
    employeeId: 2,
    field: 'anotherField',
    oldValue: '20',
    newValue: '100',
  },
  ...
]

I've also got an array containing each employee, looking something like this:

const employees = [
    {
        name: 'Joel Abero',
        id: 1
    },
    {
        name: 'John Doe',
        id: 2
    },
    {
        name: 'Dear John',
        id: 3
    }
]

To find the employee who made the change I map over each item in the changeLog and find where employeeId equals id in the employees-array. Both of these arrays contains 500+ items, I've just pasted fragments. Below I pasted my memoize helper.

1) How can I perform a test to see which of these two run the fastest?
2) Is this a proper way to use memoization?
3) Is there a rule of thumb when to use memoization? Or should I use it as often as I can?

const employees = [
	{
		name: 'Joel Abero',
		id: 1
	},
	{
		name: 'John Doe',
		id: 2
	},
	{
		name: 'Dear John',
		id: 3
	}
]

const changeLog = [
  {
    id: 1,
    employeeId: 1,
    field: 'someField',
    oldValue: '0',
    newValue: '100',
  },
  {
    id: 2,
    employeeId: 2,
    field: 'anotherField',
    oldValue: '0',
    newValue: '100',
  },
  {
    id: 3,
    employeeId: 3,
    field: 'someField',
    oldValue: '0',
    newValue: '100',
  },
  {
    id: 4,
    employeeId: 3,
    field: 'someField',
    oldValue: '0',
    newValue: '100',
  },
  {
    id: 5,
    employeeId: 3,
    field: 'someField',
    oldValue: '0',
    newValue: '100',
  }
]

function findEditedByEmployee (employeeId) {
  return employees.find(({ id }) => id === employeeId)
}

function editedByWithMemoize () {
  let employeesSavedInMemory = {}
  return function(employeeId) {
    if(employeeId in employeesSavedInMemory) {
      console.log("from memory")
      return employeesSavedInMemory[employeeId]
    }
    console.log("not from memory")
    const findEditedBy = findEditedByEmployee(employeeId)
    employeesSavedInMemory[findEditedBy.id] = {name: findEditedBy.name }
    return findEditedBy
  }
}

const memoizedEmployee = editedByWithMemoize();

// with memoization
const changeLogWithEmployeesMemoized = changeLog.map( log => {
  const employeeName = memoizedEmployee(log.employeeId);
  return {
    ...log,
    employeeName: employeeName.name
  }
})

// without memoization
const changeLogWithEmployees = changeLog.map( log => {
  const editedBy = findEditedByEmployee(log.employeeId);
  return {
    ...log,
    employeeName: editedBy.name
  }
})

console.log('memoized', changeLogWithEmployeesMemoized)
console.log('not memoized', changeLogWithEmployees)

I'm trying to improve my knowledge regarding memoization in javascript. I have created a memoization function(I think..)

I've got an array of changes(a change log) made to items. Each item in the array contains a reference-id(employeeId) to whom made the edit. Looking something like this.

const changeLog = [
  {
    id: 1,
    employeeId: 1,
    field: 'someField',
    oldValue: '0',
    newValue: '100',
  },
  {
    id: 2,
    employeeId: 2,
    field: 'anotherField',
    oldValue: '20',
    newValue: '100',
  },
  ...
]

I've also got an array containing each employee, looking something like this:

const employees = [
    {
        name: 'Joel Abero',
        id: 1
    },
    {
        name: 'John Doe',
        id: 2
    },
    {
        name: 'Dear John',
        id: 3
    }
]

To find the employee who made the change I map over each item in the changeLog and find where employeeId equals id in the employees-array. Both of these arrays contains 500+ items, I've just pasted fragments. Below I pasted my memoize helper.

1) How can I perform a test to see which of these two run the fastest?
2) Is this a proper way to use memoization?
3) Is there a rule of thumb when to use memoization? Or should I use it as often as I can?

const employees = [
	{
		name: 'Joel Abero',
		id: 1
	},
	{
		name: 'John Doe',
		id: 2
	},
	{
		name: 'Dear John',
		id: 3
	}
]

const changeLog = [
  {
    id: 1,
    employeeId: 1,
    field: 'someField',
    oldValue: '0',
    newValue: '100',
  },
  {
    id: 2,
    employeeId: 2,
    field: 'anotherField',
    oldValue: '0',
    newValue: '100',
  },
  {
    id: 3,
    employeeId: 3,
    field: 'someField',
    oldValue: '0',
    newValue: '100',
  },
  {
    id: 4,
    employeeId: 3,
    field: 'someField',
    oldValue: '0',
    newValue: '100',
  },
  {
    id: 5,
    employeeId: 3,
    field: 'someField',
    oldValue: '0',
    newValue: '100',
  }
]

function findEditedByEmployee (employeeId) {
  return employees.find(({ id }) => id === employeeId)
}

function editedByWithMemoize () {
  let employeesSavedInMemory = {}
  return function(employeeId) {
    if(employeeId in employeesSavedInMemory) {
      console.log("from memory")
      return employeesSavedInMemory[employeeId]
    }
    console.log("not from memory")
    const findEditedBy = findEditedByEmployee(employeeId)
    employeesSavedInMemory[findEditedBy.id] = {name: findEditedBy.name }
    return findEditedBy
  }
}

const memoizedEmployee = editedByWithMemoize();

// with memoization
const changeLogWithEmployeesMemoized = changeLog.map( log => {
  const employeeName = memoizedEmployee(log.employeeId);
  return {
    ...log,
    employeeName: employeeName.name
  }
})

// without memoization
const changeLogWithEmployees = changeLog.map( log => {
  const editedBy = findEditedByEmployee(log.employeeId);
  return {
    ...log,
    employeeName: editedBy.name
  }
})

console.log('memoized', changeLogWithEmployeesMemoized)
console.log('not memoized', changeLogWithEmployees)

Share edited Jan 29, 2020 at 9:13 VLAZ 29.2k9 gold badges63 silver badges84 bronze badges asked Jan 29, 2020 at 9:11 henrik123henrik123 1,68313 silver badges19 bronze badges
Add a ment  | 

3 Answers 3

Reset to default 2

I'll try to answer each of your questions:

1) How can I perform a test to see which of these two run the fastest?

The best way is just a simple for loop. Take for example a fake API request:

const fakeAPIRequest = id => new Promise(r => setTimeout(r, 100, {id}))

This will take 100ms to plete on request. Using memoization, we can avoid making this 100ms request by checking if we've made this request before:

const cache = {}
const memoizedRequest = async (id) => {
  if (id in cache) return Promise.resolve(cache[id])
  return cache[id] = await fakeAPIRequest(id)
}

Here's a working example:

const fakeAPIRequest = id => new Promise(r => setTimeout(r, 100, {id}))

const cache = {}

const memoizedRequest = async (id) => {
  if (id in cache) return Promise.resolve(cache[id])
  return cache[id] = await fakeAPIRequest(id)
}

const request = async (id) => await fakeAPIRequest(id)

const test = async (name, cb) => {
  console.time(name)
  for (let i = 50; i--;) {
    await cb()
  }
  console.timeEnd(name)
  
}

test('memoized', async () => await memoizedRequest('test'))
test('normal', async () => await request('test'))

2) Is this a proper way to use memoization?

I'm not entirely sure what you mean by this, but think of it as short-term caching. Should your memo call include an API request, it could be great for non-changing data, saving plenty of time, but on the other hand, if the data is subject to change within a short period of time, then memoization can be a bad idea, meaning it will shortly be outdated.

If you are making many many calls to this function, it could eat up memory depending on how big the return data is, but this factor is down to implementation, not "a proper way".

3) Is there a rule of thumb when to use memoization? Or should I use it as often as I can?

More often than not, memoization is overkill - since puters are extremely fast, it can often boil down to just saving milliseconds - If you are only calling the function even just a few times, memoization provides little to no benefit. But I do keep emphasising API requests, which can take long periods of time. If you start using a memoized function, you should strive to use it everywhere where possible. Like mentioned before, though, it can eat up memory quickly depending on the return data.


One last point about memoization is that if the data is already client side, using a map like Nina suggested is definitely a much better and more efficient approach. Instead of looping each time to find the object, it loops once to transform the array into an object (or map), allowing you to access the data in O(1) time. Take an example, using find this time instead of the fakeAPI function I made earlier:

const data = [{"id":0},{"id":1},{"id":2},{"id":3},{"id":4},{"id":5},{"id":6},{"id":7},{"id":8},{"id":9},{"id":10},{"id":11},{"id":12},{"id":13},{"id":14},{"id":15},{"id":16},{"id":17},{"id":18},{"id":19},{"id":20},{"id":21},{"id":22},{"id":23},{"id":24},{"id":25},{"id":26},{"id":27},{"id":28},{"id":29},{"id":30},{"id":31},{"id":32},{"id":33},{"id":34},{"id":35},{"id":36},{"id":37},{"id":38},{"id":39},{"id":40},{"id":41},{"id":42},{"id":43},{"id":44},{"id":45},{"id":46},{"id":47},{"id":48},{"id":49},{"id":50},{"id":51},{"id":52},{"id":53},{"id":54},{"id":55},{"id":56},{"id":57},{"id":58},{"id":59},{"id":60},{"id":61},{"id":62},{"id":63},{"id":64},{"id":65},{"id":66},{"id":67},{"id":68},{"id":69},{"id":70},{"id":71},{"id":72},{"id":73},{"id":74},{"id":75},{"id":76},{"id":77},{"id":78},{"id":79},{"id":80},{"id":81},{"id":82},{"id":83},{"id":84},{"id":85},{"id":86},{"id":87},{"id":88},{"id":89},{"id":90},{"id":91},{"id":92},{"id":93},{"id":94},{"id":95},{"id":96},{"id":97},{"id":98},{"id":99}]
const cache = {}

const findObject = id => data.find(o => o.id === id)
const memoizedFindObject = id => id in cache ? cache[id] : cache[id] = findObject(id)

const map = new Map(data.map(o => [o.id, o]))
const findObjectByMap = id => map.get(id)

const list = Array(50000).fill(0).map(() => Math.floor(Math.random() * 100))
const test = (name, cb) => {
  console.time(name)
  for (let i = 50000; i--;) {
    cb(list[i])
  }
  console.timeEnd(name)
}

test('memoized', memoizedFindObject)
test('normal', findObject)
test('map', findObjectByMap)

All in all, memoization is a great tool, very similar to caching. It provides a big speed up on heavy calculations or long network requests, but can prove ineffective if used infrequently.

I would create a Map in advance and get the object from the map for an update.

If map does not contain a wanted id, create a new object and add it to employees and to the map.

const
    employees = [{ name: 'Joel Abero', id: 1 }, { name: 'John Doe', id: 2 }, { name: 'Dear John', id: 3 }],
    changeLog = [{ id: 1, employeeId: 1, field: 'someField', oldValue: '0', newValue: '100' }, { id: 2, employeeId: 2, field: 'anotherField', oldValue: '20', newValue: '100' }],
    map = employees.reduce((map, o) => map.set(o.id, o), new Map);

for (const { id, field, newValue } of changeLog) {
    let object = map.get(id);
    if (object) {
        object[field] = newValue;
    } else {
        let temp = { id, [field]: newValue };
        employees.push(temp)
        map.set(id, temp);
    }
}

console.log(employees);
.as-console-wrapper { max-height: 100% !important; top: 0; }

Your memoization process is faulty!

You don't return objects with the same shape

When you don't find an employee in the cache, then you look it up and return the entire object, however, you only memoize part of the object:

employeesSavedInMemory[findEditedBy.id] = {name: findEditedBy.name }

So, when you find the employee in cache, you return a cut-down version of the data:

const employees = [ { name: 'Joel Abero', id: 1 }, { name: 'John Doe', id: 2 }, { name: 'Dear John', id: 3 } ]

function findEditedByEmployee (employeeId) {
  return employees.find(({ id }) => id === employeeId)
}

function editedByWithMemoize () {
  let employeesSavedInMemory = {}
  return function(employeeId) {
    if(employeeId in employeesSavedInMemory) {
      console.log("from memory")
      return employeesSavedInMemory[employeeId]
    }
    console.log("not from memory")
    const findEditedBy = findEditedByEmployee(employeeId)
    employeesSavedInMemory[findEditedBy.id] = {name: findEditedBy.name }
    return findEditedBy
  }
}

const memoizedEmployee = editedByWithMemoize();


const found = memoizedEmployee(1);
const fromCache = memoizedEmployee(1);

console.log("found:", found); //id + name
console.log("fromCache:", fromCache);//name

You get different data back when calling the same function with the same parameters.

You don't return the same objects

Another big problem is that you create a new object - even if you change to get a plete copy, the memoization is not transparent:

const employees = [ { name: 'Joel Abero', id: 1 }, { name: 'John Doe', id: 2 }, { name: 'Dear John', id: 3 } ]

function findEditedByEmployee (employeeId) {
  return employees.find(({ id }) => id === employeeId)
}

function editedByWithMemoize () {
  let employeesSavedInMemory = {}
  return function(employeeId) {
    if(employeeId in employeesSavedInMemory) {
      console.log("from memory")
      return employeesSavedInMemory[employeeId]
    }
    console.log("not from memory")
    const findEditedBy = findEditedByEmployee(employeeId)
    employeesSavedInMemory[findEditedBy.id] = { ...findEditedBy } //make a copy of all object properties
    return findEditedBy
  }
}

const memoizedEmployee = editedByWithMemoize();
memoizedEmployee(1)

const found = memoizedEmployee(1);
const fromCache = memoizedEmployee(1);

console.log("found:", found);           //id + name
console.log("fromCache:", fromCache);   //id + name
console.log("found === fromCache :", found === fromCache); // false

The result is basically the same you get "different" data, in that the objects are not the same one. So, for example, if you do:

const employees = [ { name: 'Joel Abero', id: 1 }, { name: 'John Doe', id: 2 }, { name: 'Dear John', id: 3 } ]

function findEditedByEmployee (employeeId) {
  return employees.find(({ id }) => id === employeeId)
}

function editedByWithMemoize () {
  let employeesSavedInMemory = {}
  return function(employeeId) {
    if(employeeId in employeesSavedInMemory) {
      console.log("from memory")
      return employeesSavedInMemory[employeeId]
    }
    console.log("not from memory")
    const findEditedBy = findEditedByEmployee(employeeId)
    employeesSavedInMemory[findEditedBy.id] = { ...findEditedBy } //make a copy of all object properties
    return findEditedBy
  }
}

const memoizedEmployee = editedByWithMemoize();

const original = employees[0];

const found = memoizedEmployee(1);
found.foo = "hello";
console.log("found:", found);             //id + name + foo

const fromCache = memoizedEmployee(1);
console.log("fromCache 1:", fromCache);   //id + name

fromCache.bar = "world";
console.log("fromCache 2:", fromCache);   //id + name + bar

console.log("original:", original);       //id + name + foo

Compare with a normal implementation

I'll use memoize from Lodash but there are many other generic implementations and they still work the same way, so this is only for reference:

const { memoize } = _;

const employees = [ { name: 'Joel Abero', id: 1 }, { name: 'John Doe', id: 2 }, { name: 'Dear John', id: 3 } ]

function findEditedByEmployee (employeeId) {
  return employees.find(({ id }) => id === employeeId)
}

const memoizedEmployee = memoize(findEditedByEmployee);

const original = employees[0];

const found = memoizedEmployee(1);
console.log("found 1:", found);           //id + name

found.foo = "hello";
console.log("found 2:", found);           //id + name + foo

const fromCache = memoizedEmployee(1);
console.log("fromCache 1:", fromCache);   //id + name + foo

fromCache.bar = "world";
console.log("fromCache 2:", fromCache);   //id + name + foo + bar

console.log("original:", original);       //id + name + foo + bar

console.log("found === fromCache :", found === fromCache); //true
<script src="https://cdn.jsdelivr/npm/[email protected]/lodash.min.js"></script>

Just a demonstration that the memoization is pletely transparent and does not produce any odd or unusual behaviour. Using the memoized function is exactly the same as the normal function in terms of effects. The only difference is the caching but there is no impact on how the function behaves.

Onto the actual questions:

How can I perform a test to see which of these two run the fastest?

Honestly, and personally - you shouldn't. A correct implementation of memoization has known properties. A linear search also has known properties. So, testing for speed is testing two known properties of both algorithms.

Let's dip into pure logic here - we have two things to consider:

  • the implementation is correct (let's call this P)
  • properties of implementation are correct (let's call this Q)

We can definitely say that "If the implementation is correct, then properties of implementation are correct", transformable to "if P, then Q" or formally P -> Q. Were we to go in the opposite direction Q -> P and try to test the known properties to confirm the implementation is correct, then we are mitting the fallacy of affirming the consequent.

We can indeed observe that testing the speed is not even testing the solution for correctness. You could have incorrect implementation of memoization yet it would exhibit the same speed property of O(n) lookup once and O(1) on repeat reads as correct memoization. So, the test Q -> P will fail.

Instead, you should test the implementation for correctness, if you can prove that, then you can deduce that you'd have constant speed on repeat reads. And O(1) access is going to be (in most cases, especially this one), faster than O(n) lookup.

Consequently, if you don't need to prove correctness, then you can take the rest for granted. And if you use a known implementation of memoization, then you don't need to test your library.

With all that said, there is something you might still need to be aware of. The caching during memoization relies on creating a correct key for the cached item. And this could potentially have a big, even if constant, overhead cost depending on how the key is being derived. So, for example, a lookup for something near the start of the array might take 10ms yet creating the key for the cache might take 15ms, which means that O(1) would be slower. Slower than some cases.

The correct test to verify speed would normally be to check how much time it takes (on average) to lookup the first item in the array, the last item in the array, something from the middle of the array then check how much time it takes to fetch something from cache. Each of these has to be ran several times to ensure you don't get a random spike of speed either up or down.

But I'd have more to say later*

2) Is this a proper way to use memoization?

Yes. Again, assuming proper implementation, this is how you'd do it - memoize a function and then you get a lot of benefits for caching.

With that said, you can see from the Lodash implementation that you can just generalise the memoization implementation and apply it to any function, instead of writing a memoized version of each. This is quite a big benefit, since you only need to test one memoization function. Instead, if you have something like findEmployee(), findDepartment(), and findAddress() functions which you want to cache the results of, then you need to test each memoization implementation.

3) Is there a rule of thumb when to use memoization? Or should I use it as often as I can?

Yes, you should use it as often as you can* (with a huge asterisk)

* (huge asterisk)

This is the biggest asterisk I know how to make using markdown (outside just embedding images). I could go for a slightly bigger one but alas.

You have to determine when you can use it, in order to use it when you can. I'm not just saying this to be confusing - you shouldn't just be slapping memoized functions everywhere. There are situations when you cannot use them. And that's what I alluded to at the end of answering the first question - I wanted to talk about this in a single place:

You have to tread very carefully to verify what your actual usage is. If you have a million items in an array and only the first 10 are looked up faster than being fetched from cache, then there is 0.001% of items that would have no benefit from caching. In that case - you get a benefit from caching...or do you? If you only do a couple of reads per item, and you're only looking up less than a quarter of the items, then perhaps caching doesn't give you a good long term benefit. And if you look up each item exactly two times, then you're doubling your memory consumption for honestly quite trivial improvement of speed overall. Yet, what if you're not doing in-memory lookup from an array but something like a network request (e.g., database read)? In that case caching even for a single use could be very valuable.

You can see how a single detail can swing wildly whether you should use memoization or not. And often times it's not even that clear when you're initially writing the application, since you don't even know how often you might end up calling a function, what value you'd feed it, nor how often you'd call it with the same values over and over again. Even if you have an idea of what the typical usage might be, you still will need a real environment to test with, instead of just calling a non-memoized and a memoized version of a function in isolation.

Eric Lippert has an an amazing piece on performance testing that mostly boils down to - when performance matters, try to test real application with real data, and real usage. Otherwise your benchmark might be off for all sorts of reasons.

Even if memoization is clearly "faster" you have to consider memory usage. Here is a slightly silly example to illustrate memoization eating up more memory than necessary:

const { memoize } = _;

//stupid recursive function that removes 1 from `b` and 
//adds 1 to `a` until it finds the total sum of the two
function sum (a, b) {
  if(b)
    return sum(a + 1, b - 1)
  //only log once to avoid spamming the logs but show if it's called or not
  console.log("sum() finished");
  return a;
}

//memoize the function
sum = memoize(sum);

const result = sum(1, 999);
console.log("result:", result);


const resultFromCache1 = sum(1, 999); //no logs as it's cached
console.log("resultFromCache1:", resultFromCache1);

const resultFromCache2 = sum(999, 1); //no logs as it's cached
console.log("resultFromCache2:", resultFromCache2);

const resultFromCache3 = sum(450, 550); //no logs as it's cached
console.log("resultFromCache3:", resultFromCache3);

const resultFromCache4 = sum(42, 958); //no logs as it's cached
console.log("resultFromCache4:", resultFromCache4);
<script src="https://cdn.jsdelivr/npm/[email protected]/lodash.min.js"></script>

This will put one thousand cached results in memory. Yes, the function memoized is silly and doing a lot of unnecessary calls, which means there is a lot of memory overhead. Yet at the same time, if we re-call it with any arguments that sum up to 1000, then we immediately get the result, without having to do any recursion.

You can easily have similar real code, even if there is no recursion involved - you might end up calling some function a lot of times with a lot of different inputs. This will populate the cache with all results and yet whether that is useful or not is still up in the air.

So, if you can you should be using memoization. The biggest problem is finding out if you can.

本文标签: Javascript memoize find arrayStack Overflow