node.js - Node memory issues when importing 3 million lines from a CSV file into MongoDB using Mongoose (need help parsing the items in batches) -


I'm trying to import a CSV file of approximately 3 million lines.

I need to know how to split the process so that my laptop can handle this amount of data.

By running `node --max_old_space_size=8000 eotdimport.js` I can console.log the data.length of the array that csv-parse generates when reading the CSV file.

But when I execute the function _importtermcsv(data, eotd) to import the data into MongoDB by means of Mongoose, my laptop runs out of memory.

The script works fine when I insert 100.000 lines, but with 3.000.000 lines things get complicated because of memory issues.

Please point me in the right direction on how to "divide" the CSV file, or the array that is read from it, and store e.g. 10.000 records in each batch.

eotdimport.js

const mongoose = require("mongoose");
const parse = require("csv-parse");
const path = require("path");
const fs = require("fs");
const eotd = require("../../models/eotd");
require("dotenv").config({ path: "../../.env" });

// Let Mongoose hand back native promises.
mongoose.Promise = global.Promise;

mongoose.connect(process.env.mongodb_uri, {
  keepAlive: true,
  reconnectTries: Number.MAX_VALUE,
  useMongoClient: true
});

// Directory that holds the CSV files to import.
const p = path.join(__dirname, "/../../", "data", "eotd");

// With a callback, csv-parse collects every row and delivers them as one array.
const parser = parse({ delimiter: "," }, (err, data) => {
  if (err) {
    // Without this guard a parse failure would crash on `data.length`
    // and leave the connection open.
    console.log("there error", err);
    mongoose.disconnect();
    return;
  }
  console.log("data.length: ", data.length);
  _importtermcsv(data, eotd);
});

fs.createReadStream(`${p}/term.csv`).pipe(parser);

/**
 * Imports parsed CSV rows into MongoDB in fixed-size batches.
 *
 * Each row is read as [termid, conceptid, term]. Batches are written one
 * after another with `insertMany`, so only a single batch of documents is
 * in flight at any time (instead of one pending `save()` per row), and the
 * connection is closed exactly once: after the last batch, or after the
 * first batch that fails.
 *
 * @param {Array<Array<string>>} data - All rows produced by csv-parse.
 * @param {Object} mongoosemodel - Mongoose model the rows are inserted into.
 * @param {number} [batchSize=10000] - Number of rows written per insertMany call.
 * @returns {Promise} Settles once the import has ended and disconnect was requested.
 */
function _importtermcsv(data, mongoosemodel, batchSize = 10000) {
  // Fall back to the default rather than loop forever on 0, negatives or NaN.
  const size = Number.isInteger(batchSize) && batchSize > 0 ? batchSize : 10000;

  // NOTE(review): the original loop started at index 1, presumably to skip a
  // header row. The sample data shown alongside this script has no header, in
  // which case the first record is dropped — confirm and change to 0 if so.
  const firstRow = 1;

  // Inserts rows [start, start + size) and then recurses into the next batch.
  const insertFrom = start => {
    if (start >= data.length) {
      return Promise.resolve();
    }
    const end = Math.min(start + size, data.length);
    const docs = [];
    for (let i = start; i < end; i++) {
      docs.push({
        // term_id: 0161-1#tm-012896#1
        termid: data[i][0],
        // concept_id: 0161-1#01-011584#1
        conceptid: data[i][1],
        // term: adapter set,cylinder compression tester
        term: data[i][2]
      });
    }
    // Wait for this batch before building the next one to bound memory use.
    return mongoosemodel.insertMany(docs).then(() => insertFrom(end));
  };

  return insertFrom(firstRow)
    .catch(err => {
      console.log("there error", err);
    })
    // Disconnect once, only after all batches have finished (or one failed).
    .then(() => mongoose.disconnect());
}

eotd.js mongoose model

const mongoose = require("mongoose");

// Schema for one eOTD record; every field is stored as a plain string.
const eotdschema = new mongoose.Schema({
  conceptid: String,
  termid: String,
  term: String,
  reference: String,
  statusterm: String,
  language: String,
  originatingorganization: String,
  definitionid: String,
  definition: String,
  statusconcept: String
});

// Example values from the source data:
// concept type: class, other, property, ..,
// concept_id: 0161-1#01-011584#1
// term_id: 0161-1#tm-012896#1
// term: adapter set,cylinder compression tester
// reference: fiig=a23800;inc=47525
// definition_id: 0161-1#df-416067#1
// definition: collection of adapter (1), cylinder compression tester of different types , sizes. may include storage case.
// language_id: 0161-1#lg-000001#1
// language: en -
// originating organization: dlis
// organization_id: 0161-1#og-002462#1
// status term: active
// status concept: active

// A concept id can have terms and definitions in multiple different languages.

// Create and export the mongoose model.
module.exports = mongoose.model("eotd", eotdschema);

The first lines of the term.csv file

"0161-1#tm-1093704#1","0161-1#01-047609#1","shoes,girls'"
"0161-1#tm-1087811#1","0161-1#01-040002#1","oxalic acid,anhydrous,reagent"
"0161-1#tm-1082528#1","0161-1#01-033928#1","urea,sterile,usp"
"0161-1#tm-1072706#1","0161-1#01-023453#1","cryptenamine tannates,methyclothiazide,and reserpine tablets"
"0161-1#tm-1088099#1","0161-1#01-040308#1","potassium metaperiodate,reagent"
"0161-1#tm-1065473#1","0161-1#01-014827#1","runner,wood"
"0161-1#tm-1091065#1","0161-1#01-044529#1","castanets"
"0161-1#tm-1070433#1","0161-1#01-021013#1","beta-lactamase detection discs"

The first lines of the data array that csv-parse generates.

data =  [ [ '0161-1#tm-1093704#1', '0161-1#01-047609#1', 'shoes,girls\'' ],   [ '0161-1#tm-1087811#1',     '0161-1#01-040002#1',     'oxalic acid,anhydrous,reagent' ],   [ '0161-1#tm-1082528#1',     '0161-1#01-033928#1',     'urea,sterile,usp' ],   [ '0161-1#tm-1072706#1',     '0161-1#01-023453#1',     'cryptenamine tannates,methyclothiazide,and reserpine tablets' ],   [ '0161-1#tm-1088099#1',     '0161-1#01-040308#1',     'potassium metaperiodate,reagent' ],   [ '0161-1#tm-1065473#1', '0161-1#01-014827#1', 'runner,wood' ],   [ '0161-1#tm-1091065#1', '0161-1#01-044529#1', 'castanets' ] ] 


Comments

Popular posts from this blog

angular - Ionic slides - dynamically add slides before and after -

Add a dynamic header in angular 2 http provider -

minify - Minimizing css files -