3/06/2015

Split large multi header .csv file to multiple files in Java

Split large multi header .csv file to multiple files in Java

Problem
----------------
Suppose we have a large .csv file containing multiple header sections, like the one below. In a multi-threaded ETL process we need to split this file into multiple files, one per header.

AAA
Column1 | Column2 | Column3 ..................
row1
row2
....
;;;;;

BBB
Column1 | Column2 | Column3 ..................
row1
row2








For the above format I used a regular expression as the split point.

String regex = "^.*[A-Z]$"

You can change this expression in the method below to match your own section headers before using it for your problem.

The function below goes through the large file line by line and, whenever a header is encountered, writes the lines collected so far to a new file. The saveFile function creates and writes that new file.

// Base directory, including the trailing slash because paths are built by plain
// string concatenation: split() reads parentFolder + fileName + ".csv" and
// saveFile() writes its output under parentFolder + "splittedFile".
private String parentFolder = "C:/ETL/copy/";
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
/**
 * Splits {@code parentFolder + fileName + ".csv"} into one output file per
 * header section.
 *
 * <p>The input is read line by line. A line whose trimmed text matches
 * {@code ^.*[A-Z]$} (i.e. ends with an upper-case ASCII letter) is treated as
 * a section header and is logged. Every header except one on the very first
 * line flushes the lines collected so far through {@code saveFile} and starts
 * a new section beginning with that header line. The sections are named
 * {@code fileName + counter + ".csv"} with {@code counter} starting at 1, and
 * each is given the source file's last-modified time. The final section is
 * flushed when the end of the input is reached, so an empty input still
 * produces one (empty) output file.
 *
 * @param fileName name of the source file without the {@code .csv} extension
 * @throws IOException if the source file cannot be read or a section cannot
 *     be saved; failures are reported to the caller instead of only being
 *     printed, so a partial split is not mistaken for a successful one
 */
public void split(String fileName) throws IOException {
  File headFile = new File(parentFolder + fileName + ".csv");

  // Compiled once up front: Pattern.matches(regex, ...) would recompile the
  // same expression for every line of a large file.
  Pattern headerPattern = Pattern.compile("^.*[A-Z]$");

  // try-with-resources closes the reader even when reading or saving fails.
  try (BufferedReader bufferedReader = new BufferedReader(new FileReader(headFile))) {
    // Kept as StringBuffer because that is the type saveFile accepts.
    StringBuffer stringBuffer = new StringBuffer();
    String line;
    int row = 0;
    int counter = 1;

    while ((line = bufferedReader.readLine()) != null) {
      boolean isMatch = headerPattern.matcher(line.trim()).matches();
      if (isMatch) {
        logger.info(line);
      }

      // A header on the first row opens the first section; there is nothing
      // collected yet, so nothing is flushed for it.
      if (isMatch && row != 0) {
        saveFile(stringBuffer, fileName + counter + ".csv", headFile.lastModified());
        counter++;
        stringBuffer = new StringBuffer();
      }

      // The current line always belongs to the section being collected.
      stringBuffer.append(line);
      stringBuffer.append(NEWLINE);
      row++;
    }

    // Flush the last (or only) section.
    saveFile(stringBuffer, fileName + counter + ".csv", headFile.lastModified());
  }
}


 
1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24


  
 /**
  * Writes one split section to
  * {@code parentFolder + "splittedFile/" + filename}, creating the
  * {@code splittedFile} directory when it does not exist yet, and then stamps
  * the written file with the given modification time.
  *
  * @param stringBuffer     the section's content, written as-is
  * @param filename         name of the output file inside {@code splittedFile}
  * @param lastModifiedTime modification time (milliseconds since the epoch)
  *                         to apply to the written file
  * @throws IOException if the output directory cannot be created or the file
  *     cannot be written
  */
 private void saveFile(StringBuffer stringBuffer, String filename, long lastModifiedTime)
     throws IOException {
   File directory = new File(parentFolder + "splittedFile");
   // mkdirs() returns false when the directory already exists, so only treat
   // it as a failure when the directory is still missing afterwards.
   if (!directory.mkdirs() && !directory.isDirectory()) {
     throw new IOException("Could not create directory " + directory.getAbsolutePath());
   }

   File file = new File(directory, filename);
   // try-with-resources: the writer is closed on every path, and there is no
   // close() call on a null reference when opening the file fails.
   try (FileWriter output = new FileWriter(file)) {
     output.write(stringBuffer.toString());
   }

   // Must happen after the writer is closed: before the write the file may not
   // exist yet, and writing would overwrite the timestamp anyway. The result
   // is deliberately ignored; the timestamp is best-effort metadata.
   file.setLastModified(lastModifiedTime);
 }

1 comment: