There are three common file formats for saving data: .txt (plain text), .csv (comma-separated values, delimiter ',') and .tsv (tab-separated values, delimiter '\t'). This post shows how to load and save data in these formats.
Python
1. NumPy (Numeric Python)
If all you need to save is a table of numbers, NumPy is the fastest and simplest choice. Here is an example first:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
#
# Created by WW on Jan. 23, 2020
# All rights reserved.
#
import numpy as np
def main():
    """Load ``lambda.txt`` and round-trip it through ``.csv`` and ``.tsv`` files.

    Reads a numeric table from ``lambda.txt`` in the current working
    directory with ``np.loadtxt``, writes it out as ``lambda.csv`` and
    ``lambda.tsv`` with ``np.savetxt``, and loads both files back.
    The table must have at least two columns (the second column is sliced).
    """
    #=============================================================
    # Load plain text file (.txt) as np.array.
    # ndmin=2 keeps the array 2-D even when the file holds a single row;
    # without it loadtxt returns a 1-D array, the column slices below raise
    # IndexError, and savetxt would write one value per line.
    data = np.loadtxt(r"lambda.txt", ndmin=2)
    #print(data)
    x = data[:, 0]  # get the first column of data
    y = data[:, 1]  # get the second column of data
    #print(x)
    #print(y)
    #=============================================================
    # Save as .csv (comma separated values), then load it back as np.array.
    np.savetxt("lambda.csv", data, fmt="%g", delimiter=',')
    data1 = np.loadtxt(r"lambda.csv", delimiter=',', ndmin=2)
    #print(data1)
    #=============================================================
    # Save as .tsv (tab separated values), then load it back as np.array.
    np.savetxt("lambda.tsv", data, fmt="%g", delimiter='\t')
    data2 = np.loadtxt(r"lambda.tsv", delimiter='\t', ndmin=2)
    #print(data2)


if __name__ == "__main__":
    main()
The script above is very easy to understand.
2. Pandas (Python Data Analysis Library)
If the data you want to save includes row and column labels (an index), pandas is a better choice:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
#
# Created by WW on Jan. 26, 2020
# All rights reserved.
#
import numpy as np
import pandas as pd
def main():
    """Write a small labelled DataFrame to ``pd_test.csv`` and read it back.

    Builds a 4x4 integer DataFrame with row and column labels, saves it
    (labels included) as ``pd_test.csv`` in the current working directory,
    reloads it with new column labels, and prints the labels and values.
    """
    #=====================================================================
    # Create a pandas.DataFrame.
    #   index:   row labels
    #   columns: column labels
    df = pd.DataFrame(np.arange(16).reshape(4, 4),
                      index=['row1', 'row2', 'row3', 'row4'],
                      columns=['col1', 'col2', 'col3', 'col4'])
    print(df, '\n')

    # Save pd.DataFrame as .csv file.
    #   index: bool, default True; True writes the row labels as the first
    #          column of the CSV file, False omits them.
    df.to_csv("pd_test.csv", index=True, sep=',')

    #=====================================================================
    # Use pandas to read .csv file.
    #   header:    row number that holds the column labels (0 = first line);
    #              None means the file has no header line.
    #   index_col: column to use as the row labels (0 = first column);
    #              the default None does not request an index column.
    #   names:     list of column labels to use; with header=0 they replace
    #              the labels found in the file.
    # The last name used to be ' d' (with a leading space), which produced a
    # column that could not be selected as data['d'].
    data = pd.read_csv('pd_test.csv', header=0, index_col=0,
                       names=['a', 'b', 'c', 'd'])

    X = data.columns  # get column index
    Y = data.index    # get line index
    print(X)
    print(Y)
    # Notice:
    #   Both sets of labels are read as strings here.
    #   NOTE(review): their dtype is reported as 'object' by the pandas
    #   versions this was written for; newer releases may show a string dtype.
    #   If the labels are numeric text and you need numbers:
    #       X = X.astype(np.float64), Y = Y.astype(np.float64)
    print(data, '\n')

    # Convert the values (excluding row and column labels) of
    # pd.DataFrame -> np.array.
    print(data.values)


if __name__ == "__main__":
    main()
The above code is also very easy to understand.
C++
Again, I'd like to start with an example. However, it is much more complicated than the Python NumPy and pandas versions: you have to understand some C++ concepts such as vectors and streams.
//
// csv_load.cpp
// CPP
//
// Created by WW on 2020/01/23.
// Copyright © 2020 Wang Wei. All rights reserved.
//
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <sstream> // stringstream
#include <stdio.h> // sscanf
using namespace std;
/**
 * Load "lambda.csv" from the current working directory and print it as a
 * tab-separated table of doubles.
 *
 * Every line is split on ',' into string fields; each field is then parsed
 * with sscanf("%lf"). The number of columns is taken from the first line.
 * Missing or non-numeric cells are reported on stderr and printed as 0.
 *
 * @param argc unused
 * @param argv unused
 * @return 0 on success, 1 when the file cannot be opened or holds no lines.
 */
int main(int argc, char **argv)
{
    (void)argc; // command-line arguments are not used
    (void)argv;

    using Index = std::vector<std::string>::size_type;

    const std::string fileName("lambda.csv");

    std::ifstream fin(fileName);
    if (!fin)
    {
        // Stop here: continuing would index into an empty table below,
        // which is undefined behaviour.
        std::cerr << "Error: <" << fileName << "> is not existing!\n";
        return 1;
    }

    std::cout << "Loading <" << fileName << "> ...\n";

    std::vector< std::vector<std::string> > dataString; // file as a 2-D table of fields
    std::string line;                                   // one raw line of the file
    while (std::getline(fin, line))
    {
        // Wrap the line in a stringstream so getline() can split it on ','.
        std::stringstream ss(line);
        std::string field;
        std::vector<std::string> row;
        while (std::getline(ss, field, ',')) // delimiter=','
            row.push_back(field);
        dataString.push_back(row);
    }
    fin.close();

    if (dataString.empty())
    {
        std::cerr << "Error: <" << fileName << "> contains no data!\n";
        return 1;
    }

    const Index rows = dataString.size();
    const Index cols = dataString[0].size(); // column count comes from the first line
    std::cout << "Data dimension: " << rows << " x " << cols << '\n';

    // Data in 2-D vector S(m*n); every cell starts at 0.0.
    std::vector< std::vector<double> > S(rows, std::vector<double>(cols));
    for (Index i = 0; i < rows; ++i)
    {
        const std::vector<std::string> &row = dataString[i];
        if (row.size() != cols)
        {
            std::cerr << "Warning: line " << (i + 1) << " has " << row.size()
                      << " fields, expected " << cols << "\n";
        }
        for (Index j = 0; j < cols; ++j)
        {
            // Only parse cells that exist; short rows keep the 0.0 default.
            // c_str() converts "string" to "const char *",
            // sscanf() converts that text to "%lf" ("double").
            if (j < row.size() && sscanf(row[j].c_str(), "%lf", &S[i][j]) != 1)
            {
                std::cerr << "Warning: cannot parse \"" << row[j]
                          << "\" on line " << (i + 1) << " as a number\n";
                S[i][j] = 0.0;
            }
            std::cout << S[i][j] << "\t";
        }
        std::cout << '\n';
    }
    return 0;
}
Loading .txt and .tsv files is much easier, because their delimiter is whitespace (for example a tab, '\t'). There is no need to read each line into a string, split it on the delimiter ',' and then convert every field from string to double. You can simply loop over fin >> S[i][j]; and the data will be read directly into a double array or vector.
Writing a .txt, .csv or .tsv file is very simple in C++: just loop over fout << S[i][j] << ','; and replace ',' with whichever delimiter the format needs ('\t', '\n' or ',').

