[SciPy-user] Fastest way to save/load fairly large (sparse) matrices

David Warde-Farley dwf@cs.toronto....
Wed Jul 18 16:16:19 CDT 2007


Hi Andrew,

This is exactly what I needed. Thanks so much!

David


On 18-Jul-07, at 5:06 PM, Andrew Straw wrote:

> I'm not doing so well today. The file I attached was outdated (with  
> no test() function). I attach the correct file here.
>
> -Andrew "working on too many computers at once for his own good" Straw
> # Author: Andrew D. Straw
> # Copyright (C) 2005-2007, California Institute of Technology
> #
> # Permission is hereby granted, free of charge, to any person obtaining a copy
> # of this software and associated documentation files (the "Software"), to deal
> # in the Software without restriction, including without limitation the rights
> # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> # copies of the Software, and to permit persons to whom the Software is
> # furnished to do so, subject to the following conditions:
> #
> # The above copyright notice and this permission notice shall be included in
> # all copies or substantial portions of the Software.
> #
> # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> # THE SOFTWARE.
>
>
>
> import os
> import numpy
> import scipy
> import scipy.sparse
> from cgtypes import vec3 # HACK
>
> # include this in files:
> # import os
> # ashelf_datadir = os.path.split(__file__)[0]
>
> def save_as_python(fd, var, varname, fname_extra=None):
>     """Write Python source that recreates ``var`` under the name ``varname``.
>
>     The source text is produced by get_code_for_var() and written to ``fd``
>     with a single fd.write() call.
>
>     fd          -- object with a write() method that receives the code
>     var         -- the value to save
>     varname     -- variable name used in the generated code; also the start
>                    of the prefix used to name any data files
>     fname_extra -- optional suffix appended to ``varname`` to form that
>                    prefix (None is treated as '')
>     """
>     if fname_extra is None:
>         fname_extra = ''
>     # prefix handed to get_code_for_var() for naming the files it writes
>     fname_prefix = varname + fname_extra
>     buf = get_code_for_var( varname, fname_prefix, var)
>     fd.write(buf)
>
> def get_code_for_var( name, fname_prefix, var):
>     """Return Python source text that rebuilds ``var`` as variable ``name``.
>
>     numpy.ndarray: the array is written to '<fname_prefix>.ashelf' with
>     tofile(); the returned code reads it back with numpy.fromfile() from
>     the directory named by ``ashelf_datadir`` and then restores the shape.
>     The generated code refers to ``numpy``, ``os`` and ``ashelf_datadir``
>     without defining them.
>
>     scipy.sparse.csc_matrix / csr_matrix: the three component arrays are
>     saved through recursive calls, and the returned code rebuilds the
>     matrix from temporary variables, sets its shape and deletes the
>     temporaries.
>
>     Anything else: repr(var) is used, but only if eval() of that repr
>     compares equal to ``var``; otherwise RuntimeError is raised.
>
>     The returned text always ends with a newline.
>     """
>     # exact type match: ndarray subclasses are not handled by this branch
>     if type(var)==numpy.ndarray:
>         fname = fname_prefix + '.ashelf'
>         # side effect: the raw array data is written to disk right here
>         var.tofile( fname )
>
>         shape = var.shape
>
>         bufs = []
>         # NOTE(review): the mail client wrapped the next statement inside the
>         # string literal ("% / s"); rejoin the two lines so the placeholder
>         # reads "%s" before running this code.
>         bufs.append(
>             '%s = numpy.fromfile(file=os.path.join(ashelf_datadir,"% 
> s"),dtype=numpy.dtype(%s))'%(name,fname,repr(var.dtype.str)))
>         # fromfile() yields a flat array, so the shape is reassigned afterwards
>         bufs.append(
>             '%s.shape = %s'%(name,repr(shape,)))
>         return '\n'.join(bufs)+'\n'
>
>     if isinstance(var,scipy.sparse.csc_matrix):
>         bufs = []
>         # [:-1] drops the trailing newline of each nested result so that the
>         # final join() does not produce blank lines
>         bufs.append(
>             get_code_for_var( '%s_tmp_sparse_data'%name,  
> fname_prefix+'_data', var.data )[:-1])
>         bufs.append(
>             get_code_for_var( '%s_tmp_sparse_indices'%name,  
> fname_prefix+'_indices', var.indices )[:-1])
>         bufs.append(
>             get_code_for_var( '%s_tmp_sparse_indptr'%name,  
> fname_prefix+'_indptr', var.indptr )[:-1])
>         # rebuild the matrix from the three temporaries loaded above
>         bufs.append(
>             '%s = scipy.sparse.csc_matrix((%s,%s,%s))'%(
>             name,
>             '%s_tmp_sparse_data'%name,
>             '%s_tmp_sparse_indices'%name,
>             '%s_tmp_sparse_indptr'%name,
>             ))
>         bufs.append(
>             '%s.shape = %s'%(name,repr(var.shape)))
>         # the generated code removes its temporaries again
>         bufs.append(
>             'del %s_tmp_sparse_data'%name)
>         bufs.append(
>             'del %s_tmp_sparse_indices'%name)
>         bufs.append(
>             'del %s_tmp_sparse_indptr'%name)
>         return '\n'.join(bufs)+'\n'
>
>     # Same scheme as the CSC branch, with 'colind' in place of 'indices'.
>     # NOTE(review): this branch reads var.colind while the CSC branch reads
>     # var.indices -- confirm the attribute name against the SciPy in use.
>     if isinstance(var,scipy.sparse.csr_matrix):
>         bufs = []
>         bufs.append(
>             get_code_for_var( '%s_tmp_sparse_data'%name,  
> fname_prefix+'_data', var.data )[:-1])
>         bufs.append(
>             get_code_for_var( '%s_tmp_sparse_colind'%name,  
> fname_prefix+'_colind', var.colind )[:-1])
>         bufs.append(
>             get_code_for_var( '%s_tmp_sparse_indptr'%name,  
> fname_prefix+'_indptr', var.indptr )[:-1])
>         bufs.append(
>             '%s = scipy.sparse.csr_matrix((%s,%s,%s))'%(
>             name,
>             '%s_tmp_sparse_data'%name,
>             '%s_tmp_sparse_colind'%name,
>             '%s_tmp_sparse_indptr'%name,
>             ))
>         bufs.append(
>             '%s.shape = %s'%(name,repr(var.shape)))
>         bufs.append(
>             'del %s_tmp_sparse_data'%name)
>         bufs.append(
>             'del %s_tmp_sparse_colind'%name)
>         bufs.append(
>             'del %s_tmp_sparse_indptr'%name)
>         return '\n'.join(bufs)+'\n'
>     # always true: this is the fallback for every type not handled above
>     if 1:
>         ra = repr(var)
>         # round-trip check: the repr is only emitted if evaluating it gives
>         # back something equal to the original value
>         try:
>             # the local name 'cmp' shadows the built-in cmp() in this scope
>             cmp = eval(ra)
>         # bare except: any failure inside eval() is reported as RuntimeError
>         # and the original exception is discarded
>         except:
>             raise RuntimeError("eval failed")
>         else:
>             if cmp==var:
>                 return '%s = '%(name,)+ra+'\n'
>             else:
>                 raise RuntimeError("failed conversion")
>
> def test():
>     """Round-trip a sparse matrix through a generated module and compare.
>
>     Builds a 2000 x 20000 float64 array that is zero except for i*j written
>     at rows 0-19 and columns 200-299, converts it to a csc_matrix, writes
>     'test_ashelf_data.py' via save_as_python(), executes that file and
>     asserts that data, indices and indptr of the reloaded matrix match the
>     original. Prints a confirmation message on success.
>
>     The generated module and the data files written while saving are not
>     removed afterwards.
>     """
>     bigmat = numpy.zeros( (2000, 20000), dtype=numpy.float64 )
>     for i in range(20):
>         for j in range(200,300):
>             bigmat[i,j]=i*j
>     spmat = scipy.sparse.csc_matrix(bigmat)
>     fname = 'test_ashelf_data.py'
>     # NOTE(review): fd is not closed if one of the writes below raises
>     fd = open(fname,'wb')
>     # preamble of the generated module: the imports and the ashelf_datadir
>     # definition that the code from get_code_for_var() relies on
>     fd.write( '# Automatically generated by ashelf.py\n')
>     fd.write( 'import numpy\n')
>     fd.write( 'import scipy.sparse\n')
>     fd.write( 'import os\n')
>     fd.write( 'ashelf_datadir = os.path.split(__file__)[0]\n')
>     save_as_python(fd, spmat, 'test_spmat')
>     fd.close()
>
>     # '__file__' is supplied by hand because the generated module reads it;
>     # note that this local name shadows the built-in locals()
>     locals = {'__file__':fname}
>     execfile(fname,{},locals) # loads test_spmat
>     assert numpy.allclose( spmat.data, locals['test_spmat'].data )
>     assert numpy.allclose( spmat.indices, locals 
> ['test_spmat'].indices )
>     assert numpy.allclose( spmat.indptr, locals['test_spmat'].indptr )
>     print 'sparse matrix saved and loaded OK'
>
> # Run the round-trip self-test when this file is executed as a script.
> if __name__=='__main__':
>     test()
>
> _______________________________________________
> SciPy-user mailing list
> SciPy-user@scipy.org
> http://projects.scipy.org/mailman/listinfo/scipy-user



More information about the SciPy-user mailing list