[Numpy-svn] r5047 - in trunk/numpy/lib: . tests

numpy-svn@scip... numpy-svn@scip...
Sat Apr 19 16:45:44 CDT 2008


Author: ptvirtan
Date: 2008-04-19 16:45:35 -0500 (Sat, 19 Apr 2008)
New Revision: 5047

Modified:
   trunk/numpy/lib/_datasource.py
   trunk/numpy/lib/tests/test__datasource.py
Log:
Fix bug #738 and add corresponding tests.

lib._datasource.DataSource.abspath now sanitizes path names more carefully,
making sure that all file paths reside in destdir.  This also holds on
Windows, where both '/' and os.sep function as path separators as far as
os.path.join is concerned.



Modified: trunk/numpy/lib/_datasource.py
===================================================================
--- trunk/numpy/lib/_datasource.py	2008-04-18 19:59:42 UTC (rev 5046)
+++ trunk/numpy/lib/_datasource.py	2008-04-19 21:45:35 UTC (rev 5047)
@@ -287,8 +287,23 @@
         if len(splitpath) > 1:
             path = splitpath[1]
         scheme, netloc, upath, uparams, uquery, ufrag = urlparse(path)
-        return os.path.join(self._destpath, netloc, upath.strip(os.sep))
+        netloc = self._sanitize_relative_path(netloc)
+        upath = self._sanitize_relative_path(upath)
+        return os.path.join(self._destpath, netloc, upath)
 
+    def _sanitize_relative_path(self, path):
+        """Return a sanitised relative path for which
+        os.path.abspath(os.path.join(base, path)).startswith(base)
+        """
+        last = None
+        path = os.path.normpath(path)
+        while path != last:
+            last = path
+            # Note: os.path.join treats '/' as os.sep
+            path = path.lstrip(os.sep).lstrip('/')
+            path = path.lstrip(os.pardir).lstrip('..')
+        return path
+
     def exists(self, path):
         """Test if ``path`` exists.
 

Modified: trunk/numpy/lib/tests/test__datasource.py
===================================================================
--- trunk/numpy/lib/tests/test__datasource.py	2008-04-18 19:59:42 UTC (rev 5046)
+++ trunk/numpy/lib/tests/test__datasource.py	2008-04-19 21:45:35 UTC (rev 5047)
@@ -29,6 +29,9 @@
 http_fakepath = 'http://fake.abc.web/site/'
 http_fakefile = 'fake.txt'
 
+malicious_files = ['/etc/shadow', '../../shadow',
+                   '..\\system.dat', 'c:\\windows\\system.dat']
+
 magic_line = 'three is the magic number'
 
 
@@ -165,7 +168,8 @@
 
     def test_ValidHTTP(self):
         scheme, netloc, upath, pms, qry, frg = urlparse(valid_httpurl())
-        local_path = os.path.join(self.tmpdir, netloc, upath.strip(os.sep))
+        local_path = os.path.join(self.tmpdir, netloc,
+                                  upath.strip(os.sep).strip('/'))
         self.assertEqual(local_path, self.ds.abspath(valid_httpurl()))
 
     def test_ValidFile(self):
@@ -178,7 +182,8 @@
 
     def test_InvalidHTTP(self):
         scheme, netloc, upath, pms, qry, frg = urlparse(invalid_httpurl())
-        invalidhttp = os.path.join(self.tmpdir, netloc, upath.strip(os.sep))
+        invalidhttp = os.path.join(self.tmpdir, netloc,
+                                   upath.strip(os.sep).strip('/'))
         self.assertNotEqual(invalidhttp, self.ds.abspath(valid_httpurl()))
 
     def test_InvalidFile(self):
@@ -190,8 +195,34 @@
         # Test filename with complete path
         self.assertNotEqual(invalidfile, self.ds.abspath(tmpfile))
 
+    def test_sandboxing(self):
+        tmpfile = valid_textfile(self.tmpdir)
+        tmpfilename = os.path.split(tmpfile)[-1]
 
-class TestRespositoryAbspath(NumpyTestCase):
+        tmp_path = lambda x: os.path.abspath(self.ds.abspath(x))
+        
+        assert tmp_path(valid_httpurl()).startswith(self.tmpdir)
+        assert tmp_path(invalid_httpurl()).startswith(self.tmpdir)
+        assert tmp_path(tmpfile).startswith(self.tmpdir)
+        assert tmp_path(tmpfilename).startswith(self.tmpdir)
+        for fn in malicious_files:
+            assert tmp_path(http_path+fn).startswith(self.tmpdir)
+            assert tmp_path(fn).startswith(self.tmpdir)
+    
+    def test_windows_os_sep(self):
+        orig_os_sep = os.sep
+        try:
+            os.sep = '\\'
+            self.test_ValidHTTP()
+            self.test_ValidFile()
+            self.test_InvalidHTTP()
+            self.test_InvalidFile()
+            self.test_sandboxing()
+        finally:
+            os.sep = orig_os_sep
+
+
+class TestRepositoryAbspath(NumpyTestCase):
     def setUp(self):
         self.tmpdir = mkdtemp()
         self.repos = datasource.Repository(valid_baseurl(), self.tmpdir)
@@ -203,11 +234,27 @@
     def test_ValidHTTP(self):
         scheme, netloc, upath, pms, qry, frg = urlparse(valid_httpurl())
         local_path = os.path.join(self.repos._destpath, netloc, \
-                                  upath.strip(os.sep))
+                                  upath.strip(os.sep).strip('/'))
         filepath = self.repos.abspath(valid_httpfile())
         self.assertEqual(local_path, filepath)
 
+    def test_sandboxing(self):
+        tmp_path = lambda x: os.path.abspath(self.repos.abspath(x))
+        assert tmp_path(valid_httpfile()).startswith(self.tmpdir)
+        for fn in malicious_files:
+            assert tmp_path(http_path+fn).startswith(self.tmpdir)
+            assert tmp_path(fn).startswith(self.tmpdir)
+        
+    def test_windows_os_sep(self):
+        orig_os_sep = os.sep
+        try:
+            os.sep = '\\'
+            self.test_ValidHTTP()
+            self.test_sandboxing()
+        finally:
+            os.sep = orig_os_sep
 
+
 class TestRepositoryExists(NumpyTestCase):
     def setUp(self):
         self.tmpdir = mkdtemp()



More information about the Numpy-svn mailing list