source: nappy/trunk/textParser.py @ 2179

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/nappy/trunk/textParser.py@2179
Revision 2179, 2.7 KB checked in by domlowe, 12 years ago (diff)

Improved performance of two Nappy functions - readItemsFromUnknownLines and _checkForBlankLines

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1#   Copyright (C) 2004 CCLRC & NERC( Natural Environment Research Council ).
2#   This software may be distributed under the terms of the
3#   Q Public License, version 1.0 or later. http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt
4
5"""
6textParser.py
7=============
8
9A set of functions to parse text file data into lists, strings,
10reals, integers etc.
11
12"""
13
14import re
15import string
16
# Compiled pattern used by readItemFromLine(): whatever it matches in the
# stripped line is replaced with the empty string.
# NOTE(review): the pattern is not a raw string, so "\1" is the control
# character chr(1) rather than a regex back-reference, and the pattern has no
# capturing group for a back-reference to refer to.  As written it only
# matches text that starts with a quote character and ends with chr(1) --
# confirm whether handling of surrounding quotes was intended before changing
# it, since callers currently receive quoted text unchanged.
pattnNoQuotes=re.compile("^[\"'].*\1$")
18
def readItemFromLine(line, rttype=str):
    """Return the single item held on ``line``.

    line   -- text of one line; surrounding whitespace is stripped first.
    rttype -- callable used to convert the item; ``str`` (the default)
              means the text is returned without conversion.

    Any text matched by the module-level ``pattnNoQuotes`` pattern is
    removed from the stripped line before the optional conversion.
    """
    stripped = line.strip()
    item = pattnNoQuotes.sub("", stripped)
    # Only call the converter when something other than plain text is wanted.
    if rttype is str:
        return item
    return rttype(item)
24
def readItemsFromLine(line, nitems=None, rttype=str):
    """Split ``line`` on whitespace and return its items as a list.

    line   -- text of one line; leading/trailing whitespace is ignored.
    nitems -- expected number of items.  When given (and non-zero) a
              ValueError is raised if the line holds a different number.
    rttype -- callable applied to every item unless it is ``str``.

    Returns the list of (optionally converted) items.  Note that a blank
    line yields a single empty-string item, because the stripped text is
    split with a regular expression rather than ``str.split()``.
    """
    rtitems = re.split(r"\s+", line.strip())
    # A falsy nitems (None or 0) means "do not check the count".
    if nitems and len(rtitems) != nitems:
        # String exceptions are rejected by Python 2.6+ and 3.x, so a real
        # exception class is raised; the message reports both counts.
        raise ValueError(
            "Incorrect number of items found in line (expected %s, found %s): \n'%s'"
            % (nitems, len(rtitems), line))
    if rttype is not str:
        rtitems = [rttype(x) for x in rtitems]
    return rtitems
32
def readItemsFromLines(lines, nitems, rttype=str):
    """Read one item from each entry of ``lines`` and return them as a list.

    lines  -- iterable of line strings; each is passed to readItemFromLine().
    nitems -- accepted for interface compatibility; this function does not
              use it to validate the number of lines.
    rttype -- callable used by readItemFromLine() to convert each item
              (no conversion when it is ``str``).

    Returns a list with one item per line, in the same order as ``lines``.
    """
    # readItemFromLine() already applies rttype, so a second conversion pass
    # is unnecessary; the comprehension also avoids rebuilding the list with
    # "+" on every line, which was quadratic in the number of lines.
    return [readItemFromLine(line, rttype) for line in lines]
40
def readItemsFromUnknownLines(object, nitems, rttype=str):
    """Reads from an unknown number of lines until n items have been collected.

    The 'object' argument can be a filehandle (i.e. obj=open('name.ext', 'r'))
    or a string wrapped in a StringIO object (i.e. obj=StringIO.StringIO('abc')).
    The 'object' argument can also be a list of lines, in which case the
    consumed lines are removed from that list in place and the list (now
    holding only the unused lines) is returned as well.

    object -- list of line strings, or any object with a readline() method.
    nitems -- number of whitespace-separated items to collect.
    rttype -- callable applied to every item unless it is ``str``.

    Returns ``(items, object)`` when 'object' is a list, otherwise ``items``.

    Raises:
      ValueError -- the last line read holds more items than were still
                    needed, so the lines do not split exactly into nitems.
      IndexError -- a list ran out of lines before nitems were collected.
      EOFError   -- a file-like object hit end-of-file before nitems were
                    collected.
    """
    fromList = isinstance(object, list)
    rtitems = []
    lines = []
    # Pre-set so that the check below is safe when nitems <= 0 and the loop
    # body never runs.
    extras = []
    while len(rtitems) < nitems:
        # Fetch a fresh line on every pass; blank lines simply add no items.
        if fromList:
            if not object:
                raise IndexError(
                    "Ran out of lines after collecting %s of %s items"
                    % (len(rtitems), nitems))
            line = object.pop(0)
        else:
            line = object.readline()
            # readline() returns an empty string only at end-of-file (a blank
            # line still carries its newline), so stop instead of spinning.
            if not line:
                raise EOFError(
                    "Reached end of file after collecting %s of %s items"
                    % (len(rtitems), nitems))
        items = line.split()
        lines.append(items)
        # Take only what is still missing; anything beyond that on this line
        # means the lines do not divide exactly into nitems.
        needed = nitems - len(rtitems)
        rtitems.extend(items[:needed])
        extras = items[needed:]
    if extras:
        raise ValueError(
            "Could not split %s lines exactly into required number (%s) of items: \n%s"
            % (len(lines), nitems, lines))
    if rttype is not str:
        rtitems = [rttype(x) for x in rtitems]
    if fromList:
        return (rtitems, object)
    return rtitems
75
76
77
78
79   
80   
81
Note: See TracBrowser for help on using the repository browser.