1 | # Copyright (C) 2004 CCLRC & NERC( Natural Environment Research Council ). |
---|
2 | # This software may be distributed under the terms of the |
---|
3 | # Q Public License, version 1.0 or later. http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt |
---|
4 | |
---|
5 | """ |
---|
6 | textParser.py |
---|
7 | ============= |
---|
8 | |
---|
9 | A set of functions to parse text file data into lists, strings, |
---|
10 | reals, integers etc. |
---|
11 | |
---|
12 | """ |
---|
13 | |
---|
14 | # Standard library imports |
---|
15 | import re |
---|
16 | import string |
---|
17 | |
---|
18 | # Local imports |
---|
19 | from nappy.utils.right_strip import * |
---|
20 | |
---|
21 | # Global variables |
---|
# Pattern whose matches are deleted from single items by readItemFromLine.
# NOTE(review): the literal is not a raw string and the pattern contains no
# capture group, so "\1" is the control character chr(1) rather than a
# back-reference. As written it essentially only matches text that begins
# with a quote character and ends with chr(1) -- confirm the intended quote
# handling before changing it.
pattnNoQuotes = re.compile("^[\"'].*\1$")


def readItemFromLine(line, rttype=str):
    """
    Returns the single item held on ``line`` as type ``rttype``.

    The line is passed through ``rightStripCurlyBraces`` (imported helper,
    presumably dropping trailing curly braces), surrounding whitespace is
    stripped and any text matching ``pattnNoQuotes`` is deleted. The result
    is converted with ``rttype`` unless ``rttype`` is ``str``.
    """
    cleaned = rightStripCurlyBraces(line).strip()
    value = pattnNoQuotes.sub("", cleaned)
    return value if rttype is str else rttype(value)
---|
35 | |
---|
def readItemsFromLine(line, nitems=None, rttype=str):
    """
    Reads ``nitems`` items of type ``rttype`` from ``line``.

    The line is passed through ``rightStripCurlyBraces``, stripped of
    surrounding whitespace and split on runs of whitespace. Because
    ``re.split`` is used, an empty line yields one empty-string item.

    :param line: text line to split into items.
    :param nitems: expected number of items; if ``None`` (or 0) the number
        of items found is not checked.
    :param rttype: callable applied to every item unless it is ``str``.
    :return: list of the items found on the line.
    :raises Exception: if ``nitems`` is given and a different number of
        items is found on the line.
    """
    line = rightStripCurlyBraces(line)
    rtitems = re.split(r"\s+", line.strip())

    if nitems and len(rtitems) != nitems:
        # A real exception object is required: raising a plain string is
        # rejected by modern Python. The message reports the count actually
        # found as well as the count expected.
        raise Exception(
            "Incorrect number of items (%s) found in line (expected %s): \n'%s'"
            % (len(rtitems), nitems, line))

    if rttype is not str:
        rtitems = [rttype(x) for x in rtitems]
    return rtitems
---|
48 | |
---|
def readItemsFromLines(lines, nitems, rttype=str):
    """
    Reads one item of type ``rttype`` from each line in ``lines``.

    :param lines: iterable of text lines, each holding a single item.
    :param nitems: accepted for interface compatibility; it is not used and
        the number of lines is not checked against it.
    :param rttype: type/callable passed on to ``readItemFromLine``.
    :return: list with one item per line, in the order of ``lines``.
    """
    # readItemFromLine already performs the conversion to ``rttype``, so it
    # is not applied a second time here (doing so would break converters
    # that are not idempotent). The explicit rightStripCurlyBraces pass is
    # kept from the original implementation even though readItemFromLine
    # applies the same helper again.
    return [readItemFromLine(rightStripCurlyBraces(line), rttype)
            for line in lines]
---|
60 | |
---|
def readItemsFromUnknownLines(object, nitems, rttype=str):
    """
    Reads from an unknown number of lines until ``nitems`` items have been
    collected.

    The 'object' argument can be a filehandle (i.e. obj=open('name.ext', 'r'))
    or a string wrapped in a StringIO object (i.e. obj=StringIO.StringIO('abc')).
    The 'object' argument can also be a list of lines, in which case the
    unread remainder of the list is also returned.

    :param object: list of lines, or any object with a ``readline`` method.
    :param nitems: number of whitespace-separated items to collect.
    :param rttype: callable applied to every item unless it is ``str``.
    :return: ``(items, remaining_lines)`` if ``object`` is a list, otherwise
        just the list of items.
    :raises IndexError: if a list ``object`` runs out of lines before
        ``nitems`` items have been collected.
    :raises Exception: if a file-like ``object`` reaches end of input before
        ``nitems`` items have been collected, or if the last line read holds
        more items than were still required.
    """
    is_list = isinstance(object, list)

    rtitems = []
    lines = []
    # Pre-set so the check after the loop also works when no line is read
    # (nitems <= 0).
    extras = []
    # Number of lines consumed; used to slice a list input once at the end
    # instead of copying the remainder on every iteration.
    nread = 0

    while len(rtitems) < nitems:
        if is_list:
            # Raises IndexError when the list is exhausted too early.
            nextline = object[nread]
        else:
            nextline = object.readline()
            if not nextline:
                # readline() returns an empty string at end of input; without
                # this check the loop would never terminate.
                raise Exception(
                    "Reached end of input after %r lines with only %r of "
                    "the required %r items: \n%s"
                    % (len(lines), len(rtitems), nitems, lines))
        nread += 1

        items = rightStripCurlyBraces(nextline).strip().split()
        lines.append(items)

        # Only take what is still missing; anything beyond that on this line
        # means the lines do not divide exactly into ``nitems`` items.
        needed = nitems - len(rtitems)
        rtitems.extend(items[:needed])
        extras = items[needed:]

    if extras:
        raise Exception(
            "Could not split %r lines exactly into required number (%r) "
            "of items: \n%s" % (len(lines), nitems, lines))

    if rttype is not str:
        rtitems = [rttype(x) for x in rtitems]

    if is_list:
        return (rtitems, object[nread:])
    return rtitems
---|