Albert ARIBAUD | 48e910f | 2013-09-11 15:52:51 +0200 | [diff] [blame] | 1 | #! /usr/bin/python |
| 2 | ######################################################################## |
| 3 | # |
| 4 | # reorder and reformat a file in columns |
| 5 | # |
| 6 | # this utility takes lines from its standard input and reproduces them, |
| 7 | # partially reordered and reformatted, on its standard output. |
| 8 | # |
| 9 | # It has the same effect as a 'sort | column -t', with the exception |
| 10 | # that empty lines, as well as lines which start with a '#' sign, are |
| 11 | # not affected, i.e. they keep their position and formatting, and act |
| 12 | # as separators, i.e. the parts before and after them are each sorted |
| 13 | # separately (but overall field widths are computed across the whole |
| 14 | # input). |
| 15 | # |
| 16 | # Options: |
| 17 | # -i: |
| 18 | # --ignore-case: |
| 19 | # Do not consider case when sorting. |
| 20 | # -d: |
| 21 | # --default: |
| 22 | # What to change empty fields to. |
| 23 | # -s <N>: |
| 24 | # --split=<N>: |
| 25 | # Treat only the first N whitespace sequences as separators. |
| 26 | # line content after the Nth separator will count as only one |
| 27 | # field even if it contains whitespace. |
| 28 | # Example : '-s 2' causes input 'a b c d e' to be split into |
| 29 | # three fields, 'a', 'b', and 'c d e'. |
| 30 | # |
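| 31 | # boards.cfg requires -i -d <default> -s 6 (do not bundle these as '-ids 6': -d takes an argument, so that form is parsed as '-i -d s' followed by a stray '6'). |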
| 32 | # |
| 33 | ######################################################################## |
| 34 | |
| 35 | import sys, getopt, locale |
| 36 | |
# ensure we sort using the C locale.

locale.setlocale(locale.LC_ALL, 'C')


def parse_options(argv):
    """Parse the command-line options of this utility.

    argv is the argument list without the program name (sys.argv[1:]).

    Returns a (maxsplit, ignore_case, default_field) tuple:
      maxsplit      -- maximum number of whitespace splits per line (-s)
      ignore_case   -- True when sorting must ignore case (-i)
      default_field -- text substituted for missing fields (-d), '' if unset

    Raises getopt.GetoptError on an unknown option, a missing option
    argument, or a -s/--split argument that is not an integer.
    """
    # NOTE(review): str.split(None, 0) performs no split at all, so
    # without -s every line is treated as one single field; pass a
    # negative N to split on every whitespace sequence.
    maxsplit = 0
    ignore_case = False
    default_field = ''

    # "default=" (with '='): the long option takes an argument just like
    # its short form -d does.
    opts, _ = getopt.getopt(argv, "id:s:",
                            ["ignore-case", "default=", "split="])

    for opt, arg in opts:
        if opt in ("-s", "--split"):
            # int() rather than eval(): never execute command-line text.
            try:
                maxsplit = int(arg)
            except ValueError:
                raise getopt.GetoptError(
                    "option %s requires an integer argument, got %r"
                    % (opt, arg), opt)
        elif opt in ("-i", "--ignore-case"):
            ignore_case = True
        elif opt in ("-d", "--default"):
            default_field = arg
        else:
            # getopt only returns declared options, so this is a
            # programming error, not a user error.
            raise AssertionError("unhandled option")

    return maxsplit, ignore_case, default_field


def _is_sortable(line):
    """Tell whether a line must be reformatted and sorted.

    Empty lines and lines starting with '#' are not: they keep their
    position and act as separators between independently sorted runs.
    """
    return len(line) > 0 and line[0] != '#'


def _field_widths(lines, maxsplit):
    """Return the maximum width of each field over all sortable lines."""
    widths = []
    for line in lines:
        if not _is_sortable(line):
            continue
        fields = line.split(None, maxsplit)
        # if there are new fields, top up the widths list
        widths.extend([0] * (len(fields) - len(widths)))
        # compute the maximum width of each field
        for index, field in enumerate(fields):
            widths[index] = max(widths[index], len(field))
    return widths


def _format_line(line, widths, maxsplit, default_field):
    """Return a sortable line with each field padded to its column width.

    When default_field is not empty, fields missing at the end of the
    line are filled in with it.
    """
    fields = line.split(None, maxsplit)
    padded = [field.ljust(width) for field, width in zip(fields, widths)]
    if default_field != '':
        padded.extend(default_field.ljust(width)
                      for width in widths[len(fields):])
    # join fields using two spaces, like column -t would
    return '  '.join(padded)


def reformat(lines, maxsplit=0, ignore_case=False, default_field=''):
    """Reorder and reformat lines in columns.

    lines is an iterable of input lines without their end of line.
    Sortable lines are padded to common column widths (computed across
    the whole input) and sorted within each run delimited by empty or
    '#' lines; those delimiter lines keep their position. Rightmost
    whitespace is removed from every returned line.

    Returns the list of output lines.
    """
    lines = list(lines)  # two passes are needed: widths, then formatting
    widths = _field_widths(lines, maxsplit)

    # Fold case on the text itself, before the locale transformation;
    # lower-casing an already transformed key is only correct when
    # strxfrm() is the identity.
    if ignore_case:
        def sort_key(text):
            return locale.strxfrm(text.lower())
    else:
        sort_key = locale.strxfrm

    output_lines = []
    pending = []
    for line in lines:
        if _is_sortable(line):
            pending.append(_format_line(line, widths, maxsplit,
                                        default_field))
        else:
            # a separator closes the current run: flush it, sorted,
            # then emit the separator itself untouched
            pending.sort(key=sort_key)
            output_lines.extend(pending)
            pending = []
            output_lines.append(line)
    # maybe we had sortable lines pending after the last separator
    pending.sort(key=sort_key)
    output_lines.extend(pending)

    return [line.rstrip() for line in output_lines]


def main(argv=None):
    """Filter standard input to standard output.

    argv defaults to sys.argv[1:]. Exits with status 2 on bad options;
    the diagnostic goes to standard error so that it cannot end up in
    the reformatted output.
    """
    if argv is None:
        argv = sys.argv[1:]
    try:
        maxsplit, ignore_case, default_field = parse_options(argv)
    except getopt.GetoptError as err:
        # will print something like "option -a not recognized"
        sys.stderr.write(str(err) + '\n')
        sys.exit(2)

    # remove the final end of line from each input line
    input_lines = [line.strip('\n') for line in sys.stdin]

    for output_line in reformat(input_lines, maxsplit, ignore_case,
                                default_field):
        sys.stdout.write(output_line + '\n')


if __name__ == "__main__":
    main()