Piping Python using Bash

Aim

Pipe data into a Python script using Bash.

Background

This got out of hand. I originally wanted to see how to use a Python script at the command line so that I could pipe data in using Bash. I started by creating the Python script, which was to count how many words start with each letter of the alphabet. The input data didn't matter at this point because I was going to "pipe" it in using Bash. I used Project Gutenberg books for testing. After making the hodgepodge script to parse text, I decided to make an ASCII-style chart. Why, I am not really sure; it seemed like a good idea at the time. This is what I did.

Execution

# Use pygmentize for colours
alias ccat='pygmentize -g'

# Piping Python using Bash:
# list the catalog, keep the entries whose name contains "sherlock",
# run the counter on each one and pipe its chart through the colouriser.
# IFS= and -r stop `read` from trimming whitespace or interpreting
# backslashes in a file name.
ls catalog | grep "sherlock" |
while IFS= read -r book; do
    python counter.py -f "catalog/$book" | ccat
done

Output (snail race chart)

Total number of words starting with each letter in The Adventures of Sherlock Holmes. Note that I didn't remove any copyright information, header and footer content, or extracts.

counter.py

def spacer(value, maxValue):
    """
    Return the amount of padding to put after a letter count.

    The result is how many characters shorter ``str(value)`` is than
    ``str(maxValue)``; padding each count by that many spaces gives the
    ascii chart a flat left edge.
    """
    widest = len(str(maxValue))
    current = len(str(value))
    return widest - current


def scaler(value, maxValue, width=50):
    """
    Helper function to scale a letter count to a bar height
    for the ascii chart.

    Parameters:
        value: the count to draw.
        maxValue: the largest count in the chart; it maps to ``width``.
        width: number of characters used for the largest bar
            (defaults to 50, the size used before this was a parameter).

    Returns:
        ``value / maxValue * width`` rounded with the built-in ``round``.
        When ``maxValue`` is 0 there is nothing to scale against, so 0 is
        returned rather than dividing by zero.
    """
    if maxValue == 0:
        return 0
    return round(value / maxValue * width)


def main(opt):
    """
    Print an ascii bar chart of how many words start with each letter.

    ``opt.file`` is the path of a text file, read as UTF-8. Each line is
    split on single spaces; every token is lowercased and has the
    characters ``, . : " ? ; - _`` and newlines removed. Tokens that are
    longer than one character and start with an ascii lowercase letter are
    tallied under that first letter.

    One row per letter ``a``-``z`` is printed in the form
    ``letter: count |>>>@``. Counts are padded to a common width and bars
    are scaled relative to the largest count. Letters that never occur are
    printed with a count of 0, and a file with no countable words yields
    26 empty bars.
    """
    # Imported here so main() also works when this module is imported
    # instead of being run as a script.
    import string

    alphabet = string.ascii_lowercase

    # Characters removed from every token, applied in one translate() pass.
    strip_table = str.maketrans('', '', ',.\n:"?;-_')

    # Start every letter at zero so that letters with no words still get
    # a row in the chart.
    alphabetCounter = dict.fromkeys(alphabet, 0)

    with open(opt.file, 'r', encoding='utf-8') as infile:
        for line in infile:
            for word in line.split(' '):
                word = word.lower().translate(strip_table)
                # Tokens of one character or fewer are not counted; the
                # length check also protects word[0] on empty tokens.
                if len(word) > 1 and word[0] in alphabetCounter:
                    alphabetCounter[word[0]] += 1

    # largest count, used for both padding and scaling
    maxValue = max(alphabetCounter.values())

    # ascii print (alphabet is already in sorted order)
    for letter in alphabet:
        count = alphabetCounter[letter]
        # With no countable words maxValue is 0: draw empty bars rather
        # than asking scaler to divide by zero.
        bar = scaler(count, maxValue) if maxValue else 0
        print("%s: %s %s|%s" % (
            letter,
            count,
            ' ' * spacer(count, maxValue),
            '>' * bar + '@',
        ))



if __name__ == "__main__":
    # These imports create module-level names that main() may rely on
    # when the file is run as a script.
    import string
    import re
    import operator
    from optparse import OptionParser

    # add flags to use in the terminal
    parser = OptionParser()

    # -f/--file: path of the text file to process
    parser.add_option("-f", "--file", dest="file",
        help="File to process", metavar="FILE")

    # collect opts and args
    (options, args) = parser.parse_args()

    # Without -f, options.file is None and open() would fail with a
    # TypeError; print the usage message and exit instead.
    if not options.file:
        parser.error("no input file given, use -f FILE")

    # run main function
    main(options)

results matching ""

    No results matching ""