operation pylint
The point of this project is to see what's required to get a simple script up to 10 out of 10 according to pylint. When I was running test trials of pylint I hadn't been using Git as I should have and, as a result, lost the code that gave me 0 out of 10. Lesson learned.
In this example I used four functions from a module I have been working on (engtools: English tools). The four functions are: 1) clean a string of text, 2) count stopwords in a string, 3) create a dictionary of stopwords, and 4) calculate the percentage of stopwords in a string.
Here's its current state:
# starting point
import stop_words, re
def clean_my_grammar(x):
# Purpose: strip unwanted characters from each space-separated word of x
# and return the cleaned words joined by single spaces.
words=x.split(' ')
cleaned_text=""
# for each word, remove every character that is not an ASCII letter,
# an underscore or a hyphen
for word in words:
# the class keeps a-z, A-Z, '_' and '-'; everything else is deleted
cleaned_word=re.sub(r'[^a-zA-Z_-]+', "", word)
# while cleaned_text is still empty, (re)start it with this word
if cleaned_text == "":
cleaned_text = cleaned_word
else:
# otherwise append the word after a single space; a word that was
# cleaned down to "" still adds that space
cleaned_text = cleaned_text + " " + cleaned_word
return cleaned_text
def count_stopwords(x):
# Purpose: return how many words of x appear in the list returned by
# stop_words.get_stop_words('en').
# clean the text first so punctuation does not block a match
x = clean_my_grammar(x)
count=0
stopword_list = stop_words.get_stop_words('en')
# loop through each word of the string
for i in x.split(' '):
# match each lowercased word to list of stopwords
if i.lower() in stopword_list:
# add to counter
count +=1
# return number of stopwords
return count
def create_stopword_dictionary(x):
# Purpose: return a dict mapping each stopword found in x (lowercased)
# to the number of times it occurs.
# set variables
x=clean_my_grammar(x)
stopword_dictionary=dict()
stopword_list =stop_words.get_stop_words('en')
# loop through each word
for i in x.split(' '):
#match each lowercased word to stopwords
if i.lower() in stopword_list:
# bump this stopword's tally; .get(..., 0) starts it at 0 on first sight
stopword_dictionary[i.lower()]=stopword_dictionary.get(i.lower(),0) + 1
return stopword_dictionary
def percentage_stopwords(x):
# Purpose: return the share of words in x that are stopwords, as a float
# fraction (the quotient is not multiplied by 100).
# set variables
number_of_stopwords = count_stopwords(x)
# the denominator comes from splitting the raw, uncleaned x on single spaces
total_words = len(x.split(' '))
# return percentage as decimal value
return float(number_of_stopwords)/float(total_words)
Score: 2.90
Messages
--------
+------------------+------------+
|message id |occurrences |
+==================+============+
|bad-whitespace |10 |
+------------------+------------+
|missing-docstring |5 |
+------------------+------------+
|invalid-name |4 |
+------------------+------------+
|mixed-indentation |2 |
+------------------+------------+
|line-too-long |1 |
+------------------+------------+
Step 1: remove bad-whitespace
- add exactly one space after each comma
- add exactly one space around operators (e.g. `=` and `+=`)
Score: 6.13
+------------------+------------+
|message id |occurrences |
+==================+============+
|missing-docstring |5 |
+------------------+------------+
|invalid-name |4 |
+------------------+------------+
|mixed-indentation |2 |
+------------------+------------+
|line-too-long |1 |
+------------------+------------+
Step 2: add missing docstrings to functions
Score: 7.42
Messages
--------
+------------------+------------+
|message id |occurrences |
+==================+============+
|invalid-name |4 |
+------------------+------------+
|mixed-indentation |2 |
+------------------+------------+
|missing-docstring |1 |
+------------------+------------+
|line-too-long |1 |
+------------------+------------+
Step 3: change invalid names
Score: 8.71
Messages
--------
+------------------+------------+
|message id |occurrences |
+==================+============+
|mixed-indentation |2 |
+------------------+------------+
|missing-docstring |1 |
+------------------+------------+
|line-too-long |1 |
+------------------+------------+
Step 4: catch mixed indentation
Score: 9.03
Messages
--------
+------------------+------------+
|message id |occurrences |
+==================+============+
|line-too-long |2 |
+------------------+------------+
|missing-docstring |1 |
+------------------+------------+
Step 5: give the script a docstring
Score: 9.35
- added a module docstring above the functions
- made sure the import statements come after the docstring
Messages
--------
+--------------+------------+
|message id |occurrences |
+==============+============+
|line-too-long |2 |
+--------------+------------+
Step 6: modify the lines that are too long
Score: 10.00
Global evaluation
-----------------
Your code has been rated at 10.00/10
Step 7: Add comments
Final version....
#! /usr/bin/env python
"""
This is a module that contains word processing functions to
parse a string of text written in english.
"""
import stop_words, re
def clean_my_grammar(sentence):
    """
    Return a single string that has been cleaned.

    The sentence is split on any run of whitespace (spaces, tabs,
    newlines). Every character that is not an ASCII letter, an underscore
    or a hyphen is removed from each word, so words that are hyphenated or
    contain underscores are kept intact. Words that end up empty (for
    example "123" or "!!") are dropped, and the remaining words are joined
    with single spaces.
    """
    # split() with no argument also separates on tabs and newlines;
    # split(' ') would let "one\ntwo" collapse into "onetwo" once cleaned
    cleaned_words = [
        re.sub(r'[^a-zA-Z_-]+', "", word) for word in sentence.split()
    ]
    # skip emptied words so the result never contains doubled spaces
    return " ".join(word for word in cleaned_words if word)
def count_stopwords(sentence):
    """
    Return a count of stopwords in a string.

    The sentence is first passed through clean_my_grammar, then each word
    is lowercased and compared with the list returned by
    stop_words.get_stop_words('en'). Repeated stopwords are counted every
    time they occur.
    """
    # build the set once: membership tests on a set do not scan the list
    stopword_set = set(stop_words.get_stop_words('en'))
    # apply "clean_my_grammar" to remove unwanted text before matching
    cleaned_sentence = clean_my_grammar(sentence)
    # return number of stopwords
    return sum(
        1 for word in cleaned_sentence.split()
        if word.lower() in stopword_set
    )
def create_stopword_dictionary(sentence):
    """
    Return a dictionary containing the frequency of each stopword in a string.

    Keys are the lowercased stopwords found in the sentence (after it has
    been passed through clean_my_grammar); values are the number of times
    each one occurs. Stopwords come from stop_words.get_stop_words('en').
    """
    # build the set once: membership tests on a set do not scan the list
    stopword_set = set(stop_words.get_stop_words('en'))
    frequencies = {}
    # apply "clean_my_grammar" to remove unwanted text, then tally matches
    for word in clean_my_grammar(sentence).split():
        # lowercase once and reuse it for both the lookup and the key
        lowered = word.lower()
        if lowered in stopword_set:
            frequencies[lowered] = frequencies.get(lowered, 0) + 1
    return frequencies
def percentage_stopwords(sentence):
    """
    Return the proportion of words that are stopwords in a single string.

    The result is a float fraction (stopwords / total words); it is not
    multiplied by 100. Total words is the number of whitespace-separated
    tokens in the original, uncleaned sentence. A sentence with no words
    returns 0.0.
    """
    # split() ignores leading, trailing and repeated whitespace, so empty
    # pieces are not counted as words the way split(' ') would count them
    total_words = len(sentence.split())
    # nothing to measure: avoid dividing by zero
    if total_words == 0:
        return 0.0
    number_of_stopwords = count_stopwords(sentence)
    # return the proportion as a decimal value
    return float(number_of_stopwords) / total_words