Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
Code Block
titleterm.py

import re

class StoplistFileError(Exception):
    """Used to indicate that a stoplist file breaks one or more formatting
    rules.

    Raised by ``StopList.load_file`` when a line of the stoplist file is not
    a single word.
    """

...



class StopList(object):
    """A case-insensitive collection of stop words.

    Words are stored lower-cased. Membership tests (``word in stoplist``)
    and ``remove_stopwords`` lower-case their input before comparing, so
    lookups ignore case.

    The object acts as its own iterator: ``iter(stoplist)`` resets a cursor
    kept on the instance and returns the instance itself. Because that
    cursor is shared, two simultaneous iterations over the same instance
    interfere with each other; iterate over ``stoplist.wordset`` when
    independent iterations are needed.
    """

    # A valid stoplist line is exactly one run of Unicode word characters.
    _WORD_PATTERN = re.compile(r'^\w+$', re.UNICODE)

    def __init__(self, file=None):
        """Create an empty stop list, optionally loading words from *file*.

        file -- path of a stoplist file to load (see ``load_file``); a
                falsy value skips loading.
        """
        self.__stopwords = set()
        # Cursor state consumed by next(); __iter__() re-initialises it.
        # Setting it here makes next() before iter() end cleanly instead
        # of failing on a missing attribute.
        self.__iter_index = 0
        self.__stopwords_iter = []
        if file:
            self.load_file(file)

    @property
    def wordset(self):
        """Return a copy of the stop words as a set.

        Mutating the returned set does not affect this stop list.
        """
        return self.__stopwords.copy()

    def __iter__(self):
        """Start a new iteration over a snapshot of the current words.

        Returns the instance itself, which implements ``next``.
        """
        self.__iter_index = 0
        # Snapshot, so that loading more words mid-iteration cannot
        # invalidate the iteration in progress.
        self.__stopwords_iter = list(self.__stopwords)
        return self

    def next(self):
        """Return the next word of the iteration started by ``__iter__``.

        Raises StopIteration once the snapshot is exhausted.
        """
        if self.__iter_index >= len(self.__stopwords_iter):
            raise StopIteration
        next_item = self.__stopwords_iter[self.__iter_index]
        self.__iter_index += 1
        return next_item

    # Python 3 spelling of the iterator protocol; ``next`` is kept for
    # existing Python 2 callers.
    __next__ = next

    def __contains__(self, word):
        """Return True if *word*, lower-cased, is a stop word."""
        return word.lower() in self.__stopwords

    def load_file(self, file):
        """Add the words listed in the UTF-8 encoded file at path *file*.

        Each line must consist of exactly one word (one or more Unicode
        word characters) once trailing whitespace is stripped. Words are
        stored lower-cased.

        Raises StoplistFileError on the first line that is not a single
        word; words from earlier lines remain loaded. Raises
        UnicodeDecodeError if a line is not valid UTF-8.
        """
        # Read bytes and decode explicitly so the same code runs on both
        # Python 2 and Python 3; ``with`` closes the file even when a bad
        # line raises.
        with open(file, "rb") as stopwords_file:
            for line_number, line in enumerate(stopwords_file, 1):
                utf8_line = line.decode('utf-8').rstrip()
                if not self._WORD_PATTERN.match(utf8_line):
                    raise StoplistFileError(
                        "line %d of %r is not a single word: %r"
                        % (line_number, file, utf8_line))
                self.__stopwords.add(utf8_line.lower())

    def remove_stopwords(self, words):
        """Return a list of the items of *words* that are not stop words.

        The comparison ignores case; the returned items keep their
        original case and order. Returns None when *words* is None.
        """
        if words is None:
            return None
        # __contains__ lower-cases each word before the lookup.
        return [word for word in words if word not in self]

...