from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline


def build_pipeline(random_state=42) -> Pipeline:
    """Build an unfitted TF-IDF + logistic-regression text pipeline.

    Args:
        random_state: Seed forwarded to ``LogisticRegression``. Defaults to 42.

    Returns:
        A ``Pipeline`` with two steps, in order: ``"tfidf"`` (a
        ``TfidfVectorizer``) and ``"clf"`` (a ``LogisticRegression``).
        The pipeline is returned unfitted; the caller is responsible for
        calling ``fit``.
    """
    vectorizer = TfidfVectorizer(
        max_df=0.9,  # document-frequency ceiling: drop terms in >90% of docs
        min_df=1,  # no document-frequency floor beyond a single occurrence
        ngram_range=(1, 2),  # extract both unigrams and bigrams
    )
    classifier = LogisticRegression(
        max_iter=1000,  # raised iteration cap passed to the solver
        random_state=random_state,
    )
    # Step names are part of the public contract: callers address nested
    # parameters through them (e.g. "clf__C", "tfidf__max_df").
    return Pipeline(
        [
            ("tfidf", vectorizer),
            ("clf", classifier),
        ]
    )