[Hindi]NLP 19# Phrase Matching and Vocabulary P.2 |NLP|Python 3|Natural Language Processing|2019
Code:
# -*- coding: utf-8 -*-
"""NLP_Ex12.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1j3XIpD1UXOI0EU-8PMbBOB-zwTqSPtX9
"""
# Phrase-matching demo: locate a fixed list of economics phrases in a text
# file with spaCy's PhraseMatcher, then print every match twice -- once as the
# bare matched span and once with a few tokens of context on either side.
import spacy
from google.colab import files
from spacy.matcher import PhraseMatcher

nlp = spacy.load("en_core_web_sm")

# Colab-only helper. Presumably 'reaganomics.txt' is one of the files uploaded
# here, since it is opened by relative path right below -- confirm.
upload = files.upload()

matcher = PhraseMatcher(nlp.vocab)

# NOTE(review): the 'unicode_escape' codec is kept from the original; it looks
# like a workaround for bytes in the file that are not valid UTF-8 -- confirm
# the file's real encoding before changing it.
with open('reaganomics.txt', encoding='unicode_escape') as f:
    doc3 = nlp(f.read())

phrase_list = [
    'voodoo economics',
    'supply-side economics',
    'trickle-down economics',
    'free-market economics',
]

# make_doc() only tokenizes. The matcher compares token text by default, so
# running the tagger/parser/NER over each pattern would be wasted work.
phrase_pattern = [nlp.make_doc(text) for text in phrase_list]

# Patterns are passed as a single list. The older call form
# add(key, None, *docs) is rejected by spaCy v3.
matcher.add('EconMatcher', phrase_pattern)

# Each match is a (match_id, start, end) tuple of token offsets into doc3.
found_matches = matcher(doc3)
found_matches  # bare expression: only displays a value when run as a notebook cell

# Pass 1: print the matched phrase itself.
for matches_id, start, end in found_matches:
    string_id = nlp.vocab.strings[matches_id]  # hash -> 'EconMatcher'
    span = doc3[start:end]
    print(matches_id, string_id, start, end, span.text)

# Pass 2: print the match with up to 5 tokens of context on each side.
for matches_id, start, end in found_matches:
    string_id = nlp.vocab.strings[matches_id]
    # Clamp the left edge at 0: a negative start would be treated as an offset
    # from the END of the document and produce an empty or wrong context span
    # for matches within the first 5 tokens. Overshooting the right edge is
    # harmless because slicing stops at the end of the document.
    span = doc3[max(start - 5, 0):end + 5]
    print(matches_id, string_id, start, end, span.text)
"""NLP_Ex12.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1j3XIpD1UXOI0EU-8PMbBOB-zwTqSPtX9
"""
# Phrase-matching demo (repeated listing): find a fixed list of economics
# phrases in a text file using spaCy's PhraseMatcher and print each match,
# first on its own and then with surrounding context tokens.
import spacy
from google.colab import files
from spacy.matcher import PhraseMatcher

nlp = spacy.load("en_core_web_sm")

# Colab-only helper. Presumably the file opened below ('reaganomics.txt') is
# supplied through this upload step -- confirm.
upload = files.upload()

matcher = PhraseMatcher(nlp.vocab)

# NOTE(review): 'unicode_escape' is preserved from the original; it appears to
# work around non-UTF-8 bytes in the file -- verify the actual encoding.
with open('reaganomics.txt', encoding='unicode_escape') as f:
    doc3 = nlp(f.read())

phrase_list = [
    'voodoo economics',
    'supply-side economics',
    'trickle-down economics',
    'free-market economics',
]

# Tokenize the patterns only; the default text-based matching does not need
# the rest of the pipeline to run on them.
phrase_pattern = [nlp.make_doc(text) for text in phrase_list]

# List-based add(): the add(key, None, *docs) form is not accepted by
# spaCy v3.
matcher.add('EconMatcher', phrase_pattern)

# Matches come back as (match_id, start, end) token-offset tuples.
found_matches = matcher(doc3)
found_matches  # bare expression: shows the list only inside a notebook cell

# First pass: the matched span alone.
for matches_id, start, end in found_matches:
    string_id = nlp.vocab.strings[matches_id]  # resolve hash to the rule name
    span = doc3[start:end]
    print(matches_id, string_id, start, end, span.text)

# Second pass: the match plus up to 5 tokens before and after it.
for matches_id, start, end in found_matches:
    string_id = nlp.vocab.strings[matches_id]
    # The left bound is clamped to 0 so a match near the beginning of the
    # document does not yield a negative start index, which would be counted
    # from the end of the document and give an empty or wrong span.
    span = doc3[max(start - 5, 0):end + 5]
    print(matches_id, string_id, start, end, span.text)
0 Comments