Monday, September 25, 2017

PowerShell: Multiple Tech Website RSS Emailer

Instead of receiving several emails from the various tech sites I like to follow, I created a script that pulls the RSS feeds from six sites, finds the most common keywords in the article titles, groups the articles that share a keyword, and sorts them by date.  This way I can read articles about the same topic from different sites, each with its own view on the subject.


#### TECHSPOT ####
# Pulls the TechSpot RSS feed and, for every item published within the last
# 24 hours, attaches the note properties the later stages read (date, Author,
# Titlestring, Textonly, Source, Index, Article) and appends it to $TheList.

# TechSpot is the first feed in the script, so the shared accumulators are
# reset here. Previously $MasterNumber was still $null for the first article
# (giving it an empty <a name=> anchor) and a second run in the same session
# kept appending to the previous run's $TheList.
$MasterNumber = 0
[array]$TheList = @()

[xml]$techspot = iwr https://www.techspot.com/backend.xml
$articles = $techspot.getelementsbytagname('item')
$cutoff = (get-date).addhours(-24)

foreach ($a in $articles){
    # -as yields $null instead of throwing when pubDate is missing or cannot
    # be parsed; such items are skipped, as are items older than the cutoff.
    $published = $a.pubdate -as [datetime]
    if ($null -eq $published -or $published -lt $cutoff){continue}

    $a|add-member -notepropertyname "date" -notepropertyvalue $published.tostring()
    $a|add-member -notepropertyname "Author" -notepropertyvalue $a.creator.'#cdata-section'
    $link = $a.link
    $title = $a.title.'#cdata-section'.trim()
    $a|add-member -notepropertyname "Titlestring" -notepropertyvalue $title

    # Load the item's HTML description into an HTMLFile COM document so the
    # first image URL and the plain text can be read from it.
    $html = New-Object -ComObject "HTMLFile"
    $html.IHTMLDocument2_write($a.description.'#cdata-section')
    $imglink = $html.body.getelementsbytagname('img')[0].src
    # The [string] cast turns a missing body text into '' so .trim() cannot
    # fail and leave the previous article's text in $text.
    $text = ([string]$html.body.innertext).trim()
    $a|add-member -notepropertyname "Textonly" -notepropertyvalue $text
    $a|add-member -notepropertyname "Source" -notepropertyvalue ("TechSpot " + $published.tostring())
    $a|add-member -notepropertyname "Index" -notepropertyvalue $MasterNumber
    $index = $a.index
    $a|add-member -notepropertyname "Article" -notepropertyvalue ("<table width='100%'><tr><td style='font-family: Arial, sans-serif;'><a name=$index href=$link><img width=200 vspace=5 hspace=5 align=left src=" + $imglink + "></a><h3>" + $title + "</h3></td></tr></table><table><tr><td style='font-family: Arial, sans-serif;font-size:13;'>" + $text + "</td></tr></table><hr style='border: 2px solid mintcream;'>")

    $TheList += $a
    $MasterNumber++
    [System.Runtime.Interopservices.Marshal]::ReleaseComObject($html)|out-null
}
#### END TECHSPOT ####

#### ANDROID CENTRAL ####
# Pulls the Android Central feed and, for every item published within the
# last 24 hours, attaches date, Titlestring, Author, Textonly, Source, Index
# and Article note properties and appends the item to $TheList.
[xml]$ac = iwr http://feeds2.feedburner.com/androidcentral
$articles = $ac.getelementsbytagname('item')
$cutoff = (get-date).addhours(-24)

foreach ($a in $articles){
    # -as yields $null instead of throwing when pubDate is missing or cannot
    # be parsed; such items are skipped, as are items older than the cutoff.
    $published = $a.pubdate -as [datetime]
    if ($null -eq $published -or $published -lt $cutoff){continue}

    $a|add-member -notepropertyname "date" -notepropertyvalue $published.tostring()
    $title = $a.title.trim()
    $a|add-member -notepropertyname "Titlestring" -notepropertyvalue $title
    $a|add-member -notepropertyname "Author" -notepropertyvalue $a.creator

    # Render the CDATA body through an HTMLFile COM document and take its
    # markup back so the images can be resized.
    $html = New-Object -ComObject "HTMLFile"
    $encoded = ($a.encoded.'#cdata-section')
    $html.IHTMLDocument2_write($encoded)
    $text = $html.body.innerhtml
    if ($null -ne $text){
        $text = $text.replace("<IMG","<img width=300 align=left hspace=10 vspace=10 ")
    }
    else {
        # Nothing came back from the COM document: fall back to the raw
        # <encoded> value, cut it at the first <style> tag and resize images
        # in either tag casing.
        $text = $a.encoded
        $text = $text -replace ('<style>+','~')
        $text = $text.split('~')[0]
        $text = $text.replace("<IMG","<IMG width=300 align=left hspace=10 vspace=10 ")
        $text = $text.replace("<img","<img width=300 align=left hspace=10 vspace=10 ")
    }
    $text = $text.trim()

    # Textonly is the feed's own description, not the rendered body above.
    $a|add-member -notepropertyname "Textonly" -notepropertyvalue $a.description
    $a|add-member -notepropertyname "Source" -notepropertyvalue ("Android Central " + $published.tostring())
    $a|add-member -notepropertyname "Index" -notepropertyvalue $MasterNumber
    $index = $a.index
    $a|add-member -notepropertyname "Article" -notepropertyvalue ("<table width=100%><tr><td><a name=$index></a><h3>$title</h3></td></tr><tr><td style='font-family: Arial, sans-serif;font-size:13;'>" + $text + "</td></tr></table><hr style='border: 2px solid mintcream;'>")

    [array]$thelist += $a
    $MasterNumber++
    [System.Runtime.Interopservices.Marshal]::ReleaseComObject($html)|out-null
}
#### END ANDROID CENTRAL ####

#### ANDROID POLICE ####
# Pulls the Android Police feed and, for every item published within the last
# 24 hours, attaches date, Titlestring, Author, Textonly, Source, Index and
# Article note properties and appends the item to $TheList.
[xml]$ap = iwr http://www.androidpolice.com/feed/
$articles = $ap.getelementsbytagname('item')
$cutoff = (get-date).addhours(-24)

foreach ($a in $articles){
    # -as yields $null instead of throwing when pubDate is missing or cannot
    # be parsed; such items are skipped, as are items older than the cutoff.
    $published = $a.pubdate -as [datetime]
    if ($null -eq $published -or $published -lt $cutoff){continue}

    $a|add-member -notepropertyname "date" -notepropertyvalue $published.tostring()
    $link = $a.link
    $comments = ($a.link + "#comments")

    # Square brackets in the title are shown as double quotes.
    $title = $a.title.trim()
    $title = $title -replace ('\[','"')
    $title = $title -replace ('\]','"')
    $a|add-member -notepropertyname "Titlestring" -notepropertyvalue $title.trim()
    $a|add-member -notepropertyname "Author" -notepropertyvalue $a.creator.'#cdata-section'

    # Load the description into an HTMLFile COM document to read the first
    # image URL and the plain text.
    $html = New-Object -ComObject "HTMLFile"
    $description = ($a.description.'#cdata-section')
    $html.IHTMLDocument2_write($description)
    $imglink = $html.body.getelementsbytagname('img')[0].src
    $text = $html.body.innertext
    $text = $text -replace ('\[','"')
    $text = $text -replace ('\]','"')
    $text = $text -replace ('Read More\s+')
    # -replace treats its operand as a regular expression, so the title has
    # to be escaped; a raw title containing ( ) + ? * etc. either threw or
    # removed the wrong text.
    $text = $text -replace ([regex]::Escape($title))
    $text = $text.replace(" was written by the awesome team at Android Police.","")
    $text = $text.trim()
    $text = $text + " <a href=$comments>Comments</a>"

    $a|add-member -notepropertyname "Textonly" -notepropertyvalue $text
    $a|add-member -notepropertyname "Source" -notepropertyvalue ("Android Police " + $published.tostring())
    $a|add-member -notepropertyname "Index" -notepropertyvalue $MasterNumber
    $index = $a.index
    $a|add-member -notepropertyname "Article" -notepropertyvalue ("<table width='100%'><tr><td style='font-family: Arial, sans-serif;'><a name=$index href=$link><img vspace=5 hspace=5 width=200 align=left src=" + $imglink + "></a><h3>" + $title + "</h3></td></tr></table><table><tr><td style='font-family: Arial, sans-serif;font-size:13;'>" + $text + "</td></tr></table><hr style='border: 2px solid mintcream;'>")

    [array]$thelist += $a
    $MasterNumber++
    [System.Runtime.Interopservices.Marshal]::ReleaseComObject($html)|out-null
}
#### END ANDROID POLICE ####

#### SLASHDOT ####
# Pulls the Slashdot feed and, for every item dated within the last 24 hours,
# attaches Titlestring, Author, Textonly, Source, Index and Article note
# properties and appends the item to $TheList. Unlike the other feeds the
# item's own <date> element is used; no "date" note property is added.
[xml]$slashdot = iwr http://rss.slashdot.org/Slashdot/slashdotMain
$articles = $slashdot.getelementsbytagname('item')
$cutoff = (get-date).addhours(-24)

foreach ($a in $articles){
    # Filter first so no COM document is created for items that would be
    # thrown away anyway. -as yields $null for a missing/unparseable date.
    $published = $a.date -as [datetime]
    if ($null -eq $published -or $published -lt $cutoff){continue}

    $link = $a.link
    $title = $a.title
    $a|add-member -notepropertyname "Titlestring" -notepropertyvalue  $title.trim()
    $a|add-member -notepropertyname "Author" -notepropertyvalue  $a.creator
    $comments = $a.comments
    $department = $a.department
    # Topic icon URL is built from the item's subject.
    $subjecticon = ("<img align=left vspace=5 hspace=5 src=https://a.fsdn.com/sd/topics/" + $a.subject + "_64.png>")

    # Load the description into an HTMLFile COM document to get plain text.
    $html = New-Object -ComObject "HTMLFile"
    $html.IHTMLDocument2_write($a.description)
    # The [string] cast turns a missing body text into '' so .replace()
    # cannot fail and leave the previous article's text in $text.
    $text = ([string]$html.body.innertext).replace('Read more of this story at Slashdot.','')

    $a|add-member -notepropertyname "Textonly" -notepropertyvalue $text
    $a|add-member -notepropertyname "Source" -notepropertyvalue ("Slashdot " + $published.tostring())
    $a|add-member -notepropertyname "Index" -notepropertyvalue $MasterNumber
    $index = $a.index
    # The department is wrapped in <b><i>...</i></b>; the closing tags were
    # previously emitted in the wrong order.
    $a|add-member -notepropertyname "Article" -notepropertyvalue ("<table style='width:100%'><tr><td style='font-family: Arial, sans-serif;'><h3><a name=$index href=$link>" + $subjecticon + "</a>" + $title + "</h3><b>" + $comments + "</b> comments <b><i>(" + $department + ")</i></b></td></tr></table><table><tr><td style='font-family: Arial, sans-serif;font-size:13;'>" + $text + "</td></tr></table><hr style='border: 2px solid mintcream;'>")

    [array]$TheList += $a
    $MasterNumber++
    [System.Runtime.Interopservices.Marshal]::ReleaseComObject($html)|out-null
}
#### END SLASHDOT ####

#### PHONE ARENA ####
# Pulls the Phone Arena feed and, for every item published within the last
# 24 hours, attaches date, Titlestring, Textonly, Source, Index and Article
# note properties and appends the item to $TheList. No Author property is set
# for this feed.
[xml]$phonearena = iwr "https://www.phonearena.com/feed"
$articles = $phonearena.rss.channel.item
$cutoff = (get-date).addhours(-24)

foreach ($a in $articles){
    # -as yields $null instead of throwing when pubDate is missing or cannot
    # be parsed; such items are skipped, as are items older than the cutoff.
    $published = $a.pubdate -as [datetime]
    if ($null -eq $published -or $published -lt $cutoff){continue}

    $a|add-member -notepropertyname "date" -notepropertyvalue $published.tostring()
    $link = $a.link
    # NOTE(review): "???" presumably stands in for a non-ASCII quote character
    # lost while the feed was converted to text - confirm against the raw feed.
    $title = $a.title
    $title = $title.replace("???",'"')
    $a|add-member -notepropertyname "Titlestring" -notepropertyvalue  $title.trim()

    # Load the description into an HTMLFile COM document to read the image
    # URL and the plain text.
    $html = New-Object -ComObject "HTMLFile"
    $getimglink = ($a.description.'#cdata-section')
    $html.IHTMLDocument2_write($getimglink)
    # Only the first image is used. Expanding every src produced an array
    # when the description held several images, which then joined into one
    # space-separated, broken src= value in the Article markup.
    $imglink = $html.body.getelementsbytagname('img')|select -first 1 -expand src
    # The [string] cast turns a missing body text into '' so .replace()
    # cannot fail and leave the previous article's text in $text.
    $text = ([string]$html.body.innertext).replace("???",'"')

    $a|add-member -notepropertyname "Textonly" -notepropertyvalue $text.trim()
    # Format the parsed pubDate directly. Casting the "date" string back to
    # [datetime] re-parsed current-culture text with the invariant culture,
    # which fails or swaps day and month on non-US date formats.
    $a|add-member -notepropertyname "Source" -notepropertyvalue ("Phone Arena " + $published.tostring())
    $a|add-member -notepropertyname "Index" -notepropertyvalue $MasterNumber
    $index = $a.index
    $a|add-member -notepropertyname "Article" -notepropertyvalue ("<table width='100%'><tr><td style='font-family: Arial, sans-serif;'><a name=$index href=$link><img width=200 align=left vspace=5 hspace=5 src=" + $imglink + "></a><h3>" + $title + "</h3></td></tr></table><table><tr><td style='font-family: Arial, sans-serif;font-size:13;'>" + $text + "</td></tr></table><hr style='border: 2px solid mintcream;'>")

    [array]$TheList += $a
    $MasterNumber++
    [System.Runtime.Interopservices.Marshal]::ReleaseComObject($html)|out-null
}
#### END PHONE ARENA ####

#### UBERGIZMO ####
# Pulls the Ubergizmo feed and, for every item published within the last
# 24 hours, attaches date, Titlestring, Author, Textonly, Source, Index and
# Article note properties and appends the item to $TheList.
[xml]$uber = iwr http://feeds.feedburner.com/ubergizmo
$articles = $uber.GetElementsByTagName('item')
$cutoff = (get-date).addhours(-24)

foreach ($a in $articles){
    # -as yields $null instead of throwing when pubDate is missing or cannot
    # be parsed; such items are skipped, as are items older than the cutoff.
    $published = $a.pubdate -as [datetime]
    if ($null -eq $published -or $published -lt $cutoff){continue}

    $a|add-member -notepropertyname "date" -notepropertyvalue $published.tostring()
    # NOTE(review): "???" / "??" presumably stand in for non-ASCII quote
    # characters lost while the feed was converted to text - confirm against
    # the raw feed.
    $title = $a.title
    $title = $title.replace("???","'")
    $title = $title.replace("??",'"')
    $a|add-member -notepropertyname "Titlestring" -notepropertyvalue  $title.trim()
    $a|add-member -notepropertyname "Author" -notepropertyvalue  $a.creator
    $link = $a.link

    # Load the article body into an HTMLFile COM document to read the first
    # image URL and the plain text.
    $html = New-Object -ComObject "HTMLFile"
    $article = $a.encoded.'#cdata-section'
    $html.IHTMLDocument2_write($article)
    $imglink = $html.body.getelementsbytagname('img')[0].src
    # The [string] cast turns a missing body text into '' so the .replace()
    # calls cannot fail and leave the previous article's text in $text.
    $text = [string]$html.body.innertext
    $text = $text.replace("???","'")
    $text = $text.replace("??",'"')
    # String.Replace throws on an empty search string, so only strip the
    # title from the body when there is one.
    if ($title){$text = $text.replace($title,'')}
    $text = $text.replace(', original content from Ubergizmo. Read our Copyrights and terms of use.','')

    $a|add-member -notepropertyname "Textonly" -notepropertyvalue $text.trim()
    # Format the parsed pubDate directly. Casting the "date" string back to
    # [datetime] re-parsed current-culture text with the invariant culture,
    # which fails or swaps day and month on non-US date formats.
    $a|add-member -notepropertyname "Source" -notepropertyvalue ("Ubergizmo " + $published.tostring())
    $a|add-member -notepropertyname "Index" -notepropertyvalue $MasterNumber
    $index = $a.index
    # The header cell's style value is now quoted; unquoted, the attribute
    # ended at the first space and the font declaration was lost.
    $a|add-member -notepropertyname "Article" -notepropertyvalue ("<table width='100%'><tr><td style='font-family: Arial, sans-serif;'><a name=$index href=$link><img vspace=5 hspace=5 width=200 align=left src=" + $imglink + "></a><h3>" + $title + "</h3></td></tr></table><table width='100%'><tr><td style='font-family: Arial, sans-serif;font-size:13;'><p>" + $text + "</p></td></tr></table><hr style='border: 2px solid mintcream;'>")

    [array]$TheList += $a
    $MasterNumber++
    [System.Runtime.Interopservices.Marshal]::ReleaseComObject($html)|out-null
}
#### END UBERGIZMO ####

# Words that must never be picked as "important". The server copy is tried
# first; the local copy is used when that returned nothing.
$IgnoreWords = gc \\server\c$\Users\useraccount\Documents\scripts\ignorewords.txt
if ($null -eq $IgnoreWords){$IgnoreWords = gc c:\Users\useraccount\Documents\scripts\ignorewords.txt}

# Count how often each word occurs across all collected titles. Punctuation
# is stripped BEFORE empty tokens are dropped: filtering first let tokens
# such as "-" or "&" turn into an empty "word" that was counted and could be
# ranked as important.
$TitleWordFrequency = ([string]$thelist.titlestring).split(" ") |
    %{[Regex]::Replace($_,'[^a-zA-Z0-9]','')} |
    ?{-not [String]::IsNullOrEmpty($_)} |
    group |
    sort count -Descending

# The ten most frequent words that are not on the ignore list.
$ImportantWords = $TitleWordFrequency |?{$IgnoreWords -notcontains $_.name} | select @{n='ImportanceWeight';e={$_.Count * 0.01}}, @{n='ImportantWord';e={$_.Name}} -First 10

# Newest first. The feed sections store "date" as text produced by
# DateTime.ToString(), i.e. in the current culture, so it is parsed with
# TryParse (current culture) rather than -as [datetime], which converts with
# the invariant culture and misreads or rejects non-US date formats. Items
# whose date cannot be parsed sort with a $null key.
$TheList = $TheList | sort {
    $parsed = [datetime]::MinValue
    if ([datetime]::TryParse([string]$_.date, [ref]$parsed)){$parsed}
} -descending

# Tag each article with the rank (1 = most frequent) of the first important
# word found in its title, plus "Isub", its position among the articles that
# were still untagged when that word was processed. Later code groups on
# ImportantWord and orders each group by Isub.
$number = 0
foreach ($i in $importantwords){
    $number++
    # An empty word would match every title that contains a space.
    if ([string]::IsNullOrEmpty($i.importantword)){continue}

    # Same match as before: the word preceded by a space or followed by a
    # space (the space-on-both-sides case is covered by either alternative).
    $word = [regex]::Escape($i.importantword)
    $pattern = " $word|$word "

    $sub = 0
    foreach ($t in $TheList){
        # Already claimed by a higher-ranked word.
        if ($null -ne $t.importantword){continue}
        $sub++
        if ($t.titlestring -match $pattern){
            $t|add-member -notepropertyname "ImportantWord" -notepropertyvalue $number
            # Isub is now set for every tagged article. It used to be set
            # only when the word had a space on both sides, so titles that
            # start or end with the word had no Isub and sorted ahead of
            # the rest of their group.
            $t|add-member -notepropertyname "Isub" -notepropertyvalue $sub
        }
    }
}

# Build $iwList: articles grouped by important-word rank (1..10), each group
# ordered by Isub, followed by the articles that matched no important word.
#
# $iwList starts as an empty array and every addition is wrapped in @() so
# that a group with a single article stays an array element (adding to a
# bare XmlElement fails) and an empty group adds nothing (adding $null to an
# array appends a $null entry, which showed up as blank rows in the e-mail).
$iwList = @()

#Has important words
foreach ($rank in 1..10){
    $iwList += @($TheList | ?{$_.importantword -eq $rank} | sort isub)
}

#Has no important words
$iwList += @($TheList | ?{$null -eq $_.importantword})

# Assemble the HTML body in $FinalList: a centred table of title links at
# the top (anchor "top"), followed by every article in $iwList order.
$FinalList = "<center><a name=top></a><table style='width:80%;'>"

#Create Title Index
# One row per article; even rows (counting from 0) get a grey background.
$rowCount = 0
foreach ($entry in $iwList){
    $index = ("#" + $entry.index)
    $Titlestring = $entry.titlestring
    if ($rowCount % 2 -eq 0){
        $FinalList += ("<tr><td><a style='line-height: 1.3;background:#f2f2f2;font-family: Arial, Helvetica, sans-serif;text-decoration:none;font-size:90%;font-weight:bold;color:black;' href=$index>$Titlestring</a></td></tr>")
    }
    else {
        $FinalList += ("<tr><td><a style='line-height: 1.3;font-family: Arial, Helvetica, sans-serif;text-decoration:none;font-size:90%;font-weight:bold;color:black;' href=$index>$Titlestring</a></td></tr>")
    }
    $rowCount++
}

$FinalList += "</table></center>"

# Each article: a link back to the top, its source line, its author, a link
# to the original article, then the pre-built Article markup.
foreach ($entry in $iwList){
    $link = $entry.link
    $FinalList += '<a style="font-size:80%" href=#top>Top</a><br>' +
        ('<span style="font-size:80%">' + $entry.Source + '</span><br>') +
        ('<i style="font-size:80%">' + $entry.author + '</i><br>') +
        ("<a style='font-size:80%' href=$link>Article Link</a>") +
        $entry.article
}

# Send $FinalList as an HTML e-mail through Gmail's SMTP server using SSL.
$SMTPServer = "smtp.gmail.com"
$SMTPPort = 587
$Username = "myaccount@gmail.com"
# NOTE(review): the password is stored in plain text in the script. Replace
# the placeholder with a value read from a protected store before real use.
$Password = "mypassword"
$to = "recipient@somemail.com"
$subject = ("Daily Tech Summary - " + (get-date).tolongdatestring())

$message = New-Object System.Net.Mail.MailMessage
try {
    $message.subject = $subject
    $message.body = $FinalList
    $message.to.add($to)
    $message.from = $username
    $message.IsBodyHTML = $true

    $smtp = New-Object System.Net.Mail.SmtpClient($SMTPServer, $SMTPPort)
    try {
        $smtp.EnableSSL = $true
        $smtp.Credentials = New-Object System.Net.NetworkCredential($Username, $Password)
        $smtp.send($message)
    }
    finally {
        # SmtpClient and MailMessage are disposable; they were previously
        # never disposed. Dispose the client even when the send fails.
        $smtp.Dispose()
    }
}
finally {
    $message.Dispose()
}

No comments: