MHTTP
From Scriptwiki
Revision as of 22:02, 10 June 2015 by Aca20031 (talk | contribs) (Adding known issue about not supporting arrays in POST)
Oftentimes you may want to download a web page from the internet using HTTP, but HTTP can be complicated. This script handles most of the basics for you, including:
- URL Parsing
- SSL
- Cookies
- Sending POST data
- Following redirects
- Handling CHUNKED encoding
Typical usage simply involves using the /http.open, /http.save and /http.close commands to download a file, and creating a signal event to do something with it once it is done.
Example
; Example: download Google's logo to logo.png (overwriting any existing file)
; using the handle name "google".
alias DownloadGooglesLogo {
  http.open google https://www.google.com/images/srpr/logo11w.png
  http.save -f google logo.png
}

; mHTTP reports events through the "http" signal:
;   $1 = handle name, $2 = event name, $3- = event parameters
on *:SIGNAL:http: {
  if ($1 == google) {
    if ($2 == SAVED) {
      ; $3- is the saved file name: open it, then release the handle
      run $3-
      http.close $1
    }
    if ($2 == PROGRESS) {
      ; $3 = bytes reported by the event, $4 = total size (when known)
      echo -atg Download progress: $bytes($3).suf $+ / $+ $bytes($4).suf downloaded
    }
  }
}
Script
; Ben's (ben@st0rm.net) HTTP for mIRC script (mHTTP)
; Used to download over HTTP
;
; Basic usage
;   /http.open [name] <URL>
;     Opens an HTTP handle for the given URL
;   /http.save [-f] <name> <file|bvar>
;     Downloads a given HTTP resource to the given file or binvar
;     -f forces the file to be overwritten
;   /http.close <name>
;     Closes an HTTP handle for the given URL
;
; Here are some things you can do after opening a handle, but before saving it
;   Manage POST data:
;     /http.addpost <handle> <variable> <value>
;     /http.delpost <handle> <variable>
;   Manage cookies
;     /http.addcookie <handle> <value>
;     $http.cookie(handle,N) - gets Nth cookie (N = 0 returns the number of cookies)
;
; After a request completes (e.g. after a save) you can get some data:
;   $http.responseheader(handle, header, N) - Returns value of given responseheader, or $false.
;     Only useful after a request is made
;
; You can mark a socket with data of your choice (it's a transparent write to a hashtable)
; similar to sockmark
;   /http.mark <handle> [data] - Writes the data. Specify no data to delete the mark
;   $http.mark(handle) - Reads the data
;
; Signals
;   In order to tell via script when something happens (your file is being saved, for example) use
;     on *:SIGNAL:http:
;   where $1 = Handle name, $2 = Event name, $3- = parameters
;   Events:
;     COMPLETED <status line> - when a request completes. The parameter is the full first
;                               line of the response, e.g. HTTP/1.1 200 OK
;     SAVED <location>        - when a save completes
;     REDIRECT <location>     - when a request is redirected elsewhere
;     PROGRESS <bytes> [total size] - When part but not all of the data is downloaded.
;                               Total size is not available for chunked transfer
;
; Internals
;   Every handle <name> is backed by a hash table called http.<name>; the socket and the
;   output file handle use that same name. POST variables live in http.<name>.postdata.
;
; Known issues:
;   POSTing arrays is not currently supported
;   Cookies do not respect attributes such as domain, path, expiration, or secure.
;     They are always sent
;   Relative redirects don't work, only absolute
;   Redirects from insecure to secure links when you don't have SSL will not be handled well
;   http.urlencode always encodes '%', so URLs (including redirect targets) that are
;     already percent-encoded get encoded a second time

; Creates a request that when completed is saved to a file (or to a binvar when the
; destination starts with &)
;   /http.save [-f] <handle> <file|&binvar>
alias http.save {
  if ($1 == -f) {
    var %force = $true
    tokenize 32 $2-
  }
  else {
    var %force = $false
  }
  if ($0 < 2) {
    echo -atg * /http.save: Use /http.save <http handle> <file>
    return
  }
  var %hashtable = http. $+ $1
  if (!$hget(%hashtable)) {
    echo -atg * /http.save: No such HTTP handle exists. Use /http.open first
    return
  }
  if ($sock(%hashtable)) {
    echo -atg * /http.save: There is a pending request for this HTTP handle already. A connection is open the the remote host. Close it, or wait for it to complete
    return
  }
  ; Forget the destination of any earlier save on this handle. Without this a handle
  ; that was first saved to a file and is now saved to a binvar (or the other way
  ; around) would have both outputs registered, and http.onDone would write to a
  ; file handle that is already closed.
  if ($fopen(%hashtable)) { .fclose %hashtable }
  .hdel %hashtable fstream
  .hdel %hashtable bvarout
  var %destination = $2-
  if ($exists(%destination)) {
    if (%force) {
      .remove %destination
    }
    else {
      echo -atg * /http.save: File already exists. Try /http.save -f to force an overwrite
      return
    }
  }
  if (&* iswm %destination) {
    .hadd %hashtable bvarout %destination
  }
  else {
    ; Open file handler stream for writing
    .fopen -n %hashtable %destination
    if ($ferr) {
      echo -atg * /http.save: Could not use file. Error: $ferr
      return
    }
    ; Store fstream name in hashtable to indicate that we want to save this once it completes
    .hadd %hashtable fstream %hashtable
  }
  ; Set redirect counter to 0
  .hadd %hashtable redirects 0
  ; Start socket
  http.sockstart %hashtable
}

; /http.mark <handle> [data] - stores data on the handle, or deletes it when no data is given
; $http.mark(handle)         - returns the stored data
alias http.mark {
  var %hashtable = http. $+ $1
  if ($isid) {
    ; As an identifier, read the mark
    return $hget(%hashtable,mark)
  }
  else {
    ; As a command, set the mark
    if (!$hget(%hashtable)) {
      echo -atg * /http.mark - No such handle $1
    }
    elseif ($0 < 2) {
      ; Test the parameter count rather than !$2 so that a mark of "0" is stored
      ; instead of being treated as "no data"
      .hdel %hashtable mark
    }
    else {
      .hadd %hashtable mark $2-
    }
  }
}

; Opens socket
; $1 = internal handle name (http.<name>). Reuses the socket when it is already open
; (and is an SSL socket when the URL is secure); otherwise opens a new connection to the
; host/port stored in the hash table. The request is sent from on SOCKOPEN.
alias -l http.sockstart {
  if ($hget($1,secure) == $true) {
    if ($sock($1) && $sock($1).ssl) {
      http.sendrequest $1
    }
    else {
      if ($sock($1)) { .sockclose $1 }
      if (!$sslready) {
        echo -atg HTTP Error - request $qt($right($1,-5)) would require you to use SSL but you are not SSL ready. Please see http://mirc.com/ssl.html
        http.close $right($1,-5)
        return
      }
      sockopen -e $1 $hget($1, host) $hget($1, port)
    }
  }
  else {
    if ($sock($1)) {
      http.sendrequest $1
    }
    else {
      sockopen $1 $hget($1, host) $hget($1, port)
    }
  }
}

; Sends the request. Assumes the socket is open and ready
; $1 = internal handle name (http.<name>)
alias -l http.sendrequest {
  if (!$sock($1)) {
    echo -atg HTTP Internal error. Socket not open in /http.sendrequest
    return
  }
  var %inname = $1
  var %exname = $right($1,-5)
  ; Cleanup from previous requests
  .hdel %inname responseheaders
  .hdel %inname body
  .hdel %inname contentlength
  .hdel %inname chunkleft
  ; Get POST binvar
  var %postBV = $http.postdata(%exname)
  ; Use GET if there's no post data, POST otherwise
  if ($bvar(%postBV,0) == 0) {
    sockwrite -n %inname GET $hget(%inname,path) HTTP/1.1
  }
  else {
    sockwrite -n %inname POST $hget(%inname,path) HTTP/1.1
  }
  ; Send the host
  sockwrite -n %inname Host: $hget(%inname,host)
  ; Send the user-agent
  sockwrite -n %inname User-agent: $hget(%inname,user-agent)
  sockwrite -n %inname Connection: Keep-Alive
  ; Only plain-text supported
  sockwrite -n %inname Accept: text/plain; q=0.5, text/html
  ; Send cookies
  if ($http.cookie(%exname, 0) > 0) {
    sockwrite %inname Cookie: $+ $chr(32)
    var %i = 1
    while ($http.cookie(%exname, %i)) {
      sockwrite %inname $http.urlencode($ifmatch) $+ ;
      inc %i
    }
    ; Terminate the Cookie header line. $crlf is passed explicitly so the command
    ; always has a text parameter; -n does not add a second CRLF.
    sockwrite -n %inname $crlf
  }
  ; If POST send content type and length
  if ($bvar(%postBV,0) > 0) {
    sockwrite -n %inname Content-Type: application/x-www-form-urlencoded
    sockwrite -n %inname Content-Length: $bvar(%postBV,0)
  }
  ; End request headers with empty line
  sockwrite -n %inname $crlf
  ; If POST, send data
  if ($bvar(%postBV,0) > 0) {
    sockwrite %inname %postBV
    ; sockwrite -n %inname
  }
}

; Just some bookeeping in case we miss it
on *:SOCKCLOSE:http.*: {
  if ($fopen($sockname)) { .fclose $sockname }
}

; Connection established (or failed): send the request, or report the failure and
; release the handle the same way on SOCKREAD does.
on *:SOCKOPEN:http.*: {
  if ($sockerr > 0) {
    echo -atg * HTTP handle $qt($right($sockname,-5)) failed to connect with socket error $sockerr
    http.close $right($sockname,-5)
    return
  }
  http.sendrequest $sockname
}

; Reads whatever is available on the socket, splits off the response headers on the
; first call, then accumulates the body (plain or chunked) in the hash table item
; "body" until the response is complete.
on *:SOCKREAD:http.*: {
  if ($sockerr > 0) {
    echo -atg * HTTP handle $qt($right($sockname,-5)) failed with socket error $sockerr
    http.close $right($sockname,-5)
    return
  }
  ;Read all the data into &bvTemp
  :read
  ; First, read this buffer
  sockread &bvThis
  if ($sockbr > 0) {
    ; If data was read, copy it to the on-going buffer and continue
    bcopy &bvTemp $calc($bvar(&bvTemp,0) +1) &bvThis 1 -1
    goto read
  }
  if ($bvar(&bvTemp,0) == 0) {
    echo -atg HTTP sockread called with no data to read...
    return
  }
  ; Do we need to parse headers?
  ; If response headers not set, this call has the headers
  if (!$hget($sockname,responseheaders)) {
    ; Find the response headers
    var %end = $bfind(&bvTemp,1, 13 10 13 10)
    if (%end == 0) {
      echo -atg Error: No headers found, but no content length so this can't be a subsequent call
      return
    }
    ; Increase to include the 3 extra characters
    inc %end 3
    bcopy &bvResponseHeaders 1 &bvTemp 1 %end
    ; If there's still non-header data copy it over
    if (%end < $bvar(&bvTemp, 0)) {
      bcopy -c &bvTemp 1 &bvTemp $calc(%end + 1) -1
    }
    else {
      bunset &bvTemp
    }
    ; If this is the first header response, save in hash table
    if (!$hget($sockname, responseheaders)) {
      .hadd -b $sockname responseheaders &bvResponseHeaders
      http.OnHeaders $sockname
      ; http.OnHeaders sets "redirected" when it has started a follow-up request.
      ; Whatever is left in this buffer belongs to the redirect response, so it
      ; must not be treated as the body of the new request, and http.onDone must
      ; not run (it would close the socket that was just opened for the redirect).
      if ($hget($sockname,redirected)) {
        .hdel $sockname redirected
        return
      }
    }
  }
  ; there's no more to read, bail
  if (!$bvar(&bvTemp,0)) {
    ; If that's all this response has to offer, we're done
    if ($hget($sockname,contentlength) == 0) { http.ondone $sockname }
    return
  }
  ; Read body
  ; contentlength is $false when the response had no Content-Length header (see
  ; http.OnHeaders); both that and a missing item select chunked handling
  var %contentlength = $hget($sockname,contentlength)
  if (%contentlength == $null || %contentlength == $false) {
    var %chunked = $true
  }
  else {
    var %chunked = $false
  }
  ; Set bvBody to the body ready so far
  if ($hget($sockname,body)) { noop $hget($sockname, body, &bvBody) }
  if (%chunked) {
    :readChunk
    var %chunkleft = $hget($sockname, chunkleft)
    if (!%chunkleft) {
      ; If we've read all of a chunk, or this is the first one, get its size
      var %crlf = $bfind(&bvTemp, 1, 13 10)
      if (!%crlf) {
        echo -atg HTTP internal error. Handle $qt($right($sockname,-5)) is in chunked encoding but no chunk size found in data
        return
      }
      var %chunkleft = $base($bvar(&bvTemp, 1, $calc(%crlf - 1)).text,16,10)
      if (%chunkleft !isnum) {
        echo -atg HTTP internal error. Handle $qt($right($sockname,-5)) has invalid chunk size: %chunkleft
        ; Nothing sensible can be copied with a non-numeric size, so stop here
        return
      }
      else {
        ; New chunk
        ; Save chunk size and remove it (plus its \r\n) from temp
        .hadd $sockname chunkleft %chunkleft
        .bcopy -c &bvTemp 1 &bvTemp $calc(%crlf +2) -1
        if (%chunkLeft == 0) {
          ; if we just read chunk size 0, that was the end.
          .hadd -b $sockname body &bvBody
          http.onDone $sockname
          return
        }
      }
    }
    ; read %chunkleft bytes
    var %bytesToRead = $iif(%chunkLeft < $bvar(&bvTemp,0),%chunkLeft, $bvar(&bvTemp,0))
    .bcopy &bvBody $calc($bvar(&bvBody,0) +1) &bvTemp 1 %bytesToRead
    hdec $sockname chunkleft %bytesToRead
    .signal http $right($sockname,-5) PROGRESS %bytesToRead
    if (%bytesToRead == %chunkLeft) {
      ; If this is the end of the chunk, consume crlf
      inc %bytesToRead 2
      ; If there's still stuff in this var, go back and read the new chunk
      if (%bytesToRead < $bvar(&bvTemp,0)) {
        .bcopy -c &bvTemp 1 &bvTemp $calc(%bytesToRead + 1) -1
        goto readChunk
      }
    }
    ; End of this call, save body in hashtable for future calls
    .hadd -b $sockname body &bvBody
  }
  else {
    ; Copy everything
    .bcopy &bvBody $calc($bvar(&bvBody,0) +1) &bvTemp 1 -1
    ; Add read-so-far body to hashtable
    .hadd -b $sockname body &bvBody
    .signal http $right($sockname,-5) PROGRESS $bvar(&bvBody,0) $hget($sockname,contentlength)
    ; Check if we're done
    if ($bvar(&bvBody,0) >= $hget($sockname,contentlength)) {
      ; We're done
      http.onDone $sockname
      if ($bvar(&bvBody,0) > $hget($sockname,contentlength)) {
        echo -qatg HTTP Warning: Read past content-length.
      }
    }
  }
}

; Internal event called when we get the first header response from the server
; $1 = internal handle name (http.<name>)
; Validates the status line, records the content length, stores cookies and follows
; redirects.
alias -l http.OnHeaders {
  noop $hget($1,responseheaders, &bvResponseHeaders)
  ; First header is always the status response
  var %end = $bfind(&bvResponseHeaders, 1, 13 10)
  if (%end <= 1) {
    echo -atg Error: HTTP $qt($right($1,-5)) Server did not respond with HTTP status
    goto cleanup
  }
  var %status = $bvar(&bvResponseHeaders,1,$calc(%end - 1)).text
  .hadd $1 responsestatus %status
  var %version = $gettok(%status,1,32)
  if (%version != HTTP/1.1 && %version != HTTP/1.0) {
    echo -atg Error: HTTP $qt($right($1,-5)) Server indicated unknown version: %version
    goto cleanup
  }
  var %statuscode = $gettok(%status,2,32)
  if (%statuscode !isnum) {
    echo -atg Error: HTTP $qt($right($1,-5)) Server indicated invalid status code: %statuscode
    goto cleanup
  }
  ; Check transfer type
  ; NOTE: $http.responseheader returns $false (not $null) for a missing header, so the
  ; first branch is also taken when there is no Content-Length and stores $false. The
  ; SOCKREAD event relies on that value to fall back to chunked handling, so this is
  ; deliberately left unchanged.
  if ($http.responseheader($right($1,-5),Content-Length,1) != $null) {
    hadd $1 contentlength $v1
  }
  elseif ($http.responseheader($right($1,-5), Transfer-Encoding, 1) != chunked) {
    echo -atg HTTP Handle $qt($right($1,-5)) has no content-length header and is not chunked
    echo -atg $bvar(&bvResponseHeaders,1-).text
    return
  }
  ; Add any cookies that were set
  var %cookie = 1
  while ($http.responseheader($right($1,-5), Set-Cookie, %cookie)) {
    inc %cookie
    ; Keep only name=value; attributes after the first ; are dropped
    var %cookieValue = $gettok($ifmatch,1,$asc(;))
    http.addcookie $right($1,-5) %cookieValue
  }
  ; Handle redirects (301 and 308 are followed the same way as 302/303/307)
  if (%statuscode == 301 || %statuscode == 302 || %statuscode == 303 || %statuscode == 307 || %statuscode == 308) {
    var %location = $http.responseheader($right($1,-5), Location)
    ; A missing or relative Location cannot be parsed by http.urlparse; following it
    ; would try to connect to a bogus host
    if (!%location || !$http.urlparse($http.urlencode(%location))) {
      echo -atg Error: HTTP $qt($right($1,-5)) Redirect has a missing or unsupported Location header: %location
      goto cleanup
    }
    .signal http $right($1,-5) REDIRECT %location
    ; Remember where the current socket is connected before the URL is replaced
    var %oldTarget = $+($hget($1,secure),/,$hget($1,host),:,$hget($1,port))
    http.seturl $1 %location
    var %newTarget = $+($hget($1,secure),/,$hget($1,host),:,$hget($1,port))
    hinc $1 redirects
    ; If the server doesn't promise us this connection is keep-alive, or the redirect
    ; points at a different scheme/host/port, close it and use a new one
    if ($http.responseheader($right($1,-5), Connection, 1) != Keep-Alive || %oldTarget != %newTarget) {
      if ($sock($1)) { .sockclose $1 }
    }
    if ($hget($1, redirects) > 5) {
      echo -atg HTTP Handle $qt($right($1,-5)) redirect loop detected
      return
    }
    ; Tell on SOCKREAD that a new request replaces this response
    .hadd $1 redirected 1
    http.sockstart $1
  }
  return
  :cleanup
  if ($sock($1)) { .sockclose $1 }
}

; Adds a cookie (name=value) to be sent with requests on the given handle
;   /http.addcookie <handle> <value>
; Cookies are stored as hash table items cookie-<N>-value. A cookie whose name (the
; text before the first =) is already stored replaces the stored one.
alias http.addcookie {
  var %hashtable = http. $+ $1
  if (!$hget(%hashtable)) {
    echo -atg * /http.addcookie: No such HTTP handle
    return
  }
  if ($0 < 2) {
    echo -atg * /http.addcookie: Usage: /http.addcookie <handle> <value>
    return
  }
  var %i = 1
  var %cookieNum = 1
  var %newName = $gettok($2-,1,61)
  while ($hget(%hashtable,%i).item) {
    var %item = $ifmatch
    inc %i
    if (cookie-* iswm %item) {
      var %tmp = $gettok(%item,2,$asc(-))
      if (%newName === $gettok($hget(%hashtable,%item),1,61)) {
        ; Same cookie name: overwrite that slot so the cookie is not sent twice
        %cookieNum = %tmp
        break
      }
      elseif (%tmp >= %cookieNum) {
        ; Otherwise store max cookie num
        %cookieNum = $calc(%tmp + 1)
      }
    }
  }
  .hadd %hashtable cookie- $+ %cookieNum $+ -value $2-
}

; Gets an HTTP cookie for the given handle
; $http.cookie(<handle>, <index>)
; Index 0 returns the number of cookies stored on the handle
alias http.cookie {
  if ($2 !isnum) {
    echo -atg * $!http.cookie invalid parameters
    return
  }
  var %hashtable = http. $+ $1
  if (!$hget(%hashtable)) {
    echo -atg * $!http.cookie no such HTTP handle
    return
  }
  var %i = 1
  var %found = 0
  while ($hget(%hashtable, %i).item) {
    inc %i
    var %item = $ifmatch
    if (cookie-*-value iswm %item) {
      inc %found
      var %id = $gettok(%item,2,$asc(-))
      if (%found == $2) {
        return $hget(%hashtable, cookie- $+ %id $+ -value)
      }
    }
  }
  if ($2 == 0) { return %found }
}

; Returns the value of the given response header
; $http.responseheader(<handle>, <header>[, index])
;   index 1-N selects the Nth occurrence (default 1), index 0 returns the number of
;   occurrences. Returns $false when there are no response headers or no such match.
; Case sensitive since it is binary variable operation
alias http.responseheader {
  if ($0 < 2) {
    echo -atg Invalid usage. Use: $http.responseheader(<handle>, <header>[, index])
    return
  }
  ; %index is parameter 3 - 1-N or 0 for number
  var %index = 1
  if ($3 isnum) { %index = $3 }
  ; Veriy hash table exists
  var %table = http. $+ $1
  if (!$hget(%table,responseheaders)) { return $false }
  noop $hget(%table,responseheaders, &bvResponseHeaders)
  ; Set up parameters for the header search
  ; Offset is where we start searching from in the loop
  ; thisIndex is the number of matches found so far
  var %offset = 1
  var %thisIndex = 0
  ; The main search loop
  :search
  var %start = $bfind(&bvResponseHeaders, %offset, $2 $+ :).text
  ; If there isn't another match, either the index was too large or they want the count
  if (!%start) {
    if (%index == 0) { return %thisIndex }
    else { return $false }
  }
  ; Find the end of the header
  var %end = $bfind(&bvResponseHeaders, %start, 13 10)
  if (%end <= %start) {
    echo -atg HTTP Internal error ($http.responseheader), header $qt($2) for handle $qt($1) has no CRLF termination
    return $false
  }
  inc %thisIndex
  ; If this not the one they asked for, try again starting from the end of this header
  if (%index != %thisIndex) {
    var %offset = %end
    goto search
  }
  ; Find the colon. If there isn't one we'll return the whole string
  if ($calc($bfind(&bvResponseHeaders, %start, $asc(:)) +1) < %end) { %start = $v1 }
  return $bvar(&bvResponseHeaders,%start, $calc(%end - %start)).text
}

; Internal event called when a request is filled.
; $1 = internal handle name
; Writes the body to the file or binvar registered by /http.save, closes the socket
; and raises the SAVED / COMPLETED signals.
alias -l http.onDone {
  if ($hget($1,responseheaders)) { noop $hget($1,responseheaders,&bvResponseHeaders) }
  ; Write out and close filestream if set
  if ($hget($1,fstream)) {
    noop $hget($1,body,&bvBody)
    if (!$bvar(&bvBody,0)) {
      ; Empty body: nothing to write (no SAVED signal is raised in this case)
      .fclose $hget($1,fstream)
    }
    else {
      .fwrite -b $hget($1,fstream) &bvBody
      ; Save fname so we can use it in the signal after the close
      var %fname = $fopen($hget($1,fstream)).fname
      .fclose $hget($1,fstream)
      .signal http $right($1,-5) SAVED %fname
    }
  }
  ; If socket is still around, close it
  if ($sock($1)) { .sockclose $1 }
  ; Reset redirects for future calls
  .hadd $1 redirects 0
  .signal HTTP $right($1,-5) COMPLETED $hget($1,responsestatus)
  if ($hget($1, bvarout)) {
    ; $ifmatch still holds the binvar name matched by the if above
    noop $hget($1,body,&bvBody)
    var %bvarout = $ifmatch
    bcopy %bvarout 1 &bvBody 1 -1
    ; Need to use -n so that the binvar is still in scope
    ; So we need to do this last
    .signal -n http $right($1,-5) SAVED %bvarout
  }
}

; Opens an HTTP handle for a URL
;   /http.open [name] <url>
; Creates the hash table http.<name> and stores the parsed URL in it. Nothing is sent
; until /http.save is used.
alias http.open {
  var %hashtable
  ; If two parameters are given, $1 is the name, else create a new name
  if ($0 > 1) {
    %hashtable = http. $+ $1
  }
  else {
    var %h = $calc($ticks % $rand(1,1000000))
    var %hashtable = http. $+ %h
  }
  if ($hget(%hashtable)) {
    echo -atg * /http.open: HTTP Request $qt($1) already exists
    return
  }
  ; Encode the octects of the URL. e.g.: foo bar,pie --> foo%20bar,%2Cpie
  var %url = $iif($0 > 1, $2-, $1-)
  var %urlEncoded = $http.urlencode(%url)
  ; Check the URL
  if (!$http.urlparse(%urlEncoded)) {
    echo -atg * /http.open: Malformed URL. Use /http.open [name] <url>
    return
  }
  if ($http.urlparse(%urlEncoded).secure && !$sslready) {
    echo -atg * /http.open: URL is SSL but $!sslready = false. See http://www.mirc.com/ssl.html
    return
  }
  ; Everything checks out, let's store the relevent bits in a hashtable
  .hmake %hashtable 10
  http.seturl %hashtable %url
  echo -qatg * /http.open: Opened HTTP request $qt($right(%hashtable,-5))
}

; Stores host, port, secure flag, path and user-agent for a URL in the handle's table
; $1 = internal handle name (http.<name>), $2- = URL (encoded here with http.urlencode)
alias -l http.seturl {
  if ($0 < 2) {
    echo -atg HTTP Internal error, seturl called with no url
    return
  }
  var %hashtable = $1
  if (!$hget(%hashtable)) {
    echo -atg HTTP Internal error, seturl called with invalid handle
    return
  }
  var %urlEncoded = $http.urlencode($2-)
  .hadd %hashtable host $http.urlparse(%urlEncoded).host
  .hadd %hashtable port $http.urlparse(%urlEncoded).port
  .hadd %hashtable secure $http.urlparse(%urlEncoded).secure
  .hadd %hashtable path $http.urlparse(%urlEncoded).path
  .hadd %hashtable user-agent mIRC $version
}

; Lists every open HTTP handle with its host, port and path
alias http.list {
  var %i = 1
  var %numFound = 0
  while (%i <= $hget(0)) {
    var %table = $hget(%i)
    inc %i
    if (http.* iswm %table) {
      inc %numFound
      echo -atg * $right(%table,-5) $+ : Host: $hget(%table,host) Port: $hget(%table,port) Path: $hget(%table,path)
    }
  }
  if (%numFound == 0) { echo -atg * No HTTP handles opened }
}

; Closes every HTTP handle whose name matches $1 (wildcards allowed) and releases its
; socket, file handle, POST table and hash table
;   /http.close <name>
alias http.close {
  var %i = 1
  var %numFound = 0
  while (%i <= $hget(0)) {
    var %table = $hget(%i)
    if (http. $+ $1 iswm %table) {
      ; Cleanup resources
      if ($sock(%table)) { .sockclose %table }
      if ($fopen(%table)) { .fclose %table }
      if ($hget(%table $+ .postdata)) { .hfree %table $+ .postdata }
      .hfree %table
      inc %numFound
      echo -qatg * HTTP Closed $right(%table,-5)
    }
    else {
      ; Only inc if we didnt find a match: freeing a table shifts the
      ; remaining tables down into the current index
      inc %i
    }
  }
  if (%numFound == 0) { echo -qatg * No matching HTTP handles }
}

;
; Takes an encoded URL and returns based on the property
;   .secure - true if https
;   .host - the URL host
;   .port - the port, 80 by default (443 for https)
;   .path - The path to download, / by default
; Without a property returns $true for a well-formed URL
; Returns $false is URL is malformed
;
alias http.urlparse {
  var %secure
  var %host
  var %port
  var %path
  var %pathIndex
  var %protocol
  var %regex = /^(https?://)?([a-z.0-9\-_]+)(:\d+)?(/.*)?$/i
  if ($regex($1, %regex)) {
    ; Optional groups that did not match are not counted by $regml, so the position of
    ; the host/port/path captures depends on which of the earlier groups were present
    if (http*:// iswm $regml(1)) { %protocol = $lower($left($regml(1),-3)) }
    %secure = $false
    if (%protocol == https) { %secure = $true }
    if (%protocol) { %host = $regml(2) }
    else { %host = $regml(1) }
    var %portIndex = $iif(%protocol, 3, 2)
    var %portfound = $false
    if ($left($regml(%portIndex),1) == : && $right($regml(%portIndex),-1) isnum) {
      %port = $right($regml(%portIndex),-1)
      %portFound = $true
    }
    else {
      if (%secure) { %port = 443 }
      else { %port = 80 }
    }
    %pathIndex = $iif(%portFound, $calc(%portIndex + 1), %portIndex)
    if ($regml(0) >= %pathIndex) { %path = $regml(%pathIndex) }
    else { %path = / }
  }
  else {
    return $false
  }
  if ($prop == secure) { return %secure }
  if ($prop == host) { return %host }
  if ($prop == path) { return %path }
  if ($prop == port) { return %port }
  return $true
}

; Adds (or replaces) a POST variable on the handle. Having any POST variable makes the
; request a POST.
;   /http.addpost <handle> <variable> <value>
alias http.addpost {
  if ($0 < 3) {
    echo -atg * /http.addpost: Usage: /http.addpost <handle> <variable> <value>
    return
  }
  var %maintable = http. $+ $1
  var %posttable = %maintable $+ .postdata
  if (!$hget(%maintable)) {
    echo -atg * /http.addpost: No such HTTP handle found
    return
  }
  if (!$hget(%posttable)) { .hmake %posttable 10 }
  hadd %posttable $2 $3-
  echo -qatg Post variable added: $2 $+ = $+ $3-
}

; Removes a POST variable from the handle
;   /http.delpost <handle> <variable>
alias http.delpost {
  if ($0 < 2) {
    echo -atg * /http.delpost: Usage: /http.delpost <handle> <variable>
    return
  }
  var %maintable = http. $+ $1
  var %posttable = %maintable $+ .postdata
  if (!$hget(%maintable)) {
    echo -atg * /http.delpost: No such HTTP handle found
    return
  }
  if (!$hget(%posttable,$2)) {
    echo -qatg * /http.delpost: No post variable with that name exists
    return
  }
  hdel %posttable $2
  echo -qatg Post variable deleted: $2
}

; Usage: $http.postdata(handle)
; Returns a binvar with a post data style query string (x=y&z=a) or $false
alias http.postdata {
  ; Post data is stored in INNAME.postdata
  var %posttable = http. $+ $1 $+ .postdata
  if (!$hget(%posttable,0).item) { return $false }
  ; Start from an empty binvar: &bvPost may still hold the data built by an earlier
  ; call in the same script run, and the loop below only appends
  bunset &bvPost
  var %i = 1
  while ($hget(%posttable,%i).item) {
    var %item = $ifmatch
    bset -t &bvPost $calc($bvar(&bvPost,0) +1) $iif(%i > 1,&,) $+ $http.urlencode(%item) $+ = $+ $http.urlencode($hget(%posttable,%item))
    inc %i
  }
  return &bvPost
}

; Replace some commonly touchy URL characters with their hex-octect encoding
; (space, comma, + and %). '%' is always encoded, so text that is already
; percent-encoded ends up encoded twice.
alias -l http.urlencode {
  return $replacex($1-,$chr(32),% $+ 20, $chr(44), % $+ 2C,+, % $+ 2B, $chr(37), % $+ 25)
}
Contributed by aca20031 |