Merge lp:~n-muench/ubuntu/precise/open-vm-tools/open-vm-tools.raring-precise.backport into lp:ubuntu/precise/open-vm-tools

Proposed by Nate Muench (Mink)
Status: Superseded
Proposed branch: lp:~n-muench/ubuntu/precise/open-vm-tools/open-vm-tools.raring-precise.backport
Merge into: lp:ubuntu/precise/open-vm-tools
Diff against target: 114055 lines (+46486/-22288)
371 files modified
.pc/.quilt_patches (+1/-0)
.pc/.quilt_series (+1/-0)
.pc/03-dkms.patch/modules/linux/dkms.conf (+1/-1)
.pc/04-vsock-cve.patch/modules/linux/vsock/linux/af_vsock.c (+5440/-0)
.pc/applied-patches (+1/-0)
ChangeLog (+19/-0)
INSTALL (+7/-2)
Makefile.in (+81/-36)
NEWS (+50/-0)
aclocal.m4 (+46/-26)
autom4te.cache/output.0 (+2849/-2164)
autom4te.cache/output.1 (+2849/-2164)
autom4te.cache/output.2 (+2849/-2164)
autom4te.cache/requests (+32/-6)
autom4te.cache/traces.0 (+767/-672)
autom4te.cache/traces.1 (+1001/-1023)
autom4te.cache/traces.2 (+767/-672)
checkvm/Makefile.in (+42/-10)
checkvm/checkvm.c (+1/-8)
config/compile (+216/-16)
config/config.guess (+146/-118)
config/config.sub (+134/-75)
config/depcomp (+134/-56)
config/install-sh (+18/-11)
config/ltmain.sh (+2665/-1423)
config/missing (+4/-49)
configure (+2715/-2030)
configure.ac (+14/-7)
debian/changelog (+10/-0)
debian/patches/04-vsock-cve.patch (+18/-0)
debian/patches/series (+1/-0)
docs/Makefile.in (+41/-15)
docs/api/Makefile.in (+37/-8)
hgfsclient/Makefile.in (+42/-10)
hgfsclient/hgfsclient.c (+1/-0)
hgfsmounter/Makefile.in (+42/-10)
hgfsmounter/hgfsmounter.c (+2/-0)
lib/Makefile.in (+41/-15)
lib/appUtil/Makefile.in (+38/-9)
lib/auth/Makefile.in (+38/-9)
lib/auth/authPosix.c (+193/-74)
lib/backdoor/Makefile.in (+38/-9)
lib/dict/Makefile.in (+38/-9)
lib/dynxdr/Makefile.in (+38/-9)
lib/err/Makefile.in (+38/-9)
lib/err/errPosix.c (+1/-1)
lib/file/Makefile.in (+38/-9)
lib/file/file.c (+185/-50)
lib/file/fileIO.c (+75/-52)
lib/file/fileIOPosix.c (+406/-33)
lib/file/fileInt.h (+6/-6)
lib/file/fileLockPosix.c (+1/-2)
lib/file/fileLockPrimitive.c (+6/-3)
lib/file/filePosix.c (+512/-200)
lib/file/fileStandAlone.c (+61/-28)
lib/file/fileTempPosix.c (+3/-3)
lib/foundryMsg/Makefile.in (+38/-9)
lib/foundryMsg/foundryMsg.c (+27/-3)
lib/foundryMsg/foundryPropertyListCommon.c (+0/-1)
lib/foundryMsg/vixTranslateErrOpenSource.c (+57/-10)
lib/glibUtils/Makefile.in (+38/-9)
lib/guestApp/Makefile.in (+38/-9)
lib/guestRpc/Makefile.in (+38/-9)
lib/hgfs/Makefile.in (+38/-9)
lib/hgfs/hgfsUtil.c (+2/-2)
lib/hgfsBd/Makefile.in (+38/-9)
lib/hgfsHelper/Makefile.in (+38/-9)
lib/hgfsServer/Makefile.am (+2/-0)
lib/hgfsServer/Makefile.in (+44/-11)
lib/hgfsServer/hgfsDirNotify.h (+33/-13)
lib/hgfsServer/hgfsDirNotifyStub.c (+70/-71)
lib/hgfsServer/hgfsServer.c (+540/-1054)
lib/hgfsServer/hgfsServerInt.h (+122/-96)
lib/hgfsServer/hgfsServerLinux.c (+948/-334)
lib/hgfsServer/hgfsServerOplock.c (+376/-0)
lib/hgfsServer/hgfsServerOplockLinux.c (+376/-0)
lib/hgfsServer/hgfsServerPacketUtil.c (+1/-1)
lib/hgfsServer/hgfsServerParameters.c (+27/-24)
lib/hgfsServerManagerGuest/Makefile.in (+38/-9)
lib/hgfsServerManagerGuest/hgfsChannelGuest.c (+3/-2)
lib/hgfsServerPolicyGuest/Makefile.in (+38/-9)
lib/impersonate/Makefile.in (+38/-9)
lib/include/auth.h (+5/-0)
lib/include/backdoor_def.h (+7/-1)
lib/include/buildNumber.h (+6/-6)
lib/include/circList.h (+1/-1)
lib/include/config.h (+68/-68)
lib/include/cryptoError.h (+2/-2)
lib/include/embed_version.h (+0/-2)
lib/include/file.h (+20/-2)
lib/include/fileIO.h (+6/-2)
lib/include/file_extensions.h (+1/-0)
lib/include/guestStats.h (+2/-0)
lib/include/guest_os.h (+149/-172)
lib/include/guest_os_tables.h (+246/-0)
lib/include/hashTable.h (+2/-0)
lib/include/hgfs.h (+3/-0)
lib/include/hgfsProto.h (+16/-19)
lib/include/hgfsServer.h (+16/-1)
lib/include/hostinfo.h (+63/-61)
lib/include/includeCheck.h (+3/-1)
lib/include/iovector.h (+38/-38)
lib/include/libExport.hh (+6/-6)
lib/include/log.h (+12/-25)
lib/include/loglevel_user.h (+12/-1)
lib/include/memaligned.h (+2/-0)
lib/include/msg.h (+70/-65)
lib/include/msgList.h (+17/-14)
lib/include/msgid.h (+2/-1)
lib/include/mutexRank.h (+14/-1)
lib/include/mutexRankLib.h (+21/-2)
lib/include/panic.h (+15/-16)
lib/include/posix.h (+1/-1)
lib/include/preference.h (+19/-19)
lib/include/procMgr.h (+3/-2)
lib/include/rpcout.h (+5/-0)
lib/include/sha1.h (+19/-1)
lib/include/sigPosixRegs.h (+12/-0)
lib/include/str.h (+39/-39)
lib/include/strutil.h (+18/-11)
lib/include/timeutil.h (+40/-40)
lib/include/userlock.h (+1/-0)
lib/include/util.h (+166/-84)
lib/include/util_shared.h (+3/-2)
lib/include/vix.h (+8/-0)
lib/include/vixCommands.h (+266/-5)
lib/include/vixOpenSource.h (+151/-0)
lib/include/vm_api.h (+15/-12)
lib/include/vm_assert.h (+18/-32)
lib/include/vm_atomic.h (+29/-4)
lib/include/vm_basic_asm.h (+163/-6)
lib/include/vm_basic_asm_x86.h (+18/-0)
lib/include/vm_basic_asm_x86_64.h (+19/-2)
lib/include/vm_basic_defs.h (+38/-8)
lib/include/vm_basic_math.h (+7/-0)
lib/include/vm_basic_types.h (+47/-36)
lib/include/vm_compilation_options.h (+47/-0)
lib/include/vm_ctype.h (+2/-1)
lib/include/vm_device_version.h (+34/-4)
lib/include/vm_legal.h (+22/-10)
lib/include/vm_product.h (+28/-84)
lib/include/vm_product_versions.h (+445/-0)
lib/include/vm_tools_version.h (+89/-2)
lib/include/vm_version.h (+7/-514)
lib/include/vm_vmx_type.h (+57/-0)
lib/include/vmci_defs.h (+88/-9)
lib/include/vmci_sockets.h (+568/-70)
lib/include/vmfs.h (+17/-13)
lib/include/vmware/guestrpc/capabilities.h (+2/-0)
lib/include/vmware/guestrpc/tclodefs.h (+7/-6)
lib/include/vmware/tools/plugin.h (+0/-20)
lib/include/vmware/tools/utils.h (+3/-0)
lib/include/vthreadBase.h (+6/-5)
lib/include/win32util.h (+4/-1)
lib/include/x86cpuid.h (+447/-399)
lib/include/xdrutil.h (+2/-1)
lib/lock/Makefile.in (+38/-9)
lib/lock/ul.c (+10/-8)
lib/lock/ulCondVar.c (+6/-4)
lib/lock/ulExcl.c (+256/-190)
lib/lock/ulInt.h (+85/-71)
lib/lock/ulRW.c (+309/-220)
lib/lock/ulRec.c (+272/-190)
lib/lock/ulSema.c (+95/-85)
lib/lock/ulStats.c (+20/-11)
lib/message/Makefile.in (+38/-9)
lib/misc/Makefile.in (+38/-9)
lib/misc/atomic.c (+2/-0)
lib/misc/base64.c (+1/-1)
lib/misc/codeset.c (+45/-17)
lib/misc/codesetOld.c (+46/-16)
lib/misc/dynbuf.c (+37/-21)
lib/misc/hostinfoPosix.c (+205/-62)
lib/misc/idLinux.c (+12/-2)
lib/misc/machineID.c (+7/-17)
lib/misc/msgList.c (+65/-5)
lib/misc/msgfmt.c (+21/-14)
lib/misc/posixPosix.c (+26/-15)
lib/misc/sha1.c (+4/-2)
lib/misc/strutil.c (+175/-11)
lib/misc/timeutil.c (+0/-2)
lib/misc/utilMem.c (+202/-109)
lib/misc/util_misc.c (+111/-58)
lib/misc/vthreadBase.c (+28/-14)
lib/netUtil/Makefile.in (+38/-9)
lib/panic/Makefile.in (+38/-9)
lib/panic/panic.c (+0/-1)
lib/panicDefault/Makefile.in (+38/-9)
lib/printer/Makefile.in (+38/-9)
lib/procMgr/Makefile.in (+38/-9)
lib/procMgr/procMgrPosix.c (+174/-31)
lib/procMgr/procMgrSolaris.c (+105/-25)
lib/rpcChannel/Makefile.in (+38/-9)
lib/rpcIn/Makefile.in (+38/-9)
lib/rpcOut/Makefile.in (+38/-9)
lib/rpcVmx/Makefile.in (+38/-9)
lib/slashProc/Makefile.in (+38/-9)
lib/slashProc/net.c (+1/-1)
lib/string/Makefile.in (+38/-9)
lib/string/bsd_output_shared.c (+1/-1)
lib/string/bsd_vsnprintf.c (+48/-1)
lib/string/str.c (+66/-79)
lib/stubs/Makefile.in (+39/-10)
lib/stubs/stub-config.c (+5/-0)
lib/stubs/stub-user-msg.c (+8/-0)
lib/syncDriver/Makefile.in (+38/-9)
lib/system/Makefile.in (+38/-9)
lib/unicode/Makefile.in (+38/-9)
lib/user/Makefile.in (+38/-9)
lib/user/util.c (+4/-6)
lib/vmCheck/Makefile.in (+38/-9)
lib/vmSignal/Makefile.in (+38/-9)
lib/wiper/Makefile.in (+38/-9)
lib/xdg/Makefile.in (+38/-9)
libguestlib/Makefile.in (+56/-18)
libhgfs/Makefile.in (+46/-10)
libhgfs/hgfslib.c (+1/-0)
libvmtools/Makefile.in (+46/-10)
libvmtools/i18n.c (+1/-2)
libvmtools/vmtools.c (+1/-0)
libvmtools/vmtoolsLog.c (+88/-5)
m4/libtool.m4 (+1456/-847)
m4/ltoptions.m4 (+24/-8)
m4/ltversion.m4 (+6/-6)
m4/lt~obsolete.m4 (+9/-3)
modules/Makefile.am (+1/-1)
modules/Makefile.in (+49/-13)
modules/freebsd/vmblock/vnops.c (+3/-0)
modules/freebsd/vmhgfs/debug.c (+234/-0)
modules/freebsd/vmhgfs/debug.h (+9/-9)
modules/freebsd/vmhgfs/kernelStubs.h (+0/-1)
modules/freebsd/vmhgfs/state.c (+304/-89)
modules/freebsd/vmhgfs/state.h (+28/-14)
modules/freebsd/vmhgfs/vnops.c (+1/-1)
modules/freebsd/vmhgfs/vnopscommon.c (+430/-219)
modules/freebsd/vmhgfs/vnopscommon.h (+1/-1)
modules/freebsd/vmmemctl/Makefile (+2/-0)
modules/freebsd/vmmemctl/kernelStubsBSD.c (+259/-0)
modules/freebsd/vmmemctl/os.c (+122/-17)
modules/freebsd/vmxnet/if_vxn.c (+6/-0)
modules/linux/dkms.conf (+1/-1)
modules/linux/dkms.sh (+2/-2)
modules/linux/shared/autoconf/file_operations_fsync.c (+47/-0)
modules/linux/shared/compat_ethtool.h (+6/-0)
modules/linux/shared/compat_fs.h (+15/-0)
modules/linux/shared/compat_highmem.h (+7/-15)
modules/linux/shared/compat_netdevice.h (+6/-0)
modules/linux/shared/kernelStubs.h (+0/-1)
modules/linux/shared/vmciKernelAPI1.h (+10/-0)
modules/linux/shared/vmci_defs.h (+88/-9)
modules/linux/shared/vmci_iocontrols.h (+5/-4)
modules/linux/shared/vmci_kernel_if.h (+28/-21)
modules/linux/vmblock/linux/filesystem.c (+2/-3)
modules/linux/vmci/common/vmciCommonInt.h (+22/-5)
modules/linux/vmci/common/vmciContext.c (+163/-14)
modules/linux/vmci/common/vmciContext.h (+5/-5)
modules/linux/vmci/common/vmciDatagram.c (+12/-4)
modules/linux/vmci/common/vmciDriver.c (+8/-3)
modules/linux/vmci/common/vmciHashtable.c (+3/-3)
modules/linux/vmci/common/vmciPageChannel.c (+430/-171)
modules/linux/vmci/common/vmciQPair.c (+15/-12)
modules/linux/vmci/common/vmciQueuePair.c (+210/-119)
modules/linux/vmci/common/vmciRoute.c (+30/-2)
modules/linux/vmci/linux/driver.c (+15/-4)
modules/linux/vmci/linux/vmciKernelIf.c (+20/-8)
modules/linux/vmci/linux/vmci_version.h (+4/-4)
modules/linux/vmci/shared/pgtbl.h (+5/-13)
modules/linux/vmci/shared/vmci_page_channel.h (+532/-39)
modules/linux/vmhgfs/Makefile.kernel (+1/-0)
modules/linux/vmhgfs/dentry.c (+29/-2)
modules/linux/vmhgfs/file.c (+5/-5)
modules/linux/vmhgfs/filesystem.c (+72/-47)
modules/linux/vmhgfs/inode.c (+75/-19)
modules/linux/vmhgfs/page.c (+3/-2)
modules/linux/vmxnet/vmxnet.c (+43/-13)
modules/linux/vmxnet/vmxnet_version.h (+3/-3)
modules/linux/vsock/linux/af_vsock.c (+53/-31)
modules/linux/vsock/linux/stats.c (+2/-0)
modules/linux/vsock/linux/stats.h (+66/-26)
modules/linux/vsock/linux/util.h (+1/-0)
modules/linux/vsock/linux/vmci_sockets_packet.h (+158/-0)
modules/linux/vsock/linux/vsockAddr.c (+3/-10)
modules/linux/vsock/linux/vsockCommon.h (+63/-0)
modules/linux/vsock/linux/vsockPacket.h (+4/-85)
modules/linux/vsock/linux/vsock_version.h (+4/-4)
modules/shared/vmmemctl/backdoor_balloon.c (+408/-0)
modules/shared/vmmemctl/backdoor_balloon.h (+10/-6)
modules/shared/vmmemctl/balloonInt.h (+117/-0)
modules/shared/vmmemctl/balloon_def.h (+201/-17)
modules/shared/vmmemctl/kernelStubs.h (+191/-0)
modules/shared/vmmemctl/os.h (+9/-2)
modules/shared/vmmemctl/vmballoon.c (+617/-600)
modules/shared/vmmemctl/vmballoon.h (+70/-7)
modules/shared/vmxnet/eth_public.h (+9/-7)
modules/shared/vmxnet/vmnet_def.h (+52/-34)
modules/solaris/vmhgfs/kernelStubs.h (+0/-1)
modules/solaris/vmmemctl/Makefile (+2/-0)
modules/solaris/vmmemctl/kernelStubsSolaris.c (+380/-0)
modules/solaris/vmmemctl/os.c (+130/-19)
modules/solaris/vmxnet/vmxnet.c (+4/-0)
modules/solaris/vmxnet3/vmxnet3_main.c (+30/-7)
modules/solaris/vmxnet3/vmxnet3_solaris.h (+2/-1)
modules/solaris/vmxnet3/vmxnet3_solaris_compat.h (+7/-5)
modules/solaris/vmxnet3/vmxnet3s.conf (+9/-0)
rpctool/Makefile.in (+42/-10)
scripts/Makefile.in (+53/-16)
scripts/linux/network (+57/-35)
services/Makefile.in (+41/-15)
services/plugins/Makefile.in (+41/-15)
services/plugins/desktopEvents/Makefile.in (+46/-10)
services/plugins/desktopEvents/desktopEvents.c (+8/-0)
services/plugins/dndcp/Makefile.in (+46/-10)
services/plugins/dndcp/copyPasteUIX11.cpp (+72/-19)
services/plugins/dndcp/copyPasteUIX11.h (+10/-0)
services/plugins/dndcp/dnd/dnd.h (+40/-40)
services/plugins/dndcp/dnd/dndLinux.c (+19/-2)
services/plugins/dndcp/dndGuest/guestDnDCPMgr.cc (+1/-26)
services/plugins/dndcp/stringxx/string.cc (+38/-211)
services/plugins/dndcp/stringxx/string.hh (+10/-11)
services/plugins/dndcp/stringxx/ubstr_t.hh (+9/-0)
services/plugins/guestInfo/Makefile.in (+50/-17)
services/plugins/guestInfo/getlib/Makefile.in (+38/-9)
services/plugins/guestInfo/getlib/guestInfo.c (+1/-1)
services/plugins/guestInfo/getlib/guestInfoPosix.c (+15/-8)
services/plugins/guestInfo/guestInfoServer.c (+19/-8)
services/plugins/hgfsServer/Makefile.in (+46/-10)
services/plugins/hgfsServer/hgfsPlugin.c (+1/-0)
services/plugins/powerOps/Makefile.in (+46/-10)
services/plugins/powerOps/powerOps.c (+1/-0)
services/plugins/resolutionSet/Makefile.in (+46/-10)
services/plugins/resolutionSet/resolutionSet.c (+1/-0)
services/plugins/resolutionSet/resolutionX11.c (+12/-5)
services/plugins/timeSync/Makefile.in (+46/-10)
services/plugins/timeSync/timeSync.c (+7/-2)
services/plugins/vix/Makefile.in (+46/-10)
services/plugins/vix/foundryToolsDaemon.c (+27/-14)
services/plugins/vix/vixPlugin.c (+1/-0)
services/plugins/vix/vixTools.c (+1843/-123)
services/plugins/vix/vixToolsInt.h (+26/-2)
services/plugins/vmbackup/Makefile.in (+46/-10)
services/plugins/vmbackup/stateMachine.c (+1/-0)
services/vmtoolsd/Makefile.in (+53/-14)
services/vmtoolsd/cmdLine.c (+1/-1)
services/vmtoolsd/l10n/de.vmsg (+1/-1)
services/vmtoolsd/l10n/ja.vmsg (+1/-1)
services/vmtoolsd/l10n/ko.vmsg (+1/-1)
services/vmtoolsd/mainLoop.c (+7/-0)
services/vmtoolsd/mainPosix.c (+1/-0)
services/vmtoolsd/pluginMgr.c (+14/-0)
services/vmtoolsd/toolsRpc.c (+1/-1)
tests/Makefile.in (+41/-15)
tests/testDebug/Makefile.in (+46/-10)
tests/testPlugin/Makefile.in (+46/-10)
tests/testVmblock/Makefile.in (+41/-12)
tests/vmrpcdbg/Makefile.in (+38/-9)
tests/vmrpcdbg/vmrpcdbg.c (+1/-0)
toolbox/Makefile.in (+42/-10)
toolbox/l10n/de.vmsg (+79/-80)
toolbox/l10n/ja.vmsg (+74/-75)
toolbox/l10n/ko.vmsg (+77/-78)
toolbox/l10n/zh_CN.vmsg (+169/-43)
toolbox/toolbox-cmd.c (+3/-0)
toolbox/toolboxcmd-shrink.c (+61/-52)
toolbox/toolboxcmd-stat.c (+7/-7)
vmblock-fuse/Makefile.in (+42/-10)
vmblockmounter/Makefile.in (+42/-10)
vmblockmounter/vmblockmounter.c (+1/-0)
vmware-user-suid-wrapper/Makefile.in (+53/-14)
vmware-user-suid-wrapper/main.c (+1/-0)
xferlogs/Makefile.in (+42/-10)
xferlogs/xferlogs.c (+1/-0)
To merge this branch: bzr merge lp:~n-muench/ubuntu/precise/open-vm-tools/open-vm-tools.raring-precise.backport
Reviewer Review Type Date Requested Status
Nate Muench (Mink) (community) Needs Resubmitting
Dmitry Shachnev Abstain
Robie Basak Abstain
Ubuntu Security Sponsors Team Pending
Review via email: mp+168165@code.launchpad.net

This proposal has been superseded by a proposal from 2013-06-28.

Description of the change

This adds support for the 3.5 backport kernels.

I for one am sick of seeing this stupid bug report saying "the precise package won't build against the 3.5 kernel." This will effectively kill it.

For the record, this is NOT a backport from Raring to Precise. The only thing from Raring is the upstream packaging. The stuff in the debian folder is from the Precise packaging already available in the main repo.

To post a comment you must log in.
Revision history for this message
Robie Basak (racb) wrote :

(I am not a sponsor and cannot merge your changes; just an interested bystander. I appreciate your efforts in getting this fixed)

> You mean I have to revert the (/debian) DKMS files from Raring->Precise. Yeah I figured that out.

No, that's not what I meant. Explanation below. Sorry, I've just seen your message; I'd like to have responded to you earlier.

You seem to have backported the entire module but not the packaging, so I'd say that this *is* a backport, which I think carries a higher risk of regression.

IIRC, it is possible to make DKMS packages build different sources depending on the kernel version. So you could arrange for the actual module itself to be based on an identical source for those running the original Precise kernel, and only apply your changes when using the backport kernel.

This way, you could minimise the chance of regressions for those using the original Precise kernel, since although you would be bumping their DKMS packages the actual module source wouldn't change for them. And you'd enhance the package for users of the backport kernel, since the DKMS package doesn't build for them anyway.

See the PATCH and PATCH_MATCH configuration options in dkms.conf for details. Annoyingly it means that you have to carry a full source and a delta, instead of two full sources for two different versions. But the delta can be constructed and the effect is the same.

This is neither a +1 nor a -1; just a note to say that there may be a safer way. I'm not sure what the appropriate approach here should be.

review: Abstain
Revision history for this message
Nate Muench (Mink) (n-muench) wrote :

Nothing has changed in the /debian folder. In fact I originally planned on using the whole Raring packaging, and just downgrading, like you seem to be saying. But it didn't turn out as planned.

Then, I tried to use the upstream packaging used for Raring, along with the /debian folder from Precise. And sure enough it built successfully.

I guess I'm assuming that if there are problems (after approval), I could fix them. Honestly, I'm just sick of people filing bug reports saying that the package in Precise won't build against the 3.5 kernel.

Revision history for this message
Dmitry Shachnev (mitya57) wrote :

Can you cherry-pick the changes needed to support 3.5 instead of upgrading to a new upstream version?

Also, as this contains a fix for a CVE, it would be nice if someone from security team reviewed it (maybe it should also go to -security pocket).

review: Abstain
Revision history for this message
Dmitry Shachnev (mitya57) wrote :

Looks like I can't add ~ubuntu-security-sponsors to the reviewers, can you please do that yourself?

Revision history for this message
Nate Muench (Mink) (n-muench) wrote :

> Can you cherry-pick the changes needed to support 3.5 instead of upgrading to
> a new upstream version?
>
> Also, as this contains a fix for a CVE, it would be nice if someone from
> security team reviewed it (maybe it should also go to -security pocket).
Honestly, I don't know what changes were needed for 3.5 support (I'm not able to access the VMware git repo for the packaging at the moment).

The reason for upgrading rather than cherry-picking is that the package is less likely to break. I've tested it on Precise, and it works to the best of my knowledge.

Revision history for this message
Nate Muench (Mink) (n-muench) wrote :

Gonna redo it, with a patch based on Quantal's modules

review: Needs Resubmitting

Unmerged revisions

28. By Nate Muench (Mink)

* Merging upstream version 2012.12.26-958366.
  - Adds support for 3.5 Kernels (LP: #1083719)
* Adding patch from Mathias Krause <email address hidden> to fix
  kernel stack memory leak in vsock module [CVE-2013-3237].

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== added file '.pc/.quilt_patches'
2--- .pc/.quilt_patches 1970-01-01 00:00:00 +0000
3+++ .pc/.quilt_patches 2013-06-07 19:45:35 +0000
4@@ -0,0 +1,1 @@
5+debian/patches
6
7=== added file '.pc/.quilt_series'
8--- .pc/.quilt_series 1970-01-01 00:00:00 +0000
9+++ .pc/.quilt_series 2013-06-07 19:45:35 +0000
10@@ -0,0 +1,1 @@
11+series
12
13=== added file '.pc/01-kvers.patch/.timestamp'
14=== added file '.pc/03-dkms.patch/.timestamp'
15=== modified file '.pc/03-dkms.patch/modules/linux/dkms.conf'
16--- .pc/03-dkms.patch/modules/linux/dkms.conf 2011-12-26 11:27:02 +0000
17+++ .pc/03-dkms.patch/modules/linux/dkms.conf 2013-06-07 19:45:35 +0000
18@@ -1,5 +1,5 @@
19 PACKAGE_NAME=open-vm-tools
20-PACKAGE_VERSION=2011.12.20
21+PACKAGE_VERSION=2012.12.26
22 MAKE_CMD_TMPL="make VM_UNAME=\$kernelver \
23 MODULEBUILDDIR=$dkms_tree/$PACKAGE_NAME/$PACKAGE_VERSION/build"
24
25
26=== added directory '.pc/04-vsock-cve.patch'
27=== added file '.pc/04-vsock-cve.patch/.timestamp'
28=== added directory '.pc/04-vsock-cve.patch/modules'
29=== added directory '.pc/04-vsock-cve.patch/modules/linux'
30=== added directory '.pc/04-vsock-cve.patch/modules/linux/vsock'
31=== added directory '.pc/04-vsock-cve.patch/modules/linux/vsock/linux'
32=== added file '.pc/04-vsock-cve.patch/modules/linux/vsock/linux/af_vsock.c'
33--- .pc/04-vsock-cve.patch/modules/linux/vsock/linux/af_vsock.c 1970-01-01 00:00:00 +0000
34+++ .pc/04-vsock-cve.patch/modules/linux/vsock/linux/af_vsock.c 2013-06-07 19:45:35 +0000
35@@ -0,0 +1,5440 @@
36+/*********************************************************
37+ * Copyright (C) 2007-2011 VMware, Inc. All rights reserved.
38+ *
39+ * This program is free software; you can redistribute it and/or modify it
40+ * under the terms of the GNU General Public License as published by the
41+ * Free Software Foundation version 2 and no later version.
42+ *
43+ * This program is distributed in the hope that it will be useful, but
44+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
45+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
46+ * for more details.
47+ *
48+ * You should have received a copy of the GNU General Public License along
49+ * with this program; if not, write to the Free Software Foundation, Inc.,
50+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
51+ *
52+ *********************************************************/
53+
54+/*
55+ * af_vsock.c --
56+ *
57+ * Linux socket module for the VMCI Sockets protocol family.
58+ */
59+
60+
61+/*
62+ * Implementation notes:
63+ *
64+ * - There are two kinds of sockets: those created by user action (such as
65+ * calling socket(2)) and those created by incoming connection request
66+ * packets.
67+ *
68+ * - There are two "global" tables, one for bound sockets (sockets that have
69+ * specified an address that they are responsible for) and one for connected
70+ * sockets (sockets that have established a connection with another socket).
71+ * These tables are "global" in that all sockets on the system are placed
72+ * within them.
73+ * - Note, though, that the bound table contains an extra entry for a list of
74+ * unbound sockets and SOCK_DGRAM sockets will always remain in that list.
75+ * The bound table is used solely for lookup of sockets when packets are
76+ * received and that's not necessary for SOCK_DGRAM sockets since we create
77+ * a datagram handle for each and need not perform a lookup. Keeping
78+ * SOCK_DGRAM sockets out of the bound hash buckets will reduce the chance
79+ * of collisions when looking for SOCK_STREAM sockets and prevents us from
80+ * having to check the socket type in the hash table lookups.
81+ *
82+ * - Sockets created by user action will either be "client" sockets that
83+ * initiate a connection or "server" sockets that listen for connections; we
84+ * do not support simultaneous connects (two "client" sockets connecting).
85+ *
86+ * - "Server" sockets are referred to as listener sockets throughout this
87+ * implementation because they are in the SS_LISTEN state. When a connection
88+ * request is received (the second kind of socket mentioned above), we create
89+ * a new socket and refer to it as a pending socket. These pending sockets
90+ * are placed on the pending connection list of the listener socket. When
91+ * future packets are received for the address the listener socket is bound
92+ * to, we check if the source of the packet is from one that has an existing
93+ * pending connection. If it does, we process the packet for the pending
94+ * socket. When that socket reaches the connected state, it is removed from
95+ * the listener socket's pending list and enqueued in the listener socket's
96+ * accept queue. Callers of accept(2) will accept connected sockets from the
97+ * listener socket's accept queue. If the socket cannot be accepted for some
98+ * reason then it is marked rejected. Once the connection is accepted, it is
99+ * owned by the user process and the responsibility for cleanup falls with
100+ * that user process.
101+ *
102+ * - It is possible that these pending sockets will never reach the connected
103+ * state; in fact, we may never receive another packet after the connection
104+ * request. Because of this, we must schedule a cleanup function to run in
105+ * the future, after some amount of time passes where a connection should
106+ * have been established. This function ensures that the socket is off all
107+ * lists so it cannot be retrieved, then drops all references to the socket
108+ * so it is cleaned up (sock_put() -> sk_free() -> our sk_destruct
109+ * implementation). Note this function will also cleanup rejected sockets,
110+ * those that reach the connected state but leave it before they have been
111+ * accepted.
112+ *
113+ * - Sockets created by user action will be cleaned up when the user
114+ * process calls close(2), causing our release implementation to be called.
115+ * Our release implementation will perform some cleanup then drop the
116+ * last reference so our sk_destruct implementation is invoked. Our
117+ * sk_destruct implementation will perform additional cleanup that's common
118+ * for both types of sockets.
119+ *
120+ * - A socket's reference count is what ensures that the structure won't be
121+ * freed. Each entry in a list (such as the "global" bound and connected
122+ * tables and the listener socket's pending list and connected queue) ensures
123+ * a reference. When we defer work until process context and pass a socket
124+ * as our argument, we must ensure the reference count is increased to ensure
125+ * the socket isn't freed before the function is run; the deferred function
126+ * will then drop the reference.
127+ *
128+ */
129+
130+#include "driver-config.h"
131+
132+#define EXPORT_SYMTAB
133+#include <linux/kmod.h>
134+#include <linux/socket.h>
135+#include <linux/net.h>
136+#include <linux/skbuff.h>
137+#include <linux/miscdevice.h>
138+#include <linux/poll.h>
139+#include <linux/smp.h>
140+#include <linux/bitops.h>
141+#include <linux/list.h>
142+#include <linux/wait.h>
143+#include <linux/init.h>
144+#include <asm/io.h>
145+#if defined(__x86_64__) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12)
146+# include <linux/ioctl32.h>
147+/* Use weak: not all kernels export sys_ioctl for use by modules */
148+asmlinkage __attribute__((weak)) long
149+sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg);
150+#endif
151+
152+#include "compat_cred.h"
153+#include "compat_module.h"
154+#include "compat_kernel.h"
155+#include "compat_sock.h"
156+#include "compat_version.h"
157+#include "compat_workqueue.h"
158+#include "compat_mutex.h"
159+
160+#include "vmware.h"
161+
162+#include "vsockCommon.h"
163+#include "vsockPacket.h"
164+#include "vsockVmci.h"
165+
166+#include "vmci_iocontrols.h"
167+
168+#include "af_vsock.h"
169+#include "stats.h"
170+#include "util.h"
171+#include "vsock_version.h"
172+#include "driverLog.h"
173+
174+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 9)
175+# error "Linux kernels before 2.6.9 are not supported."
176+#endif
177+
178+/*
179+ * All kernels above 2.6.33 have the kern parameter for the create
180+ * call in struct net_proto_family.
181+ */
182+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 33) && \
183+ !defined(VMW_NETCREATE_KERNARG)
184+# define VMW_NETCREATE_KERNARG
185+#endif
186+
187+#define VSOCK_INVALID_FAMILY NPROTO
188+#define VSOCK_AF_IS_REGISTERED(val) ((val) >= 0 && (val) < NPROTO)
189+
190+/* Some kernel versions don't define __user. Define it ourself if so. */
191+#ifndef __user
192+#define __user
193+#endif
194+
195+
196+/*
197+ * Prototypes
198+ */
199+int VSockVmci_GetAFValue(void);
200+
201+/* Internal functions. */
202+static Bool VSockVmciProtoToNotifyStruct(struct sock *sk,
203+ VSockProtoVersion *proto,
204+ Bool oldPktProto);
205+static int VSockVmciGetAFValue(void);
206+static int VSockVmciRecvDgramCB(void *data, VMCIDatagram *dg);
207+static int VSockVmciRecvStreamCB(void *data, VMCIDatagram *dg);
208+static void VSockVmciPeerAttachCB(VMCIId subId,
209+ VMCI_EventData *ed, void *clientData);
210+static void VSockVmciPeerDetachCB(VMCIId subId,
211+ VMCI_EventData *ed, void *clientData);
212+static void VSockVmciRecvPktWork(compat_work_arg work);
213+static int VSockVmciRecvListen(struct sock *sk, VSockPacket *pkt);
214+static int VSockVmciRecvConnectingServer(struct sock *sk,
215+ struct sock *pending, VSockPacket *pkt);
216+static int VSockVmciRecvConnectingClient(struct sock *sk, VSockPacket *pkt);
217+static int VSockVmciRecvConnectingClientNegotiate(struct sock *sk,
218+ VSockPacket *pkt);
219+static int VSockVmciRecvConnectingClientInvalid(struct sock *sk,
220+ VSockPacket *pkt);
221+static int VSockVmciRecvConnected(struct sock *sk, VSockPacket *pkt);
222+static int __VSockVmciBind(struct sock *sk, struct sockaddr_vm *addr);
223+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 14)
224+static struct sock *__VSockVmciCreate(struct socket *sock, struct sock *parent,
225+ unsigned int priority, unsigned short type);
226+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
227+static struct sock *__VSockVmciCreate(struct socket *sock, struct sock *parent,
228+ gfp_t priority, unsigned short type);
229+#else
230+static struct sock *__VSockVmciCreate(struct net *net,
231+ struct socket *sock, struct sock *parent,
232+ gfp_t priority, unsigned short type);
233+#endif
234+static void VSockVmciTestUnregister(void);
235+static int VSockVmciRegisterWithVmci(void);
236+static void VSockVmciUnregisterWithVmci(void);
237+static int VSockVmciRegisterAddressFamily(void);
238+static void VSockVmciUnregisterAddressFamily(void);
239+
240+/* Socket operations. */
241+static void VSockVmciSkDestruct(struct sock *sk);
242+static int VSockVmciQueueRcvSkb(struct sock *sk, struct sk_buff *skb);
243+static int VSockVmciRelease(struct socket *sock);
244+static int VSockVmciBind(struct socket *sock,
245+ struct sockaddr *addr, int addrLen);
246+static int VSockVmciDgramConnect(struct socket *sock,
247+ struct sockaddr *addr, int addrLen, int flags);
248+static int VSockVmciStreamConnect(struct socket *sock,
249+ struct sockaddr *addr, int addrLen, int flags);
250+static int VSockVmciAccept(struct socket *sock, struct socket *newsock, int flags);
251+static int VSockVmciGetname(struct socket *sock,
252+ struct sockaddr *addr, int *addrLen, int peer);
253+static unsigned int VSockVmciPoll(struct file *file,
254+ struct socket *sock, poll_table *wait);
255+static int VSockVmciListen(struct socket *sock, int backlog);
256+static int VSockVmciShutdown(struct socket *sock, int mode);
257+
258+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
259+typedef int VSockSetsockoptLenType;
260+#else
261+typedef unsigned int VSockSetsockoptLenType;
262+#endif
263+static int VSockVmciStreamSetsockopt(struct socket *sock, int level, int optname,
264+ char __user *optval,
265+ VSockSetsockoptLenType optlen);
266+
267+static int VSockVmciStreamGetsockopt(struct socket *sock, int level, int optname,
268+ char __user *optval, int __user * optlen);
269+
270+static int VSockVmciDgramSendmsg(struct kiocb *kiocb,
271+ struct socket *sock, struct msghdr *msg, size_t len);
272+static int VSockVmciDgramRecvmsg(struct kiocb *kiocb, struct socket *sock,
273+ struct msghdr *msg, size_t len, int flags);
274+static int VSockVmciStreamSendmsg(struct kiocb *kiocb,
275+ struct socket *sock, struct msghdr *msg, size_t len);
276+static int VSockVmciStreamRecvmsg(struct kiocb *kiocb, struct socket *sock,
277+ struct msghdr *msg, size_t len, int flags);
278+
279+static int VSockVmciCreate(
280+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
281+ struct net *net,
282+#endif
283+ struct socket *sock, int protocol
284+#ifdef VMW_NETCREATE_KERNARG
285+ , int kern
286+#endif
287+ );
288+
289+
290+/*
291+ * Device operations.
292+ */
293+int VSockVmciDevOpen(struct inode *inode, struct file *file);
294+int VSockVmciDevRelease(struct inode *inode, struct file *file);
295+static int VSockVmciDevIoctl(struct inode *inode, struct file *filp,
296+ u_int iocmd, unsigned long ioarg);
297+#if defined(HAVE_COMPAT_IOCTL) || defined(HAVE_UNLOCKED_IOCTL)
298+static long VSockVmciDevUnlockedIoctl(struct file *filp,
299+ u_int iocmd, unsigned long ioarg);
300+#endif
301+
302+/*
303+ * Variables.
304+ */
305+
306+/* Protocol family. */
307+static struct proto vsockVmciProto = {
308+ .name = "AF_VMCI",
309+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 10)
310+ /* Added in 2.6.10. */
311+ .owner = THIS_MODULE,
312+#endif
313+ /*
314+ * From 2.6.9 until 2.6.11, these address families called sk_alloc_slab()
315+ * and the allocated slab was assigned to the slab variable in the proto
316+ * struct and was created of size slab_obj_size.
317+ * As of 2.6.12 and later, this slab allocation was moved into
318+ * proto_register() and only done if you specified a non-zero value for
319+ * the second argument (alloc_slab); the size of the slab element was
320+ * changed to obj_size.
321+ */
322+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12)
323+ .slab_obj_size = sizeof (VSockVmciSock),
324+#else
325+ .obj_size = sizeof (VSockVmciSock),
326+#endif
327+};
328+
329+static struct net_proto_family vsockVmciFamilyOps = {
330+ .family = VSOCK_INVALID_FAMILY,
331+ .create = VSockVmciCreate,
332+ .owner = THIS_MODULE,
333+};
334+
335+/* Socket operations, split for DGRAM and STREAM sockets. */
336+static struct proto_ops vsockVmciDgramOps = {
337+ .family = VSOCK_INVALID_FAMILY,
338+ .owner = THIS_MODULE,
339+ .release = VSockVmciRelease,
340+ .bind = VSockVmciBind,
341+ .connect = VSockVmciDgramConnect,
342+ .socketpair = sock_no_socketpair,
343+ .accept = sock_no_accept,
344+ .getname = VSockVmciGetname,
345+ .poll = VSockVmciPoll,
346+ .ioctl = sock_no_ioctl,
347+ .listen = sock_no_listen,
348+ .shutdown = VSockVmciShutdown,
349+ .setsockopt = sock_no_setsockopt,
350+ .getsockopt = sock_no_getsockopt,
351+ .sendmsg = VSockVmciDgramSendmsg,
352+ .recvmsg = VSockVmciDgramRecvmsg,
353+ .mmap = sock_no_mmap,
354+ .sendpage = sock_no_sendpage,
355+};
356+
357+static struct proto_ops vsockVmciStreamOps = {
358+ .family = VSOCK_INVALID_FAMILY,
359+ .owner = THIS_MODULE,
360+ .release = VSockVmciRelease,
361+ .bind = VSockVmciBind,
362+ .connect = VSockVmciStreamConnect,
363+ .socketpair = sock_no_socketpair,
364+ .accept = VSockVmciAccept,
365+ .getname = VSockVmciGetname,
366+ .poll = VSockVmciPoll,
367+ .ioctl = sock_no_ioctl,
368+ .listen = VSockVmciListen,
369+ .shutdown = VSockVmciShutdown,
370+ .setsockopt = VSockVmciStreamSetsockopt,
371+ .getsockopt = VSockVmciStreamGetsockopt,
372+ .sendmsg = VSockVmciStreamSendmsg,
373+ .recvmsg = VSockVmciStreamRecvmsg,
374+ .mmap = sock_no_mmap,
375+ .sendpage = sock_no_sendpage,
376+};
377+
378+static struct file_operations vsockVmciDeviceOps = {
379+ .owner = THIS_MODULE,
380+#ifdef HAVE_UNLOCKED_IOCTL
381+ .unlocked_ioctl = VSockVmciDevUnlockedIoctl,
382+#else
383+ .ioctl = VSockVmciDevIoctl,
384+#endif
385+#ifdef HAVE_COMPAT_IOCTL
386+ .compat_ioctl = VSockVmciDevUnlockedIoctl,
387+#endif
388+ .open = VSockVmciDevOpen,
389+ .release = VSockVmciDevRelease,
390+};
391+
392+static struct miscdevice vsockVmciDevice = {
393+ .name = "vsock",
394+ .minor = MISC_DYNAMIC_MINOR,
395+ .fops = &vsockVmciDeviceOps,
396+};
397+
398+typedef struct VSockRecvPktInfo {
399+ compat_work work;
400+ struct sock *sk;
401+ VSockPacket pkt;
402+} VSockRecvPktInfo;
403+
404+static compat_define_mutex(registrationMutex);
405+static int devOpenCount = 0;
406+static int vsockVmciSocketCount = 0;
407+static int vsockVmciKernClientCount = 0;
408+static Bool vmciDevicePresent = FALSE;
409+static VMCIHandle vmciStreamHandle = { VMCI_INVALID_ID, VMCI_INVALID_ID };
410+static VMCIId qpResumedSubId = VMCI_INVALID_ID;
411+static VMCIId ctxUpdatedSubId = VMCI_INVALID_ID;
412+
413+static int PROTOCOL_OVERRIDE = -1;
414+
415+/*
416+ * Netperf benchmarks have shown significant throughput improvements when the
417+ * QP size is bumped from 64k to 256k. These measurements were taken during the
418+ * K/L.next timeframe. Give users better performance by default.
419+ */
420+#define VSOCK_DEFAULT_QP_SIZE_MIN 128
421+#define VSOCK_DEFAULT_QP_SIZE 262144
422+#define VSOCK_DEFAULT_QP_SIZE_MAX 262144
423+
424+/*
425+ * The default peer timeout indicates how long we will wait for a peer
426+ * response to a control message.
427+ */
428+#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
429+
430+#ifdef VMX86_DEVEL
431+# define LOG_PACKET(_pkt) VSockVmciLogPkt(__FUNCTION__, __LINE__, _pkt)
432+#else
433+# define LOG_PACKET(_pkt)
434+#endif
435+
436+
437+/*
438+ *----------------------------------------------------------------------------
439+ *
440+ * VSockVmciOldProtoOverride --
441+ *
442+ * Check to see if the user has asked us to override all sockets to use
443+ * the vsock notify protocol.
444+ *
445+ * Results:
446+ * TRUE if there is a protocol override in effect.
447+ * - *oldPktProto is TRUE if the original protocol should be used.
448+ * FALSE if there is no override in effect (*oldPktProto is left untouched).
449+ *
450+ * Side effects:
451+ * Logs a warning when an override is in effect.
452+ *
453+ *----------------------------------------------------------------------------
454+ */
455+
456+static Bool
457+VSockVmciOldProtoOverride(Bool *oldPktProto) // OUT
458+{
459+ ASSERT(oldPktProto);
460+
461+ if (PROTOCOL_OVERRIDE != -1) {
462+ if (PROTOCOL_OVERRIDE == 0) {
463+ *oldPktProto = TRUE;
464+ } else {
465+ *oldPktProto = FALSE;
466+ }
467+ Warning("Proto override in use.\n");
468+ return TRUE;
469+ }
470+
471+ return FALSE;
472+}
473+
474+
475+/*
476+ *----------------------------------------------------------------------------
477+ *
478+ * VSockVmciProtoToNotifyStruct --
479+ *
480+ * Given a particular notify protocol version, setup the socket's notify
481+ * struct correctly.
482+ *
483+ * Results:
484+ * TRUE on success. FALSE if the requested protocol selection is rejected.
485+ *
486+ * Side effects:
487+ * On success, sets vsk->notifyOps and issues the socketInit NOTIFYCALL.
488+ *
489+ *----------------------------------------------------------------------------
490+ */
491+
492+static Bool
493+VSockVmciProtoToNotifyStruct(struct sock *sk, // IN
494+ VSockProtoVersion *proto, // IN
495+ Bool oldPktProto) // IN
496+{
497+ VSockVmciSock *vsk;
498+
499+ ASSERT(sk);
500+ ASSERT(proto);
501+
502+ vsk = vsock_sk(sk);
503+
504+ if (oldPktProto) {
505+ if (*proto != VSOCK_PROTO_INVALID) {
506+ Warning("Can't set both an old and new protocol\n");
507+ return FALSE;
508+ }
509+ vsk->notifyOps = &vSockVmciNotifyPktOps;
510+ goto exit;
511+ }
512+
513+ switch(*proto) {
514+ case VSOCK_PROTO_PKT_ON_NOTIFY:
515+ vsk->notifyOps= &vSockVmciNotifyPktQStateOps;
516+ break;
517+ default:
518+ Warning("Unknown notify protocol version\n");
519+ return FALSE;
520+ }
521+
522+exit:
523+ NOTIFYCALL(vsk, socketInit, sk);
524+ return TRUE;
525+}
526+
527+
528+/*
529+ *----------------------------------------------------------------------------
530+ *
531+ * VSockVmciNewProtoSupportedVersions --
532+ *
533+ * Gets the supported REQUEST2/NEGOTIATE2 vsock protocol versions.
534+ *
535+ * Results:
536+ * Either 1 specific protocol version (override mode) or
537+ * VSOCK_PROTO_ALL_SUPPORTED.
538+ *
539+ * Side effects:
540+ * None.
541+ *
542+ *----------------------------------------------------------------------------
543+ */
544+
545+static VSockProtoVersion
546+VSockVmciNewProtoSupportedVersions(void)
547+{
548+ if (PROTOCOL_OVERRIDE != -1) {
549+ return PROTOCOL_OVERRIDE;
550+ }
551+
552+ return VSOCK_PROTO_ALL_SUPPORTED;
553+}
554+
555+
556+/*
557+ *----------------------------------------------------------------------------
558+ *
559+ * VSockVmciTrusted --
560+ *
561+ * We allow two kinds of sockets to communicate with a restricted VM:
562+ * 1) trusted sockets
563+ * 2) sockets from applications running as the same user as the VM (this
564+ * is only true for the host side and only when using hosted products)
565+ *
566+ * Results:
567+ * TRUE if trusted communication is allowed to peerCid, FALSE otherwise.
568+ *
569+ * Side effects:
570+ * None.
571+ *
572+ *----------------------------------------------------------------------------
573+ */
574+
575+Bool
576+VSockVmciTrusted(VSockVmciSock *vsock, // IN: Local socket
577+ VMCIId peerCid) // IN: Context ID of peer
578+{
579+ int res;
580+
581+ if (vsock->trusted) {
582+ return TRUE;
583+ }
584+
585+ res = VMCI_IsContextOwner(peerCid, &vsock->owner);
586+
587+ return res == VMCI_SUCCESS;
588+}
589+
590+
591+/*
592+ *----------------------------------------------------------------------------
593+ *
594+ * VSockVmciAllowDgram --
595+ *
596+ * We allow sending datagrams to and receiving datagrams from a
597+ * restricted VM only if it is trusted as described in
598+ * VSockVmciTrusted.
599+ *
600+ * Results:
601+ * TRUE if datagram communication is allowed to peerCid, FALSE otherwise.
602+ *
603+ * Side effects:
604+ * May update vsock->cachedPeer and vsock->cachedPeerAllowDgram.
605+ *
606+ *----------------------------------------------------------------------------
607+ */
608+
609+Bool
610+VSockVmciAllowDgram(VSockVmciSock *vsock, // IN: Local socket
611+ VMCIId peerCid) // IN: Context ID of peer
612+{
613+ if (vsock->cachedPeer != peerCid) {
614+ vsock->cachedPeer = peerCid;
615+ if (!VSockVmciTrusted(vsock, peerCid) &&
616+ (VMCIContext_GetPrivFlags(peerCid) & VMCI_PRIVILEGE_FLAG_RESTRICTED)) {
617+ vsock->cachedPeerAllowDgram = FALSE;
618+ } else {
619+ vsock->cachedPeerAllowDgram = TRUE;
620+ }
621+ }
622+
623+ return vsock->cachedPeerAllowDgram;
624+}
625+
626+
627+/*
628+ *----------------------------------------------------------------------------
629+ *
630+ * VMCISock_GetAFValue --
631+ *
632+ * Kernel interface that allows external kernel modules to get the current
633+ * VMCI Sockets address family.
634+ * This version of the function is exported to kernel clients and should not
635+ * change.
636+ *
637+ * Results:
638+ * The address family, or a negative error (-1 if no kernel client registered).
639+ *
640+ * Side effects:
641+ * May register the address family (via VSockVmciGetAFValue).
642+ *
643+ *----------------------------------------------------------------------------
644+ */
645+
646+int
647+VMCISock_GetAFValue(void)
648+{
649+ int afvalue;
650+
651+ compat_mutex_lock(&registrationMutex);
652+
653+ /*
654+ * Kernel clients are required to explicitly register themselves before they
655+ * can use VMCI Sockets.
656+ */
657+ if (vsockVmciKernClientCount <= 0) {
658+ afvalue = -1;
659+ goto exit;
660+ }
661+
662+ afvalue = VSockVmciGetAFValue();
663+
664+exit:
665+ compat_mutex_unlock(&registrationMutex);
666+ return afvalue;
667+}
668+EXPORT_SYMBOL(VMCISock_GetAFValue);
669+
670+
671+/*
672+ *----------------------------------------------------------------------------
673+ *
674+ * VMCISock_GetLocalCID --
675+ *
676+ * Kernel interface that allows external kernel modules to get the current
677+ * VMCI context id.
678+ * This version of the function is exported to kernel clients and should not
679+ * change.
680+ *
681+ * Results:
682+ * The context id, or a negative error (-1 if no kernel client registered).
683+ *
684+ * Side effects:
685+ * None.
686+ *
687+ *----------------------------------------------------------------------------
688+ */
689+
690+int
691+VMCISock_GetLocalCID(void)
692+{
693+ int cid;
694+
695+ compat_mutex_lock(&registrationMutex);
696+
697+ /*
698+ * Kernel clients are required to explicitly register themselves before they
699+ * can use VMCI Sockets.
700+ */
701+ if (vsockVmciKernClientCount <= 0) {
702+ cid = -1;
703+ goto exit;
704+ }
705+
706+ cid = VMCI_GetContextID();
707+
708+exit:
709+ compat_mutex_unlock(&registrationMutex);
710+ return cid;
711+}
712+EXPORT_SYMBOL(VMCISock_GetLocalCID);
713+
714+
715+/*
716+ *----------------------------------------------------------------------------
717+ *
718+ * VMCISock_KernelRegister --
719+ *
720+ * Allows a kernel client to register with VMCI Sockets. Must be called
721+ * before VMCISock_GetAFValue within a kernel module. Note that we don't
722+ * actually register the address family until the first time the module
723+ * needs to use it.
724+ *
725+ * Results:
726+ * None.
727+ *
728+ * Side effects:
729+ * Increments the kernel client count under the registration mutex.
730+ *
731+ *----------------------------------------------------------------------------
732+ */
733+
734+void
735+VMCISock_KernelRegister(void)
736+{
737+ compat_mutex_lock(&registrationMutex);
738+ vsockVmciKernClientCount++;
739+ compat_mutex_unlock(&registrationMutex);
740+}
741+EXPORT_SYMBOL(VMCISock_KernelRegister);
742+
743+
744+/*
745+ *----------------------------------------------------------------------------
746+ *
747+ * VMCISock_KernelDeregister --
748+ *
749+ * Allows a kernel client to unregister with VMCI Sockets. Every call
750+ * to VMCISock_KernelRegister must be matched with a call to
751+ * VMCISock_KernelDeregister.
752+ *
753+ * Results:
754+ * None.
755+ *
756+ * Side effects:
757+ * Decrements the kernel client count; may unregister the address family.
758+ *
759+ *----------------------------------------------------------------------------
760+ */
761+
762+void
763+VMCISock_KernelDeregister(void)
764+{
765+ compat_mutex_lock(&registrationMutex);
766+ vsockVmciKernClientCount--;
767+ VSockVmciTestUnregister();
768+ compat_mutex_unlock(&registrationMutex);
769+}
770+EXPORT_SYMBOL(VMCISock_KernelDeregister);
771+
772+
773+/*
774+ *----------------------------------------------------------------------------
775+ *
776+ * VSockVmciGetAFValue --
777+ *
778+ * Returns the address family value being used.
779+ * Note: The registration mutex must be held when calling this function.
780+ *
781+ * Results:
782+ * The address family on success, a negative error on failure.
783+ *
784+ * Side effects:
785+ * Registers the address family if it is not registered yet.
786+ *
787+ *----------------------------------------------------------------------------
788+ */
789+
790+static int
791+VSockVmciGetAFValue(void)
792+{
793+ int afvalue;
794+
795+ afvalue = vsockVmciFamilyOps.family;
796+ if (!VSOCK_AF_IS_REGISTERED(afvalue)) {
797+ afvalue = VSockVmciRegisterAddressFamily();
798+ }
799+
800+ return afvalue;
801+}
802+
803+/*
804+ *----------------------------------------------------------------------------
805+ *
806+ * VSockVmci_GetAFValue --
807+ *
808+ * Returns the address family value being used.
809+ *
810+ * Results:
811+ * The address family on success, a negative error on failure.
812+ *
813+ * Side effects:
814+ * Takes the registration mutex; may register the address family.
815+ *
816+ *----------------------------------------------------------------------------
817+ */
818+
819+int
820+VSockVmci_GetAFValue(void)
821+{
822+ int afvalue;
823+
824+ compat_mutex_lock(&registrationMutex);
825+ afvalue = VSockVmciGetAFValue();
826+ compat_mutex_unlock(&registrationMutex);
827+
828+ return afvalue;
829+}
830+
831+
832+/*
833+ * Helper functions.
834+ */
835+
836+/*
837+ *----------------------------------------------------------------------------
838+ *
839+ * VSockVmciQueuePairAlloc --
840+ *
841+ * Allocates or attaches to a queue pair. Tries to register with trusted
842+ * status if requested but does not fail if the queue pair could not be
843+ * allocated as trusted (running in the guest).
844+ *
845+ * Results:
846+ * 0 on success. A VSock error on error.
847+ *
848+ * Side effects:
849+ * None.
850+ *
851+ *----------------------------------------------------------------------------
852+ */
853+
854+static int
855+VSockVmciQueuePairAlloc(VMCIQPair **qpair, // OUT
856+ VMCIHandle *handle, // IN/OUT
857+ uint64 produceSize, // IN
858+ uint64 consumeSize, // IN
859+ VMCIId peer, // IN
860+ uint32 flags, // IN
861+ Bool trusted) // IN
862+{
863+ int err = 0;
864+
865+ if (trusted) {
866+ /*
867+ * Try to allocate our queue pair as trusted. This will only work
868+ * if vsock is running in the host.
869+ */
870+
871+ err = VMCIQPair_Alloc(qpair, handle, produceSize, consumeSize,
872+ peer, flags, VMCI_PRIVILEGE_FLAG_TRUSTED);
873+ if (err != VMCI_ERROR_NO_ACCESS) {
874+ goto out;
875+ }
876+ }
877+
878+ err = VMCIQPair_Alloc(qpair, handle, produceSize, consumeSize,
879+ peer, flags, VMCI_NO_PRIVILEGE_FLAGS); // unprivileged attempt or fallback
880+out:
881+ if (err < 0) {
882+ Log("Could not attach to queue pair with %d\n", err);
883+ err = VSockVmci_ErrorToVSockError(err);
884+ }
885+
886+ return err;
887+}
888+
889+
890+/*
891+ *----------------------------------------------------------------------------
892+ *
893+ * VSockVmciDatagramCreateHnd --
894+ *
895+ * Creates a datagram handle. Tries to register with trusted
896+ * status but does not fail if the handler could not be allocated
897+ * as trusted (running in the guest).
898+ *
899+ * Results:
900+ * 0 on success. A VMCI error on error.
901+ *
902+ * Side effects:
903+ * None.
904+ *
905+ *----------------------------------------------------------------------------
906+ */
907+
908+static int
909+VSockVmciDatagramCreateHnd(VMCIId resourceID, // IN
910+ uint32 flags, // IN
911+ VMCIDatagramRecvCB recvCB, // IN
912+ void *clientData, // IN
913+ VMCIHandle *outHandle) // OUT
914+{
915+ int err = 0;
916+
917+ /*
918+ * Try to allocate our datagram handler as trusted. This will only work
919+ * if vsock is running in the host.
920+ */
921+
922+ err = VMCIDatagram_CreateHndPriv(resourceID, flags,
923+ VMCI_PRIVILEGE_FLAG_TRUSTED,
924+ recvCB, clientData,
925+ outHandle);
926+
927+ if (err == VMCI_ERROR_NO_ACCESS) {
928+ err = VMCIDatagram_CreateHnd(resourceID, flags,
929+ recvCB, clientData,
930+ outHandle);
931+ }
932+
933+ return err;
934+}
935+
936+
937+/*
938+ *----------------------------------------------------------------------------
939+ *
940+ * VSockVmciTestUnregister --
941+ *
942+ * Tests if it's necessary to unregister the socket family, and does so.
943+ *
944+ * Note that this assumes the registration lock is held.
945+ *
946+ * Results:
947+ * None.
948+ *
949+ * Side effects:
950+ * The address family is unregistered when no users of it remain.
951+ *
952+ *----------------------------------------------------------------------------
953+ */
954+
955+static void
956+VSockVmciTestUnregister(void)
957+{
958+ if (devOpenCount <= 0 && vsockVmciSocketCount <= 0 &&
959+ vsockVmciKernClientCount <= 0) {
960+ if (VSOCK_AF_IS_REGISTERED(vsockVmciFamilyOps.family)) {
961+ VSockVmciUnregisterAddressFamily();
962+ }
963+ }
964+}
965+
966+
967+/*
968+ *----------------------------------------------------------------------------
969+ *
970+ * VSockVmciRecvDgramCB --
971+ *
972+ * VMCI Datagram receive callback. This function is used specifically for
973+ * SOCK_DGRAM sockets.
974+ *
975+ * This is invoked as part of a tasklet that's scheduled when the VMCI
976+ * interrupt fires. This is run in bottom-half context and if it ever needs
977+ * to sleep it should defer that work to a work queue.
978+ *
979+ * Results:
980+ * VMCI_SUCCESS, or VMCI_ERROR_NO_ACCESS / VMCI_ERROR_NO_MEM on failure.
981+ *
982+ * Side effects:
983+ * An sk_buff is created and queued with this socket.
984+ *
985+ *----------------------------------------------------------------------------
986+ */
987+
988+static int
989+VSockVmciRecvDgramCB(void *data, // IN
990+ VMCIDatagram *dg) // IN
991+{
992+ struct sock *sk;
993+ size_t size;
994+ struct sk_buff *skb;
995+ VSockVmciSock *vsk;
996+
997+ ASSERT(dg);
998+ ASSERT(dg->payloadSize <= VMCI_MAX_DG_PAYLOAD_SIZE);
999+
1000+ sk = (struct sock *)data;
1001+
1002+ ASSERT(sk);
1003+ /* XXX Figure out why sk->sk_socket can be NULL. */
1004+ ASSERT(sk->sk_socket ? sk->sk_socket->type == SOCK_DGRAM : 1);
1005+
1006+ /*
1007+ * This handler is privileged when this module is running on the
1008+ * host. We will get datagrams from all endpoints (even VMs that
1009+ * are in a restricted context). If we get one from a restricted
1010+ * context then the destination socket must be trusted.
1011+ *
1012+ * NOTE: We access the socket struct without holding the lock here. This
1013+ * is ok because the field we are interested is never modified outside
1014+ * of the create and destruct socket functions.
1015+ */
1016+ vsk = vsock_sk(sk);
1017+ if (!VSockVmciAllowDgram(vsk, VMCI_HANDLE_TO_CONTEXT_ID(dg->src))) {
1018+ return VMCI_ERROR_NO_ACCESS;
1019+ }
1020+
1021+ size = VMCI_DG_SIZE(dg);
1022+
1023+ /*
1024+ * Attach the packet to the socket's receive queue as an sk_buff.
1025+ */
1026+ skb = alloc_skb(size, GFP_ATOMIC);
1027+ if (!skb) {
1028+ return VMCI_ERROR_NO_MEM; /* dropped: do not report success to VMCI */
1029+ }
1030+ /* compat_sk_receive_skb() will do a sock_put(), so hold here. */
1031+ sock_hold(sk);
1032+ skb_put(skb, size);
1033+ memcpy(skb->data, dg, size);
1034+ compat_sk_receive_skb(sk, skb, 0);
1035+ return VMCI_SUCCESS;
1036+}
1037+
1038+
1039+/*
1040+ *----------------------------------------------------------------------------
1041+ *
1042+ * VSockVmciRecvStreamCB --
1043+ *
1044+ * VMCI stream receive callback for control datagrams. This function is
1045+ * used specifically for SOCK_STREAM sockets.
1046+ *
1047+ * This is invoked as part of a tasklet that's scheduled when the VMCI
1048+ * interrupt fires. This is run in bottom-half context but it defers most
1049+ * of its work to the packet handling work queue.
1050+ *
1051+ * Results:
1052+ * Zero on success, negative error code on failure.
1053+ *
1054+ * Side effects:
1055+ * None.
1056+ *
1057+ *----------------------------------------------------------------------------
1058+ */
1059+
1060+static int
1061+VSockVmciRecvStreamCB(void *data, // IN
1062+ VMCIDatagram *dg) // IN
1063+{
1064+ struct sock *sk;
1065+ struct sockaddr_vm dst;
1066+ struct sockaddr_vm src;
1067+ VSockPacket *pkt;
1068+ VSockVmciSock *vsk;
1069+ Bool bhProcessPkt;
1070+ int err;
1071+
1072+ ASSERT(dg);
1073+ ASSERT(dg->payloadSize <= VMCI_MAX_DG_PAYLOAD_SIZE);
1074+
1075+ sk = NULL;
1076+ err = VMCI_SUCCESS;
1077+ bhProcessPkt = FALSE;
1078+
1079+ /*
1080+ * Ignore incoming packets from contexts without sockets, or resources that
1081+ * aren't vsock implementations.
1082+ */
1083+
1084+ if (!VSockAddr_SocketContextStream(VMCI_HANDLE_TO_CONTEXT_ID(dg->src)) ||
1085+ VSOCK_PACKET_RID != VMCI_HANDLE_TO_RESOURCE_ID(dg->src)) {
1086+ return VMCI_ERROR_NO_ACCESS;
1087+ }
1088+
1089+ if (VMCI_DG_SIZE(dg) < sizeof *pkt) {
1090+ /* Drop datagrams that do not contain full VSock packets. */
1091+ return VMCI_ERROR_INVALID_ARGS;
1092+ }
1093+
1094+ pkt = (VSockPacket *)dg;
1095+
1096+ LOG_PACKET(pkt);
1097+
1098+ /*
1099+ * Find the socket that should handle this packet. First we look for
1100+ * a connected socket and if there is none we look for a socket bound to
1101+ * the destination address.
1102+ *
1103+ * Note that we don't initialize the family member of the src and dst
1104+ * sockaddr_vm since we don't want to call VMCISock_GetAFValue() and
1105+ * possibly register the address family.
1106+ */
1107+ VSockAddr_InitNoFamily(&src,
1108+ VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src),
1109+ pkt->srcPort);
1110+
1111+ VSockAddr_InitNoFamily(&dst,
1112+ VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.dst),
1113+ pkt->dstPort);
1114+
1115+ sk = VSockVmciFindConnectedSocket(&src, &dst);
1116+ if (!sk) {
1117+ sk = VSockVmciFindBoundSocket(&dst);
1118+ if (!sk) {
1119+ /*
1120+ * We could not find a socket for this specified address. If this
1121+ * packet is a RST, we just drop it. If it is another packet, we send
1122+ * a RST. Note that we do not send a RST reply to RSTs so that we do
1123+ * not continually send RSTs between two endpoints.
1124+ *
1125+ * Note that since this is a reply, dst is src and src is dst.
1126+ */
1127+ if (VSOCK_SEND_RESET_BH(&dst, &src, pkt) < 0) {
1128+ Log("unable to send reset.\n");
1129+ }
1130+ err = VMCI_ERROR_NOT_FOUND;
1131+ goto out;
1132+ }
1133+ }
1134+
1135+ /*
1136+ * If the received packet type is beyond all types known to this
1137+ * implementation, reply with an invalid message. Hopefully this will help
1138+ * when implementing backwards compatibility in the future.
1139+ */
1140+ if (pkt->type >= VSOCK_PACKET_TYPE_MAX) {
1141+ VSOCK_SEND_INVALID_BH(&dst, &src);
1142+ err = VMCI_ERROR_INVALID_ARGS;
1143+ goto out;
1144+ }
1145+
1146+ /*
1147+ * This handler is privileged when this module is running on the host.
1148+ * We will get datagram connect requests from all endpoints (even VMs that
1149+ * are in a restricted context). If we get one from a restricted context
1150+ * then the destination socket must be trusted.
1151+ *
1152+ * NOTE: We access the socket struct without holding the lock here. This
1153+ * is ok because the field we are interested is never modified outside
1154+ * of the create and destruct socket functions.
1155+ */
1156+ vsk = vsock_sk(sk);
1157+ if (!VSockVmciAllowDgram(vsk, VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src))) {
1158+ err = VMCI_ERROR_NO_ACCESS;
1159+ goto out;
1160+ }
1161+
1162+ /*
1163+ * We do most everything in a work queue, but let's fast path the
1164+ * notification of reads and writes to help data transfer performance. We
1165+ * can only do this if there is no process context code executing for this
1166+ * socket since that may change the state.
1167+ */
1168+ bh_lock_sock(sk);
1169+
1170+ if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED) {
1171+ NOTIFYCALL(vsk, handleNotifyPkt, sk, pkt, TRUE, &dst, &src, &bhProcessPkt);
1172+ }
1173+
1174+ bh_unlock_sock(sk);
1175+
1176+ if (!bhProcessPkt) {
1177+ VSockRecvPktInfo *recvPktInfo;
1178+
1179+ recvPktInfo = kmalloc(sizeof *recvPktInfo, GFP_ATOMIC);
1180+ if (!recvPktInfo) {
1181+ if (VSOCK_SEND_RESET_BH(&dst, &src, pkt) < 0) {
1182+ Warning("unable to send reset\n");
1183+ }
1184+ err = VMCI_ERROR_NO_MEM;
1185+ goto out;
1186+ }
1187+
1188+ recvPktInfo->sk = sk;
1189+ memcpy(&recvPktInfo->pkt, pkt, sizeof recvPktInfo->pkt);
1190+ COMPAT_INIT_WORK(&recvPktInfo->work, VSockVmciRecvPktWork, recvPktInfo);
1191+
1192+ compat_schedule_work(&recvPktInfo->work);
1193+ /*
1194+ * Clear sk so that the reference count incremented by one of the Find
1195+ * functions above is not decremented below. We need that reference
1196+ * count for the packet handler we've scheduled to run.
1197+ */
1198+ sk = NULL;
1199+ }
1200+
1201+out:
1202+ if (sk) {
1203+ sock_put(sk);
1204+ }
1205+ return err;
1206+}
1207+
1208+
1209+/*
1210+ *----------------------------------------------------------------------------
1211+ *
1212+ * VSockVmciPeerAttachCB --
1213+ *
1214+ * Invoked when a peer attaches to a queue pair.
1215+ *
1216+ * Right now this does not do anything.
1217+ *
1218+ * Results:
1219+ * None.
1220+ *
1221+ * Side effects:
1222+ * None beyond briefly taking and releasing the socket lock.
1223+ *
1224+ *----------------------------------------------------------------------------
1225+ */
1226+
1227+static void
1228+VSockVmciPeerAttachCB(VMCIId subId, // IN
1229+ VMCI_EventData *eData, // IN
1230+ void *clientData) // IN
1231+{
1232+ struct sock *sk;
1233+ VMCIEventPayload_QP *ePayload;
1234+ VSockVmciSock *vsk;
1235+
1236+ ASSERT(eData);
1237+ ASSERT(clientData);
1238+
1239+ sk = (struct sock *)clientData;
1240+ ePayload = VMCIEventDataPayload(eData);
1241+
1242+ vsk = vsock_sk(sk);
1243+
1244+ /*
1245+ * We don't ask for delayed CBs when we subscribe to this event (we pass 0
1246+ * as flags to VMCIEvent_Subscribe()). VMCI makes no guarantees in that
1247+ * case about what context we might be running in, so it could be BH or
1248+ * process, blockable or non-blockable. And bh_lock_sock() is very
1249+ * particular about how it gets called (it's *not* the same as
1250+ * spin_lock_bh(), it expands directly into a spin_lock()). So we need to
1251+ * account for all possible contexts here.
1252+ */
1253+ local_bh_disable();
1254+ bh_lock_sock(sk);
1255+
1256+ /*
1257+ * XXX This is lame, we should provide a way to lookup sockets by qpHandle.
1258+ */
1259+ if (VMCI_HANDLE_EQUAL(vsk->qpHandle, ePayload->handle)) {
1260+ /*
1261+ * XXX This doesn't do anything, but in the future we may want to set
1262+ * a flag here to verify the attach really did occur and we weren't just
1263+ * sent a datagram claiming it was.
1264+ */
1265+ goto out;
1266+ }
1267+
1268+out:
1269+ bh_unlock_sock(sk);
1270+ local_bh_enable();
1271+}
1272+
1273+
1274+/*
1275+ *----------------------------------------------------------------------------
1276+ *
1277+ * VSockVmciHandleDetach --
1278+ *
1279+ * Perform the work necessary when the peer has detached.
1280+ *
1281+ * Note that this assumes the socket lock is held.
1282+ *
1283+ * Results:
1284+ * None.
1285+ *
1286+ * Side effects:
1287+ * The socket's and its peer's shutdown mask will be set appropriately,
1288+ * and any callers waiting on this socket will be awoken.
1289+ *
1290+ *----------------------------------------------------------------------------
1291+ */
1292+
1293+static void
1294+VSockVmciHandleDetach(struct sock *sk) // IN
1295+{
1296+ VSockVmciSock *vsk;
1297+
1298+ ASSERT(sk);
1299+
1300+ vsk = vsock_sk(sk);
1301+ if (!VMCI_HANDLE_INVALID(vsk->qpHandle)) {
1302+ ASSERT(vsk->qpair);
1303+
1304+ sock_set_flag(sk, SOCK_DONE);
1305+
1306+ /* On a detach the peer will not be sending or receiving anymore. */
1307+ vsk->peerShutdown = SHUTDOWN_MASK;
1308+
1309+ /*
1310+ * We should not be sending anymore since the peer won't be there to
1311+ * receive, but we can still receive if there is data left in our consume
1312+ * queue.
1313+ */
1314+ if (VSockVmciStreamHasData(vsk) <= 0) {
1315+ if (sk->sk_state == SS_CONNECTING) {
1316+ /*
1317+ * The peer may detach from a queue pair while we are
1318+ * still in the connecting state, i.e., if the peer VM is
1319+ * killed after attaching to a queue pair, but before we
1320+ * complete the handshake. In that case, we treat the
1321+ * detach event like a reset.
1322+ */
1323+
1324+ sk->sk_state = SS_UNCONNECTED;
1325+ sk->sk_err = ECONNRESET;
1326+ sk->sk_error_report(sk);
1327+ return;
1328+ }
1329+ sk->sk_state = SS_UNCONNECTED;
1330+ }
1331+ sk->sk_state_change(sk);
1332+ }
1333+}
1334+
1335+
1336+/*
1337+ *----------------------------------------------------------------------------
1338+ *
1339+ * VSockVmciPeerDetachCB --
1340+ *
1341+ * Invoked when a peer detaches from a queue pair.
1342+ *
1343+ * Results:
1344+ * None.
1345+ *
1346+ * Side effects:
1347+ * May modify socket state and signal socket.
1348+ *
1349+ *----------------------------------------------------------------------------
1350+ */
1351+
1352+static void
1353+VSockVmciPeerDetachCB(VMCIId subId, // IN
1354+ VMCI_EventData *eData, // IN
1355+ void *clientData) // IN
1356+{
1357+ struct sock *sk;
1358+ VMCIEventPayload_QP *ePayload;
1359+ VSockVmciSock *vsk;
1360+
1361+ ASSERT(eData);
1362+ ASSERT(clientData);
1363+
1364+ sk = (struct sock *)clientData;
1365+ ePayload = VMCIEventDataPayload(eData);
1366+ vsk = vsock_sk(sk);
1367+ if (VMCI_HANDLE_INVALID(ePayload->handle)) {
1368+ return;
1369+ }
1370+
1371+ /* Same rules for locking as for PeerAttachCB(). */
1372+ local_bh_disable();
1373+ bh_lock_sock(sk);
1374+
1375+ /*
1376+ * XXX This is lame, we should provide a way to lookup sockets by qpHandle.
1377+ */
1378+ if (VMCI_HANDLE_EQUAL(vsk->qpHandle, ePayload->handle)) {
1379+ VSockVmciHandleDetach(sk);
1380+ }
1381+
1382+ bh_unlock_sock(sk);
1383+ local_bh_enable();
1384+}
1385+
1386+
1387+/*
1388+ *----------------------------------------------------------------------------
1389+ *
1390+ * VSockVmciQPResumedCB --
1391+ *
1392+ * Invoked when a VM is resumed. Walks vsockConnectedTable under
1393+ * vsockTableLock and marks every connected stream socket as detached.
1394+ *
1395+ * Results:
1396+ * None.
1397+ *
1398+ * Side effects:
1399+ * May modify socket state and signal socket.
1400+ *
1401+ *----------------------------------------------------------------------------
1402+ */
1403+
1404+static void
1405+VSockVmciQPResumedCB(VMCIId subId, // IN: unused
1406+ VMCI_EventData *eData, // IN: unused
1407+ void *clientData) // IN: unused
1408+{
1409+ uint32 i;
1410+
1411+ spin_lock_bh(&vsockTableLock);
1412+
1413+ /*
1414+ * XXX This loop should probably be provided by util.{h,c}, but that's for
1415+ * another day.
1416+ */
1417+ for (i = 0; i < ARRAYSIZE(vsockConnectedTable); i++) {
1418+ VSockVmciSock *vsk;
1419+
1420+ list_for_each_entry(vsk, &vsockConnectedTable[i], connectedTable) {
1421+ struct sock *sk = sk_vsock(vsk);
1422+
1423+ /*
1424+ * XXX Technically this is racy (presumably because the socket itself
1425+ * is not locked here), but the resulting outcome from such a race is
1426+ * relatively harmless. A fix is planned for a later change.
1427+ */
1428+ VSockVmciHandleDetach(sk);
1429+ }
1430+ }
1431+
1432+ spin_unlock_bh(&vsockTableLock);
1433+}
1434+
1435+
1436+/*
1437+ *----------------------------------------------------------------------------
1438+ *
1439+ * VSockVmciPendingWork --
1440+ *
1441+ * Releases the resources for a pending socket if it has not reached the
1442+ * connected state and been accepted by a user process.
1443+ *
1444+ * Results:
1445+ * None.
1446+ *
1447+ * Side effects:
1448+ * The socket may be removed from the pending and connected lists, and the
1449+ * references held for this work item are dropped.
1450+ *
1451+ *----------------------------------------------------------------------------
1452+ */
1453+
1454+static void
1455+VSockVmciPendingWork(compat_delayed_work_arg work) // IN
1456+{
1457+ struct sock *sk;
1458+ struct sock *listener;
1459+ VSockVmciSock *vsk;
1460+ Bool cleanup;
1461+
1462+ vsk = COMPAT_DELAYED_WORK_GET_DATA(work, VSockVmciSock, dwork);
1463+ ASSERT(vsk);
1464+
1465+ sk = sk_vsock(vsk);
1466+ listener = vsk->listener;
1467+ cleanup = TRUE;
1468+
1469+ ASSERT(listener);
1470+
1471+ lock_sock(listener);
1472+ lock_sock(sk);
1473+
1474+ /*
1475+ * The socket should be on the pending list or the accept queue, but not
1476+ * both. It's also possible that the socket isn't on either.
1477+ */
1478+ ASSERT( ( VSockVmciIsPending(sk) && !VSockVmciInAcceptQueue(sk))
1479+ || (!VSockVmciIsPending(sk) && VSockVmciInAcceptQueue(sk))
1480+ || (!VSockVmciIsPending(sk) && !VSockVmciInAcceptQueue(sk)));
1481+
1482+ if (VSockVmciIsPending(sk)) {
1483+ VSockVmciRemovePending(listener, sk);
1484+ } else if (!vsk->rejected) {
1485+ /*
1486+ * We are not on the pending list and accept() did not reject us, so we
1487+ * must have been accepted by our user process. We just need to drop our
1488+ * references to the sockets and be on our way.
1489+ */
1490+ cleanup = FALSE;
1491+ goto out;
1492+ }
1493+
1494+ listener->sk_ack_backlog--;
1495+
1496+ /*
1497+ * We need to remove ourselves from the global connected sockets list so
1498+ * incoming packets can't find this socket, and to reduce the reference
1499+ * count.
1500+ */
1501+ if (VSockVmciInConnectedTable(sk)) {
1502+ VSockVmciRemoveConnected(sk);
1503+ }
1504+
1505+ sk->sk_state = SS_FREE;
1506+
1507+out:
1508+ release_sock(sk);
1509+ release_sock(listener);
1510+ if (cleanup) {
1511+ sock_put(sk);
1512+ }
1513+ sock_put(sk); /* Pairs with sock_hold(pending) in VSockVmciRecvListen(). */
1514+ sock_put(listener); /* Pairs with sock_hold(sk) in VSockVmciRecvListen(). */
1515+}
1516+
1517+
1518+/*
1519+ *----------------------------------------------------------------------------
1520+ *
1521+ * VSockVmciRecvPktWork --
1522+ *
1523+ * Handles an incoming control packet for the provided socket; dispatches
1524+ * on sk_state (the stream socket state machine) and frees the work item.
1525+ *
1526+ * Results:
1527+ * None.
1528+ *
1529+ * Side effects:
1530+ * May set state and wake up threads waiting for socket state to change.
1531+ *
1532+ *----------------------------------------------------------------------------
1533+ */
1534+
1535+static void
1536+VSockVmciRecvPktWork(compat_work_arg work) // IN
1537+{
1538+ VSockRecvPktInfo *recvPktInfo;
1539+ VSockPacket *pkt;
1540+ struct sock *sk;
1541+
1542+ recvPktInfo = COMPAT_WORK_GET_DATA(work, VSockRecvPktInfo, work);
1543+ ASSERT(recvPktInfo);
1544+
1545+ sk = recvPktInfo->sk;
1546+ pkt = &recvPktInfo->pkt;
1547+
1548+ ASSERT(pkt);
1549+ ASSERT(pkt->type < VSOCK_PACKET_TYPE_MAX);
1550+
1551+ lock_sock(sk);
1552+
1553+ switch (sk->sk_state) {
1554+ case SS_LISTEN:
1555+ VSockVmciRecvListen(sk, pkt);
1556+ break;
1557+ case SS_CONNECTING:
1558+ /*
1559+ * Processing of pending connections for servers goes through the
1560+ * listening socket, so see VSockVmciRecvListen() for that path.
1561+ */
1562+ VSockVmciRecvConnectingClient(sk, pkt);
1563+ break;
1564+ case SS_CONNECTED:
1565+ VSockVmciRecvConnected(sk, pkt);
1566+ break;
1567+ default:
1568+ /*
1569+ * Because this function does not run in the same context as
1570+ * VSockVmciRecvStreamCB, it is possible that the socket
1571+ * has closed. We need to let the other side know or it could
1572+ * be sitting in a connect and hang forever. Send a reset to prevent
1573+ * that.
1574+ */
1575+ VSOCK_SEND_RESET(sk, pkt);
1576+ goto out;
1577+ }
1578+
1579+out:
1580+ release_sock(sk);
1581+ kfree(recvPktInfo);
1582+ /*
1583+ * Release reference obtained in the stream callback when we fetched this
1584+ * socket out of the bound or connected list.
1585+ */
1586+ sock_put(sk);
1587+}
1588+
1589+
1590+/*
1591+ *----------------------------------------------------------------------------
1592+ *
1593+ * VSockVmciRecvListen --
1594+ *
1595+ * Receives packets for sockets in the listen state.
1596+ *
1597+ * Note that this assumes the socket lock is held.
1598+ *
1599+ * Results:
1600+ * Zero on success, negative error code on failure.
1601+ *
1602+ * Side effects:
1603+ * A new socket may be created and a negotiate control packet is sent.
1604+ *
1605+ *----------------------------------------------------------------------------
1606+ */
1607+
1608+static int
1609+VSockVmciRecvListen(struct sock *sk, // IN
1610+ VSockPacket *pkt) // IN
1611+{
1612+ struct sock *pending;
1613+ VSockVmciSock *vpending;
1614+ int err;
1615+ uint64 qpSize;
1616+ Bool oldRequest = FALSE;
1617+ Bool oldPktProto = FALSE;
1618+
1619+ ASSERT(sk);
1620+ ASSERT(pkt);
1621+ ASSERT(sk->sk_state == SS_LISTEN);
1622+
1623+ err = 0;
1624+
1625+ /*
1626+ * Because we are in the listen state, we could be receiving a packet for
1627+ * ourself or any previous connection requests that we received. If it's
1628+ * the latter, we try to find a socket in our list of pending connections
1629+ * and, if we do, call the appropriate handler for the state that that
1630+ * socket is in. Otherwise we try to service the connection request.
1631+ */
1632+ pending = VSockVmciGetPending(sk, pkt);
1633+ if (pending) {
1634+ lock_sock(pending);
1635+ switch (pending->sk_state) {
1636+ case SS_CONNECTING:
1637+ err = VSockVmciRecvConnectingServer(sk, pending, pkt);
1638+ break;
1639+ default:
1640+ VSOCK_SEND_RESET(pending, pkt);
1641+ err = -EINVAL;
1642+ }
1643+
1644+ if (err < 0) {
1645+ VSockVmciRemovePending(sk, pending);
1646+ }
1647+
1648+ release_sock(pending);
1649+ VSockVmciReleasePending(pending);
1650+
1651+ return err;
1652+ }
1653+
1654+ /*
1655+ * The listen state only accepts connection requests. Reply with a reset
1656+ * unless we received a reset.
1657+ */
1658+
1659+ if (!(pkt->type == VSOCK_PACKET_TYPE_REQUEST ||
1660+ pkt->type == VSOCK_PACKET_TYPE_REQUEST2)) {
1661+ VSOCK_REPLY_RESET(pkt);
1662+ return -EINVAL;
1663+ }
1664+
1665+ if (pkt->u.size == 0) {
1666+ VSOCK_REPLY_RESET(pkt);
1667+ return -EINVAL;
1668+ }
1669+
1670+ /*
1671+ * If this socket can't accommodate this connection request, we send
1672+ * a reset. Otherwise we create and initialize a child socket and reply
1673+ * with a connection negotiation.
1674+ */
1675+ if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) {
1676+ VSOCK_REPLY_RESET(pkt);
1677+ return -ECONNREFUSED;
1678+ }
1679+
1680+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
1681+ pending = __VSockVmciCreate(NULL, sk, GFP_KERNEL, sk->sk_type);
1682+#else
1683+ pending = __VSockVmciCreate(compat_sock_net(sk), NULL, sk, GFP_KERNEL,
1684+ sk->sk_type);
1685+#endif
1686+ if (!pending) {
1687+ VSOCK_SEND_RESET(sk, pkt);
1688+ return -ENOMEM;
1689+ }
1690+
1691+ vpending = vsock_sk(pending);
1692+ ASSERT(vpending);
1693+ ASSERT(vsock_sk(sk)->localAddr.svm_port == pkt->dstPort);
1694+
1695+ VSockAddr_Init(&vpending->localAddr,
1696+ VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.dst),
1697+ pkt->dstPort);
1698+ VSockAddr_Init(&vpending->remoteAddr,
1699+ VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src),
1700+ pkt->srcPort);
1701+
1702+ /*
1703+ * If the proposed size fits within our min/max, accept
1704+ * it. Otherwise propose our own size.
1705+ */
1706+ if (pkt->u.size >= vpending->queuePairMinSize &&
1707+ pkt->u.size <= vpending->queuePairMaxSize) {
1708+ qpSize = pkt->u.size;
1709+ } else {
1710+ qpSize = vpending->queuePairSize;
1711+ }
1712+
1713+ /*
1714+ * Figure out if we are using old or new requests: an override, if set,
1715+ * wins; otherwise go by the pkt type sent by our peer.
1716+ */
1717+ if (VSockVmciOldProtoOverride(&oldPktProto)) {
1718+ oldRequest = oldPktProto;
1719+ } else {
1720+ if (pkt->type == VSOCK_PACKET_TYPE_REQUEST) {
1721+ oldRequest = TRUE;
1722+ } else if (pkt->type == VSOCK_PACKET_TYPE_REQUEST2) {
1723+ oldRequest = FALSE;
1724+ }
1725+ }
1726+
1727+ if (oldRequest) {
1728+ /* Handle a REQUEST (or override) */
1729+ VSockProtoVersion version = VSOCK_PROTO_INVALID;
1730+ if (VSockVmciProtoToNotifyStruct(pending, &version, TRUE)) {
1731+ err = VSOCK_SEND_NEGOTIATE(pending, qpSize);
1732+ } else {
1733+ err = -EINVAL;
1734+ }
1735+ } else {
1736+ /* Handle a REQUEST2 (or override) */
1737+ int protoInt = pkt->proto;
1738+ int pos;
1739+ uint16 activeProtoVersion = 0;
1740+
1741+ /*
1742+ * The list of possible protocols is the intersection of the protocols
1743+ * the client supports and the protocols we support.
1744+ */
1745+ protoInt &= VSockVmciNewProtoSupportedVersions();
1746+
1747+ /* We choose the highest possible protocol version and use that one. */
1748+ pos = mssb32(protoInt);
1749+ if (pos) {
1750+ activeProtoVersion = (1 << (pos - 1));
1751+ if (VSockVmciProtoToNotifyStruct(pending, &activeProtoVersion, FALSE)) {
1752+ err = VSOCK_SEND_NEGOTIATE2(pending, qpSize,
1753+ activeProtoVersion);
1754+ } else {
1755+ err = -EINVAL;
1756+ }
1757+ } else {
1758+ err = -EINVAL;
1759+ }
1760+ }
1761+
1762+ if (err < 0) {
1763+ VSOCK_SEND_RESET(sk, pkt);
1764+ sock_put(pending);
1765+ err = VSockVmci_ErrorToVSockError(err);
1766+ goto out;
1767+ }
1768+
1769+ VSockVmciAddPending(sk, pending);
1770+ sk->sk_ack_backlog++;
1771+
1772+ pending->sk_state = SS_CONNECTING;
1773+ vpending->produceSize = vpending->consumeSize = qpSize;
1774+ vpending->queuePairSize = qpSize;
1775+
1776+ NOTIFYCALL(vpending, processRequest, pending);
1777+
1778+ /*
1779+ * We might never receive another message for this socket and it's not
1780+ * connected to any process, so we have to ensure it gets cleaned up
1781+ * ourselves. Our delayed work function will take care of that. Note that we
1782+ * do not ever cancel this function since we have few guarantees about its
1783+ * state when calling cancel_delayed_work(). Instead we hold a reference on
1784+ * the socket for that function and make it capable of handling cases where
1785+ * it needs to do nothing but release that reference.
1786+ */
1787+ vpending->listener = sk;
1788+ sock_hold(sk);
1789+ sock_hold(pending);
1790+ COMPAT_INIT_DELAYED_WORK(&vpending->dwork, VSockVmciPendingWork, vpending);
1791+ compat_schedule_delayed_work(&vpending->dwork, HZ);
1792+
1793+out:
1794+ return err;
1795+}
1796+
1797+
1798+/*
1799+ *----------------------------------------------------------------------------
1800+ *
1801+ * VSockVmciRecvConnectingServer --
1802+ *
1803+ * Receives packets for sockets in the connecting state on the server side.
1804+ *
1805+ * Connecting sockets on the server side can only receive queue pair offer
1806+ * packets. All others should be treated as cause for closing the
1807+ * connection.
1808+ *
1809+ * Note that this assumes the socket lock is held for both sk and pending.
1810+ *
1811+ * Results:
1812+ * Zero on success, negative error code on failure.
1813+ *
1814+ * Side effects:
1815+ * A queue pair may be created, an attach control packet may be sent, the
1816+ * socket may transition to the connected state, and a pending caller in
1817+ * accept() may be woken up.
1818+ *
1819+ *----------------------------------------------------------------------------
1820+ */
1821+
1822+static int
1823+VSockVmciRecvConnectingServer(struct sock *listener, // IN: the listening socket
1824+ struct sock *pending, // IN: the pending connection
1825+ VSockPacket *pkt) // IN: current packet
1826+{
1827+ VSockVmciSock *vpending;
1828+ VMCIHandle handle;
1829+ VMCIQPair *qpair;
1830+ Bool isLocal;
1831+ uint32 flags;
1832+ VMCIId detachSubId;
1833+ int err;
1834+ int skerr;
1835+
1836+ ASSERT(listener);
1837+ ASSERT(pkt);
1838+ ASSERT(listener->sk_state == SS_LISTEN);
1839+ ASSERT(pending->sk_state == SS_CONNECTING);
1840+
1841+ vpending = vsock_sk(pending);
1842+ detachSubId = VMCI_INVALID_ID;
1843+
1844+ switch (pkt->type) {
1845+ case VSOCK_PACKET_TYPE_OFFER:
1846+ if (VMCI_HANDLE_INVALID(pkt->u.handle)) {
1847+ VSOCK_SEND_RESET(pending, pkt);
1848+ skerr = EPROTO;
1849+ err = -EINVAL;
1850+ goto destroy;
1851+ }
1852+ break;
1853+ default:
1854+ /* Close and cleanup the connection. */
1855+ VSOCK_SEND_RESET(pending, pkt);
1856+ skerr = EPROTO;
1857+ err = pkt->type == VSOCK_PACKET_TYPE_RST ?
1858+ 0 :
1859+ -EINVAL;
1860+ goto destroy;
1861+ }
1862+
1863+ ASSERT(pkt->type == VSOCK_PACKET_TYPE_OFFER);
1864+
1865+ /*
1866+ * In order to complete the connection we need to attach to the offered
1867+ * queue pair and send an attach notification. We also subscribe to the
1868+ * detach event so we know when our peer goes away, and we do that before
1869+ * attaching so we don't miss an event. If all this succeeds, we update our
1870+ * state and wakeup anything waiting in accept() for a connection.
1871+ */
1872+
1873+ /*
1874+ * We don't care about attach since we ensure the other side has attached by
1875+ * specifying the ATTACH_ONLY flag below.
1876+ */
1877+ err = VMCIEvent_Subscribe(VMCI_EVENT_QP_PEER_DETACH,
1878+ VMCI_FLAG_EVENT_NONE,
1879+ VSockVmciPeerDetachCB,
1880+ pending,
1881+ &detachSubId);
1882+ if (err < VMCI_SUCCESS) {
1883+ VSOCK_SEND_RESET(pending, pkt);
1884+ err = VSockVmci_ErrorToVSockError(err);
1885+ skerr = -err;
1886+ goto destroy;
1887+ }
1888+
1889+ vpending->detachSubId = detachSubId;
1890+
1891+ /* Now attach to the queue pair the client created. */
1892+ handle = pkt->u.handle;
1893+
1894+ /*
1895+ * vpending->localAddr always has a context id so we do not
1896+ * need to worry about VMADDR_CID_ANY in this case.
1897+ */
1898+ isLocal = vpending->remoteAddr.svm_cid == vpending->localAddr.svm_cid;
1899+ flags = VMCI_QPFLAG_ATTACH_ONLY;
1900+ flags |= isLocal ? VMCI_QPFLAG_LOCAL : 0;
1901+
1902+ err = VSockVmciQueuePairAlloc(&qpair,
1903+ &handle,
1904+ vpending->produceSize,
1905+ vpending->consumeSize,
1906+ VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src),
1907+ flags,
1908+ VSockVmciTrusted(vpending,
1909+ vpending->remoteAddr.svm_cid));
1910+ if (err < 0) {
1911+ VSOCK_SEND_RESET(pending, pkt);
1912+ skerr = -err;
1913+ goto destroy;
1914+ }
1915+
1916+ ASSERT(VMCI_HANDLE_EQUAL(handle, pkt->u.handle));
1917+ vpending->qpHandle = handle;
1918+ vpending->qpair = qpair;
1919+
1920+ /*
1921+ * When we send the attach message, we must be ready to handle
1922+ * incoming control messages on the newly connected socket. So we
1923+ * move the pending socket to the connected state before sending
1924+ * the attach message. Otherwise, an incoming packet triggered by
1925+ * the attach being received by the peer may be processed
1926+ * concurrently with what happens below after sending the attach
1927+ * message, and that incoming packet will find the listening socket
1928+ * instead of the (currently) pending socket. Note that enqueueing
1929+ * the socket increments the reference count, so even if a reset
1930+ * comes before the connection is accepted, the socket will be
1931+ * valid until it is removed from the queue.
1932+ *
1933+ * If we fail sending the attach below, we remove the socket from
1934+ * the connected list and move the socket to SS_UNCONNECTED before
1935+ * releasing the lock, so a pending slow path processing of an
1936+ * incoming packet will not see the socket in the connected state
1937+ * in that case.
1938+ */
1939+ pending->sk_state = SS_CONNECTED;
1940+
1941+ VSockVmciInsertConnected(vsockConnectedSocketsVsk(vpending), pending);
1942+
1943+ /* Notify our peer of our attach. */
1944+ err = VSOCK_SEND_ATTACH(pending, handle);
1945+ if (err < 0) {
1946+ VSockVmciRemoveConnected(pending);
1947+ Log("Could not send attach\n");
1948+ VSOCK_SEND_RESET(pending, pkt);
1949+ err = VSockVmci_ErrorToVSockError(err);
1950+ skerr = -err;
1951+ goto destroy;
1952+ }
1953+
1954+ /*
1955+ * We have a connection. Move the now connected socket from the
1956+ * listener's pending list to the accept queue so callers of
1957+ * accept() can find it.
1958+ */
1959+ VSockVmciRemovePending(listener, pending);
1960+ VSockVmciEnqueueAccept(listener, pending);
1961+
1962+ /*
1963+ * Callers of accept() will be waiting on the listening socket, not the
1964+ * pending socket.
1965+ */
1966+ listener->sk_state_change(listener);
1967+
1968+ return 0;
1969+
1970+destroy:
1971+ pending->sk_err = skerr;
1972+ pending->sk_state = SS_UNCONNECTED;
1973+ /*
1974+ * As long as we drop our reference, all necessary cleanup will be handled
1975+ * when the cleanup function drops its reference and our destruct
1976+ * implementation is called. Note that since the listen handler will remove
1977+ * pending from the pending list upon our failure, the cleanup function won't
1978+ * drop the additional reference, which is why we do it here.
1979+ */
1980+ sock_put(pending);
1981+
1982+ return err;
1983+}
1984+
1985+
1986+/*
1987+ *----------------------------------------------------------------------------
1988+ *
1989+ * VSockVmciRecvConnectingClient --
1990+ *
1991+ * Receives packets for sockets in the connecting state on the client side.
1992+ *
1993+ * Connecting sockets on the client side should only receive attach,
1994+ * negotiate, invalid or reset packets. All others close the connection.
1995+ *
1996+ * Note that this assumes the socket lock is held for sk.
1997+ *
1998+ * Results:
1999+ * Zero on success, negative error code on failure.
2000+ *
2001+ * Side effects:
2002+ * The socket may transition to the connected state and wakeup the pending
2003+ * caller of connect().
2004+ *
2005+ *----------------------------------------------------------------------------
2006+ */
2007+
2008+static int
2009+VSockVmciRecvConnectingClient(struct sock *sk, // IN: socket
2010+ VSockPacket *pkt) // IN: current packet
2011+{
2012+ VSockVmciSock *vsk;
2013+ int err;
2014+ int skerr;
2015+
2016+ ASSERT(sk);
2017+ ASSERT(pkt);
2018+ ASSERT(sk->sk_state == SS_CONNECTING);
2019+
2020+ vsk = vsock_sk(sk);
2021+
2022+ switch (pkt->type) {
2023+ case VSOCK_PACKET_TYPE_ATTACH:
2024+ if (VMCI_HANDLE_INVALID(pkt->u.handle) ||
2025+ !VMCI_HANDLE_EQUAL(pkt->u.handle, vsk->qpHandle)) {
2026+ skerr = EPROTO;
2027+ err = -EINVAL;
2028+ goto destroy;
2029+ }
2030+
2031+ /*
2032+ * Signify the socket is connected and wakeup the waiter in connect().
2033+ * Also place the socket in the connected table for accounting (it can
2034+ * already be found since it's in the bound table).
2035+ */
2036+ sk->sk_state = SS_CONNECTED;
2037+ sk->sk_socket->state = SS_CONNECTED;
2038+ VSockVmciInsertConnected(vsockConnectedSocketsVsk(vsk), sk);
2039+ sk->sk_state_change(sk);
2040+
2041+ break;
2042+ case VSOCK_PACKET_TYPE_NEGOTIATE:
2043+ case VSOCK_PACKET_TYPE_NEGOTIATE2:
2044+ if (pkt->u.size == 0 ||
2045+ VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src) != vsk->remoteAddr.svm_cid ||
2046+ pkt->srcPort != vsk->remoteAddr.svm_port ||
2047+ !VMCI_HANDLE_INVALID(vsk->qpHandle) ||
2048+ vsk->qpair ||
2049+ vsk->produceSize != 0 ||
2050+ vsk->consumeSize != 0 ||
2051+ vsk->attachSubId != VMCI_INVALID_ID ||
2052+ vsk->detachSubId != VMCI_INVALID_ID) {
2053+ skerr = EPROTO;
2054+ err = -EINVAL;
2055+
2056+ goto destroy;
2057+ }
2058+
2059+ err = VSockVmciRecvConnectingClientNegotiate(sk, pkt);
2060+ if (err) {
2061+ skerr = -err;
2062+ goto destroy;
2063+ }
2064+
2065+ break;
2066+ case VSOCK_PACKET_TYPE_INVALID:
2067+ err = VSockVmciRecvConnectingClientInvalid(sk, pkt);
2068+ if (err) {
2069+ skerr = -err;
2070+ goto destroy;
2071+ }
2072+
2073+ break;
2074+ case VSOCK_PACKET_TYPE_RST:
2075+ /*
2076+ * Older versions of the linux code (WS 6.5 / ESX 4.0) used to continue
2077+ * processing here after they sent an INVALID packet. This meant that we
2078+ * got a RST after the INVALID. We ignore a RST after an INVALID. The
2079+ * common code doesn't send the RST ... so we can hang if an old version
2080+ * of the common code fails between getting a REQUEST and sending an
2081+ * OFFER back. Not much we can do about it... except hope that it
2082+ * doesn't happen.
2083+ */
2084+ if (vsk->ignoreConnectingRst) {
2085+ vsk->ignoreConnectingRst = FALSE;
2086+ } else {
2087+ skerr = ECONNRESET;
2088+ err = 0;
2089+ goto destroy;
2090+ }
2091+
2092+ break;
2093+ default:
2094+ /* Close and cleanup the connection. */
2095+ skerr = EPROTO;
2096+ err = -EINVAL;
2097+ goto destroy;
2098+ }
2099+
2100+ ASSERT(pkt->type == VSOCK_PACKET_TYPE_ATTACH ||
2101+ pkt->type == VSOCK_PACKET_TYPE_NEGOTIATE ||
2102+ pkt->type == VSOCK_PACKET_TYPE_NEGOTIATE2 ||
2103+ pkt->type == VSOCK_PACKET_TYPE_INVALID ||
2104+ pkt->type == VSOCK_PACKET_TYPE_RST);
2105+
2106+ return 0;
2107+
2108+destroy:
2109+ VSOCK_SEND_RESET(sk, pkt);
2110+
2111+ sk->sk_state = SS_UNCONNECTED;
2112+ sk->sk_err = skerr;
2113+ sk->sk_error_report(sk);
2114+ return err;
2115+}
2116+
2117+
2118+/*
2119+ *----------------------------------------------------------------------------
2120+ *
2121+ * VSockVmciRecvConnectingClientNegotiate --
2122+ *
2123+ * Handles a negotiate packet for a client in the connecting state.
2124+ *
2125+ * Note that this assumes the socket lock is held for sk.
2126+ *
2127+ * Results:
2128+ * Zero on success, negative error code on failure.
2129+ *
2130+ * Side effects:
2131+ * May subscribe to peer attach/detach events, allocate a queue pair and
2132+ * send a queue pair offer to the peer.
2133+ *
2134+ *----------------------------------------------------------------------------
2135+ */
2136+
2137+static int
2138+VSockVmciRecvConnectingClientNegotiate(struct sock *sk, // IN: socket
2139+ VSockPacket *pkt) // IN: current packet
2140+{
2141+ int err;
2142+ VSockVmciSock *vsk;
2143+ VMCIHandle handle;
2144+ VMCIQPair *qpair;
2145+ VMCIId attachSubId;
2146+ VMCIId detachSubId;
2147+ Bool isLocal;
2148+ uint32 flags;
2149+ Bool oldProto = TRUE;
2150+ Bool oldPktProto;
2151+ VSockProtoVersion version;
2152+
2153+ vsk = vsock_sk(sk);
2154+ handle = VMCI_INVALID_HANDLE;
2155+ attachSubId = VMCI_INVALID_ID;
2156+ detachSubId = VMCI_INVALID_ID;
2157+
2158+ ASSERT(sk);
2159+ ASSERT(pkt);
2160+ ASSERT(pkt->u.size > 0);
2161+ ASSERT(vsk->remoteAddr.svm_cid == VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src));
2162+ ASSERT(vsk->remoteAddr.svm_port == pkt->srcPort);
2163+ ASSERT(VMCI_HANDLE_INVALID(vsk->qpHandle));
2164+ ASSERT(vsk->qpair == NULL);
2165+ ASSERT(vsk->produceSize == 0);
2166+ ASSERT(vsk->consumeSize == 0);
2167+ ASSERT(vsk->attachSubId == VMCI_INVALID_ID);
2168+ ASSERT(vsk->detachSubId == VMCI_INVALID_ID);
2169+
2170+ /*
2171+ * If we have gotten here then we should be past the point where old linux
2172+ * vsock could have sent the bogus rst.
2173+ */
2174+ vsk->sentRequest = FALSE;
2175+ vsk->ignoreConnectingRst = FALSE;
2176+
2177+ /* Verify that we're OK with the proposed queue pair size */
2178+ if (pkt->u.size < vsk->queuePairMinSize ||
2179+ pkt->u.size > vsk->queuePairMaxSize) {
2180+ err = -EINVAL;
2181+ goto destroy;
2182+ }
2183+
2184+ /*
2185+ * At this point we know the CID the peer is using to talk to us.
2186+ */
2187+
2188+ if (vsk->localAddr.svm_cid == VMADDR_CID_ANY) {
2189+ vsk->localAddr.svm_cid = VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.dst);
2190+ }
2191+
2192+ /*
2193+ * Setup the notify ops to be the highest supported version that both the
2194+ * server and the client support.
2195+ */
2196+
2197+ if (VSockVmciOldProtoOverride(&oldPktProto)) {
2198+ oldProto = oldPktProto;
2199+ } else {
2200+ if (pkt->type == VSOCK_PACKET_TYPE_NEGOTIATE) {
2201+ oldProto = TRUE;
2202+ } else if (pkt->type == VSOCK_PACKET_TYPE_NEGOTIATE2) {
2203+ oldProto = FALSE;
2204+ }
2205+ }
2206+
2207+ if (oldProto) {
2208+ version = VSOCK_PROTO_INVALID;
2209+ } else {
2210+ version = pkt->proto;
2211+ }
2212+
2213+ if (!VSockVmciProtoToNotifyStruct(sk, &version, oldProto)) {
2214+ err = -EINVAL;
2215+ goto destroy;
2216+ }
2217+
2218+ /*
2219+ * Subscribe to attach and detach events first.
2220+ *
2221+ * XXX We attach once for each queue pair created for now so it is easy
2222+ * to find the socket (it's provided), but later we should only subscribe
2223+ * once and add a way to lookup sockets by queue pair handle.
2224+ */
2225+ err = VMCIEvent_Subscribe(VMCI_EVENT_QP_PEER_ATTACH,
2226+ VMCI_FLAG_EVENT_NONE,
2227+ VSockVmciPeerAttachCB,
2228+ sk,
2229+ &attachSubId);
2230+ if (err < VMCI_SUCCESS) {
2231+ err = VSockVmci_ErrorToVSockError(err);
2232+ goto destroy;
2233+ }
2234+
2235+ err = VMCIEvent_Subscribe(VMCI_EVENT_QP_PEER_DETACH,
2236+ VMCI_FLAG_EVENT_NONE,
2237+ VSockVmciPeerDetachCB,
2238+ sk,
2239+ &detachSubId);
2240+ if (err < VMCI_SUCCESS) {
2241+ err = VSockVmci_ErrorToVSockError(err);
2242+ goto destroy;
2243+ }
2244+
2245+ /* Make VMCI select the handle for us. */
2246+ handle = VMCI_INVALID_HANDLE;
2247+ isLocal = vsk->remoteAddr.svm_cid == vsk->localAddr.svm_cid;
2248+ flags = isLocal ? VMCI_QPFLAG_LOCAL : 0;
2249+
2250+ err = VSockVmciQueuePairAlloc(&qpair,
2251+ &handle,
2252+ pkt->u.size,
2253+ pkt->u.size,
2254+ vsk->remoteAddr.svm_cid,
2255+ flags,
2256+ VSockVmciTrusted(vsk, vsk->remoteAddr.svm_cid));
2257+ if (err < 0) {
2258+ goto destroy;
2259+ }
2260+
2261+ err = VSOCK_SEND_QP_OFFER(sk, handle);
2262+ if (err < 0) {
2263+ err = VSockVmci_ErrorToVSockError(err);
2264+ goto destroy;
2265+ }
2266+
2267+ vsk->qpHandle = handle;
2268+ vsk->qpair = qpair;
2269+
2270+ vsk->produceSize = vsk->consumeSize = pkt->u.size;
2271+
2272+ vsk->attachSubId = attachSubId;
2273+ vsk->detachSubId = detachSubId;
2274+
2275+ NOTIFYCALL(vsk, processNegotiate, sk);
2276+
2277+ return 0;
2278+
2279+destroy:
2280+ if (attachSubId != VMCI_INVALID_ID) {
2281+ VMCIEvent_Unsubscribe(attachSubId);
2282+ ASSERT(vsk->attachSubId == VMCI_INVALID_ID);
2283+ }
2284+
2285+ if (detachSubId != VMCI_INVALID_ID) {
2286+ VMCIEvent_Unsubscribe(detachSubId);
2287+ ASSERT(vsk->detachSubId == VMCI_INVALID_ID);
2288+ }
2289+
2290+ if (!VMCI_HANDLE_INVALID(handle)) {
2291+ ASSERT(qpair); /* Check the local: vsk->qpair is only set on success. */
2292+ VMCIQPair_Detach(&qpair);
2293+ ASSERT(VMCI_HANDLE_INVALID(vsk->qpHandle));
2294+ }
2295+
2296+ return err;
2297+}
2298+
2299+
2300+/*
2301+ *----------------------------------------------------------------------------
2302+ *
2303+ * VSockVmciRecvConnectingClientInvalid --
2304+ *
2305+ * Handles an invalid packet for a client in the connecting state.
2306+ *
2307+ * Note that this assumes the socket lock is held for sk.
2308+ *
2309+ * Results:
2310+ * Zero on success, negative error code on failure.
2311+ *
2312+ * Side effects:
2313+ * May resend the connection request and arm ignoreConnectingRst.
2314+ *
2315+ *----------------------------------------------------------------------------
2316+ */
2317+
2318+static int
2319+VSockVmciRecvConnectingClientInvalid(struct sock *sk, // IN: socket
2320+ VSockPacket *pkt) // IN: current packet
2321+{
2322+ int err = 0;
2323+ VSockVmciSock *vsk;
2324+
2325+ ASSERT(sk);
2326+ ASSERT(pkt);
2327+
2328+ vsk = vsock_sk(sk);
2329+
2330+ if (vsk->sentRequest) {
2331+ vsk->sentRequest = FALSE;
2332+ vsk->ignoreConnectingRst = TRUE;
2333+
2334+ err = VSOCK_SEND_CONN_REQUEST(sk, vsk->queuePairSize);
2335+ if (err < 0) {
2336+ err = VSockVmci_ErrorToVSockError(err);
2337+ } else {
2338+ err = 0; /* Callers only need success/failure, not the sent size. */
2339+ }
2340+ }
2341+
2342+ return err;
2343+}
2344+
2345+
2346+/*
2347+ *----------------------------------------------------------------------------
2348+ *
2349+ * VSockVmciRecvConnected --
2350+ *
2351+ * Receives packets for sockets in the connected state.
2352+ *
2353+ * Connected sockets should only ever receive shutdown, wrote, read, or reset
2354+ * control messages. Others are treated as errors that are ignored.
2355+ *
2356+ * Wrote and read signify that the peer has produced or consumed,
2357+ * respectively.
2358+ *
2359+ * Shutdown messages signify that the connection is being closed cleanly and
2360+ * reset messages signify that the connection is being closed in error.
2361+ *
2362+ * Note that this assumes the socket lock is held.
2363+ *
2364+ * Results:
2365+ * Zero on success, negative error code on failure.
2366+ *
2367+ * Side effects:
2368+ * May record the peer's shutdown mode, move the socket to the disconnecting
2369+ * state, and wake up threads waiting on the socket.
2370+ *
2371+ *
2372+ *----------------------------------------------------------------------------
2373+ */
2374+
2375+static int
2376+VSockVmciRecvConnected(struct sock *sk, // IN
2377+ VSockPacket *pkt) // IN
2378+{
2379+ VSockVmciSock *vsk;
2380+ Bool pktProcessed = FALSE;
2381+
2382+ ASSERT(sk);
2383+ ASSERT(pkt);
2384+ ASSERT(sk->sk_state == SS_CONNECTED);
2385+
2386+ /*
2387+ * In cases where we are closing the connection, it's sufficient to mark
2388+ * the state change (and maybe error) and wake up any waiting threads.
2389+ * Since this is a connected socket, it's owned by a user process and will
2390+ * be cleaned up when the failure is passed back on the current or next
2391+ * system call. Our system call implementations must therefore check for
2392+ * error and state changes on entry and when being awoken.
2393+ */
2394+ switch (pkt->type) {
2395+ case VSOCK_PACKET_TYPE_SHUTDOWN:
2396+ if (pkt->u.mode) {
2397+ vsk = vsock_sk(sk);
2398+
2399+ vsk->peerShutdown |= pkt->u.mode;
2400+ sk->sk_state_change(sk);
2401+ }
2402+ break;
2403+
2404+ case VSOCK_PACKET_TYPE_RST:
2405+ vsk = vsock_sk(sk);
2406+ /*
2407+ * It is possible that we sent our peer a message (e.g.
2408+ * a WAITING_READ) right before we got notified that the peer
2409+ * had detached. If that happens then we can get a RST pkt back
2410+ * from our peer even though there is data available for us
2411+ * to read. In that case, don't shutdown the socket completely
2412+ * but instead allow the local client to finish reading data
2413+ * off the queuepair. Always treat a RST pkt in connected mode
2414+ * like a clean shutdown.
2415+ */
2416+ sock_set_flag(sk, SOCK_DONE);
2417+ vsk->peerShutdown = SHUTDOWN_MASK;
2418+ if (VSockVmciStreamHasData(vsk) <= 0) {
2419+ sk->sk_state = SS_DISCONNECTING;
2420+ }
2421+ sk->sk_state_change(sk);
2422+ break;
2423+
2424+ default:
2425+ vsk = vsock_sk(sk);
2426+ NOTIFYCALL(vsk, handleNotifyPkt, sk, pkt, FALSE, NULL, NULL,
2427+ &pktProcessed);
2428+ if (!pktProcessed) {
2429+ return -EINVAL;
2430+ }
2431+ break;
2432+ }
2433+
2434+ return 0;
2435+}
2436+
2437+
2438+/*
2439+ *----------------------------------------------------------------------------
2440+ *
2441+ * __VSockVmciSendControlPkt --
2442+ *
2443+ * Common code to send a control packet: fills in pkt, then sends it via VMCI.
2444+ *
2445+ * Results:
2446+ * Size of datagram sent on success, negative error code otherwise.
2447+ * If convertError is TRUE, error code is a vsock error, otherwise,
2448+ * result is a VMCI error code.
2449+ *
2450+ * Side effects:
2451+ * None.
2452+ *
2453+ *----------------------------------------------------------------------------
2454+ */
2455+
2456+static int
2457+__VSockVmciSendControlPkt(VSockPacket *pkt, // IN
2458+ struct sockaddr_vm *src, // IN
2459+ struct sockaddr_vm *dst, // IN
2460+ VSockPacketType type, // IN
2461+ uint64 size, // IN
2462+ uint64 mode, // IN
2463+ VSockWaitingInfo *wait, // IN
2464+ VSockProtoVersion proto, // IN
2465+ VMCIHandle handle, // IN
2466+ Bool convertError) // IN
2467+{
2468+ int err;
2469+
2470+ ASSERT(pkt);
2471+ /*
2472+ * This function can be called in different contexts, so the family value is
2473+ * not necessarily consistent; hence the NOFAMILY variant of the asserts.
2474+ */
2475+
2476+ VSOCK_ADDR_NOFAMILY_ASSERT(src);
2477+ VSOCK_ADDR_NOFAMILY_ASSERT(dst);
2478+
2479+ VSockPacket_Init(pkt, src, dst, type, size, mode, wait, proto, handle);
2480+ LOG_PACKET(pkt);
2481+ VSOCK_STATS_CTLPKT_LOG(pkt->type);
2482+ err = VMCIDatagram_Send(&pkt->dg);
2483+ if (convertError && (err < 0)) {
2484+ return VSockVmci_ErrorToVSockError(err);
2485+ }
2486+
2487+ return err; /* Raw VMCIDatagram_Send() result, not converted. */
2488+}
2489+
2490+
2491+/*
2492+ *----------------------------------------------------------------------------
2493+ *
2494+ * VSockVmciReplyControlPktFast --
2495+ *
2496+ * Sends a control packet back to the source of an incoming packet.
2497+ * The control packet is allocated in the stack.
2498+ *
2499+ * Results:
2500+ * Size of datagram sent on success, negative error code otherwise.
2501+ *
2502+ * Side effects:
2503+ * None.
2504+ *
2505+ *----------------------------------------------------------------------------
2506+ */
2507+
2508+int
2509+VSockVmciReplyControlPktFast(VSockPacket *pkt, // IN
2510+ VSockPacketType type, // IN
2511+ uint64 size, // IN
2512+ uint64 mode, // IN
2513+ VSockWaitingInfo *wait, // IN
2514+ VMCIHandle handle) // IN
2515+{
2516+ VSockPacket reply;
2517+ struct sockaddr_vm src, dst;
2518+
2519+ ASSERT(pkt);
2520+
2521+ if (pkt->type == VSOCK_PACKET_TYPE_RST) {
2522+ return 0;
2523+ } else {
2524+ VSockPacket_GetAddresses(pkt, &src, &dst);
2525+ return __VSockVmciSendControlPkt(&reply, &src, &dst, type,
2526+ size, mode, wait,
2527+ VSOCK_PROTO_INVALID, handle, TRUE);
2528+ }
2529+}
2530+
2531+
2532+/*
2533+ *----------------------------------------------------------------------------
2534+ *
2535+ * VSockVmciSendControlPktBH --
2536+ *
2537+ * Sends a control packet from bottom-half context. The control packet is
2538+ * static data to minimize the resource cost.
2539+ *
2540+ * Results:
2541+ * Size of datagram sent on success, negative error code otherwise. Note
2542+ * that we return a VMCI error code since that's what callers will need
2543+ * to provide.
2544+ *
2545+ * Side effects:
2546+ * None.
2547+ *
2548+ *----------------------------------------------------------------------------
2549+ */
2550+
2551+int
2552+VSockVmciSendControlPktBH(struct sockaddr_vm *src, // IN
2553+ struct sockaddr_vm *dst, // IN
2554+ VSockPacketType type, // IN
2555+ uint64 size, // IN
2556+ uint64 mode, // IN
2557+ VSockWaitingInfo *wait, // IN
2558+ VMCIHandle handle) // IN
2559+{
2560+ /*
2561+ * Note that it is safe to use a single packet across all CPUs since two
2562+ * tasklets of the same type are guaranteed to not ever run simultaneously.
2563+ * If that ever changes, or VMCI stops using tasklets, we can use per-cpu
2564+ * packets.
2565+ */
2566+ static VSockPacket pkt;
2567+
2568+ return __VSockVmciSendControlPkt(&pkt, src, dst, type,
2569+ size, mode, wait, VSOCK_PROTO_INVALID,
2570+ handle, FALSE);
2571+}
2572+
2573+
2574+/*
2575+ *----------------------------------------------------------------------------
2576+ *
2577+ * VSockVmciSendControlPkt --
2578+ *
2579+ * Sends a control packet.
2580+ *
2581+ * Results:
2582+ * Size of datagram sent on success, negative error on failure.
2583+ *
2584+ * Side effects:
2585+ * None.
2586+ *
2587+ *----------------------------------------------------------------------------
2588+ */
2589+
2590+int
2591+VSockVmciSendControlPkt(struct sock *sk, // IN
2592+ VSockPacketType type, // IN
2593+ uint64 size, // IN
2594+ uint64 mode, // IN
2595+ VSockWaitingInfo *wait, // IN
2596+ VSockProtoVersion proto, // IN
2597+ VMCIHandle handle) // IN
2598+{
2599+ VSockPacket *pkt;
2600+ VSockVmciSock *vsk;
2601+ int err;
2602+
2603+ ASSERT(sk);
2604+ /*
2605+ * New sockets for connection establishment won't have socket structures
2606+ * yet; if one exists, ensure it is of the proper type.
2607+ */
2608+ ASSERT(sk->sk_socket ?
2609+ sk->sk_socket->type == SOCK_STREAM :
2610+ 1);
2611+
2612+ vsk = vsock_sk(sk);
2613+
2614+ if (!VSockAddr_Bound(&vsk->localAddr)) {
2615+ return -EINVAL;
2616+ }
2617+
2618+ if (!VSockAddr_Bound(&vsk->remoteAddr)) {
2619+ return -EINVAL;
2620+ }
2621+
2622+ pkt = kmalloc(sizeof *pkt, GFP_KERNEL);
2623+ if (!pkt) {
2624+ return -ENOMEM;
2625+ }
2626+
2627+ err = __VSockVmciSendControlPkt(pkt, &vsk->localAddr, &vsk->remoteAddr,
2628+ type, size, mode, wait, proto, handle,
2629+ TRUE);
2630+ kfree(pkt);
2631+
2632+ return err;
2633+}
2634+
2635+
2636+/*
2637+ *----------------------------------------------------------------------------
2638+ *
2639+ * __VSockVmciBind --
2640+ *
2641+ * Common functionality needed to bind the specified address to the
2642+ * VSocket. If VMADDR_CID_ANY or VMADDR_PORT_ANY are specified, the context
2643+ * ID or port are selected automatically.
2644+ *
2645+ * Results:
2646+ * Zero on success, negative error code on failure.
2647+ *
2648+ * Side effects:
2649+ * On success, a new datagram handle is created.
2650+ *
2651+ *----------------------------------------------------------------------------
2652+ */
2653+
2654+static int
2655+__VSockVmciBind(struct sock *sk, // IN/OUT
2656+ struct sockaddr_vm *addr) // IN
2657+{
2658+ static unsigned int port = LAST_RESERVED_PORT + 1;
2659+ struct sockaddr_vm newAddr;
2660+ VSockVmciSock *vsk;
2661+ VMCIId cid;
2662+ int err;
2663+
2664+ ASSERT(sk);
2665+ ASSERT(sk->sk_socket);
2666+ ASSERT(addr);
2667+
2668+ vsk = vsock_sk(sk);
2669+
2670+ /* First ensure this socket isn't already bound. */
2671+ if (VSockAddr_Bound(&vsk->localAddr)) {
2672+ return -EINVAL;
2673+ }
2674+
2675+ /*
2676+ * Now bind to the provided address or select appropriate values if none are
2677+ * provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that, just as AF_INET
2678+ * prevents binding to a non-local IP address (in most cases), we only allow
2679+ * binding to the local CID.
2680+ */
2681+ VSockAddr_Init(&newAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
2682+
2683+ cid = VMCI_GetContextID();
2684+ if (addr->svm_cid != cid &&
2685+ addr->svm_cid != VMADDR_CID_ANY) {
2686+ return -EADDRNOTAVAIL;
2687+ }
2688+
2689+ newAddr.svm_cid = addr->svm_cid;
2690+
2691+ switch (sk->sk_socket->type) {
2692+ case SOCK_STREAM: {
2693+ spin_lock_bh(&vsockTableLock);
2694+
2695+ if (addr->svm_port == VMADDR_PORT_ANY) {
2696+ Bool found = FALSE;
2697+ unsigned int i;
2698+
2699+ for (i = 0; i < MAX_PORT_RETRIES; i++) {
2700+ if (port <= LAST_RESERVED_PORT) {
2701+ port = LAST_RESERVED_PORT + 1;
2702+ }
2703+
2704+ newAddr.svm_port = port++;
2705+
2706+ if (!__VSockVmciFindBoundSocket(&newAddr)) {
2707+ found = TRUE;
2708+ break;
2709+ }
2710+ }
2711+
2712+ if (!found) {
2713+ err = -EADDRNOTAVAIL;
2714+ goto out;
2715+ }
2716+ } else {
2717+ /* If port is in reserved range, ensure caller has necessary privileges. */
2718+ if (addr->svm_port <= LAST_RESERVED_PORT &&
2719+ !capable(CAP_NET_BIND_SERVICE)) {
2720+ err = -EACCES;
2721+ goto out;
2722+ }
2723+
2724+ newAddr.svm_port = addr->svm_port;
2725+ if (__VSockVmciFindBoundSocket(&newAddr)) {
2726+ err = -EADDRINUSE;
2727+ goto out;
2728+ }
2729+
2730+ }
2731+ break;
2732+ }
2733+ case SOCK_DGRAM: {
2734+ uint32 flags = 0;
2735+
2736+ /* VMCI will select a resource ID for us if we provide VMCI_INVALID_ID. */
2737+ newAddr.svm_port = addr->svm_port == VMADDR_PORT_ANY ?
2738+ VMCI_INVALID_ID :
2739+ addr->svm_port;
2740+
2741+ if (newAddr.svm_port <= LAST_RESERVED_PORT &&
2742+ !capable(CAP_NET_BIND_SERVICE)) {
2743+ err = -EACCES;
2744+ goto out;
2745+ }
2746+
2747+ if (newAddr.svm_cid == VMADDR_CID_ANY) {
2748+ flags = VMCI_FLAG_ANYCID_DG_HND;
2749+ }
2750+
2751+ err = VSockVmciDatagramCreateHnd(newAddr.svm_port, flags,
2752+ VSockVmciRecvDgramCB,
2753+ sk, &vsk->dgHandle);
2754+ if (err < VMCI_SUCCESS) {
2755+ err = VSockVmci_ErrorToVSockError(err);
2756+ goto out;
2757+ }
2758+
2759+ newAddr.svm_port = VMCI_HANDLE_TO_RESOURCE_ID(vsk->dgHandle);
2760+ break;
2761+ }
2762+ default: {
2763+ err = -EINVAL;
2764+ goto out;
2765+ }
2766+ }
2767+ /*
2768+ * VSockVmci_GetAFValue() acquires a mutex and may sleep, so fill the
2769+ * field after unlocking socket tables.
2770+ */
2771+ VSockAddr_InitNoFamily(&vsk->localAddr, newAddr.svm_cid, newAddr.svm_port);
2772+
2773+ /*
2774+ * Remove stream sockets from the unbound list and add them to the hash
2775+ * table for easy lookup by their address. The unbound list is simply an
2776+ * extra entry at the end of the hash table, a trick used by AF_UNIX.
2777+ */
2778+ if (sk->sk_socket->type == SOCK_STREAM) {
2779+ __VSockVmciRemoveBound(sk);
2780+ __VSockVmciInsertBound(vsockBoundSockets(&vsk->localAddr), sk);
2781+ spin_unlock_bh(&vsockTableLock);
2782+ }
2783+ vsk->localAddr.svm_family = VSockVmci_GetAFValue();
2784+ VSOCK_ADDR_ASSERT(&vsk->localAddr);
2785+
2786+ return 0;
2787+
2788+out:
2789+ if (sk->sk_socket->type == SOCK_STREAM) {
2790+ spin_unlock_bh(&vsockTableLock);
2791+ }
2792+ return err;
2793+}
2794+
2795+
2796+/*
2797+ *----------------------------------------------------------------------------
2798+ *
2799+ * __VSockVmciCreate --
2800+ *
2801+ * Does the work to create the sock structure.
2802+ * Note: If sock is NULL then the type field must be non-zero.
2803+ * Otherwise, sock is non-NULL and the type of sock is used in the
2804+ * newly created socket.
2805+ *
2806+ * Results:
2807+ * sock structure on success, NULL on failure.
2808+ *
2809+ * Side effects:
2810+ * Allocated sk is added to the unbound sockets list iff it is owned by
2811+ * a struct socket.
2812+ *
2813+ *----------------------------------------------------------------------------
2814+ */
2815+
2816+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 14)
2817+static struct sock *
2818+__VSockVmciCreate(struct socket *sock, // IN: Owning socket, may be NULL
2819+ struct sock *parent, // IN: Parent socket, may be NULL
2820+ unsigned int priority, // IN: Allocation flags
2821+ unsigned short type) // IN: Socket type if sock is NULL
2822+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
2823+static struct sock *
2824+__VSockVmciCreate(struct socket *sock, // IN: Owning socket, may be NULL
2825+ struct sock *parent, // IN: Parent socket, may be NULL
2826+ gfp_t priority, // IN: Allocation flags
2827+ unsigned short type) // IN: Socket type if sock is NULL
2828+#else
2829+static struct sock *
2830+__VSockVmciCreate(struct net *net, // IN: Network namespace
2831+ struct socket *sock, // IN: Owning socket, may be NULL
2832+ struct sock *parent, // IN: Parent socket, may be NULL
2833+ gfp_t priority, // IN: Allocation flags
2834+ unsigned short type) // IN: Socket type if sock is NULL
2835+
2836+#endif
2837+{
2838+ struct sock *sk;
2839+ VSockVmciSock *psk;
2840+ VSockVmciSock *vsk;
2841+
2842+ ASSERT((sock && !type) || (!sock && type));
2843+
2844+ vsk = NULL;
2845+
2846+ /*
2847+ * From 2.6.9 until 2.6.12 sk_alloc() used a cache in
2848+ * the protocol structure, but you still had to specify the size and cache
2849+ * yourself.
2850+ * Most recently (in 2.6.24), sk_alloc() was changed to expect the
2851+ * network namespace, and the option to zero the sock was dropped.
2852+ *
2853+ */
2854+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12)
2855+ sk = sk_alloc(vsockVmciFamilyOps.family, priority,
2856+ vsockVmciProto.slab_obj_size, vsockVmciProto.slab);
2857+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
2858+ sk = sk_alloc(vsockVmciFamilyOps.family, priority, &vsockVmciProto, 1);
2859+#else
2860+ sk = sk_alloc(net, vsockVmciFamilyOps.family, priority, &vsockVmciProto);
2861+#endif
2862+ if (!sk) {
2863+ return NULL;
2864+ }
2865+
2866+ /*
2867+ * If we get this far, we know the socket family is registered, so there's no
2868+ * need to register it now.
2869+ */
2870+ compat_mutex_lock(&registrationMutex);
2871+ vsockVmciSocketCount++;
2872+ compat_mutex_unlock(&registrationMutex);
2873+
2874+ sock_init_data(sock, sk);
2875+
2876+ /*
2877+ * sk->sk_type is normally set in sock_init_data, but only if
2878+ * sock is non-NULL. We make sure that our sockets always have a type
2879+ * by setting it here if needed.
2880+ */
2881+ if (!sock) {
2882+ sk->sk_type = type;
2883+ }
2884+
2885+ vsk = vsock_sk(sk);
2886+ VSockAddr_Init(&vsk->localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
2887+ VSockAddr_Init(&vsk->remoteAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
2888+
2889+ sk->sk_destruct = VSockVmciSkDestruct;
2890+ sk->sk_backlog_rcv = VSockVmciQueueRcvSkb;
2891+ sk->sk_state = 0;
2892+ sock_reset_flag(sk, SOCK_DONE);
2893+
2894+ INIT_LIST_HEAD(&vsk->boundTable);
2895+ INIT_LIST_HEAD(&vsk->connectedTable);
2896+ vsk->dgHandle = VMCI_INVALID_HANDLE;
2897+ vsk->qpHandle = VMCI_INVALID_HANDLE;
2898+ vsk->qpair = NULL;
2899+ vsk->produceSize = vsk->consumeSize = 0;
2900+ vsk->listener = NULL;
2901+ INIT_LIST_HEAD(&vsk->pendingLinks);
2902+ INIT_LIST_HEAD(&vsk->acceptQueue);
2903+ vsk->rejected = FALSE;
2904+ vsk->sentRequest = FALSE;
2905+ vsk->ignoreConnectingRst = FALSE;
2906+ vsk->attachSubId = vsk->detachSubId = VMCI_INVALID_ID;
2907+ vsk->peerShutdown = 0;
2908+
2909+ if (parent) {
2910+ psk = vsock_sk(parent);
2911+ vsk->trusted = psk->trusted;
2912+ vsk->owner = psk->owner;
2913+ vsk->queuePairSize = psk->queuePairSize;
2914+ vsk->queuePairMinSize = psk->queuePairMinSize;
2915+ vsk->queuePairMaxSize = psk->queuePairMaxSize;
2916+ vsk->connectTimeout = psk->connectTimeout;
2917+ } else {
2918+ vsk->trusted = capable(CAP_NET_ADMIN);
2919+ vsk->owner = current_uid();
2920+ vsk->queuePairSize = VSOCK_DEFAULT_QP_SIZE;
2921+ vsk->queuePairMinSize = VSOCK_DEFAULT_QP_SIZE_MIN;
2922+ vsk->queuePairMaxSize = VSOCK_DEFAULT_QP_SIZE_MAX;
2923+ vsk->connectTimeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
2924+ }
2925+
2926+ vsk->notifyOps = NULL;
2927+
2928+ if (sock) {
2929+ VSockVmciInsertBound(vsockUnboundSockets, sk);
2930+ }
2931+
2932+ return sk;
2933+}
2934+
2935+
2936+/*
2937+ *----------------------------------------------------------------------------
2938+ *
2939+ * __VSockVmciRelease --
2940+ *
2941+ * Releases the provided socket.
2942+ *
2943+ * Results:
2944+ * None.
2945+ *
2946+ * Side effects:
2947+ * Any pending sockets are also released.
2948+ *
2949+ *----------------------------------------------------------------------------
2950+ */
2951+
2952+static void
2953+__VSockVmciRelease(struct sock *sk) // IN
2954+{
2955+ if (sk) {
2956+ struct sk_buff *skb;
2957+ struct sock *pending;
2958+ struct VSockVmciSock *vsk;
2959+
2960+ vsk = vsock_sk(sk);
2961+ pending = NULL; /* Compiler warning. */
2962+
2963+ if (VSockVmciInBoundTable(sk)) {
2964+ VSockVmciRemoveBound(sk);
2965+ }
2966+
2967+ if (VSockVmciInConnectedTable(sk)) {
2968+ VSockVmciRemoveConnected(sk);
2969+ }
2970+
2971+ if (!VMCI_HANDLE_INVALID(vsk->dgHandle)) {
2972+ VMCIDatagram_DestroyHnd(vsk->dgHandle);
2973+ vsk->dgHandle = VMCI_INVALID_HANDLE;
2974+ }
2975+
2976+ lock_sock(sk);
2977+ sock_orphan(sk);
2978+ sk->sk_shutdown = SHUTDOWN_MASK;
2979+
2980+ while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
2981+ kfree_skb(skb);
2982+ }
2983+
2984+ /* Clean up any sockets that never were accepted. */
2985+ while ((pending = VSockVmciDequeueAccept(sk)) != NULL) {
2986+ __VSockVmciRelease(pending);
2987+ sock_put(pending);
2988+ }
2989+
2990+ release_sock(sk);
2991+ sock_put(sk);
2992+ }
2993+}
2994+
2995+
2996+/*
2997+ * Sock operations.
2998+ */
2999+
3000+/*
3001+ *----------------------------------------------------------------------------
3002+ *
3003+ * VSockVmciSkDestruct --
3004+ *
3005+ * Destroys the provided socket. This is called by sk_free(), which is
3006+ * invoked when the reference count of the socket drops to zero.
3007+ *
3008+ * Results:
3009+ * None.
3010+ *
3011+ * Side effects:
3012+ * Socket count is decremented.
3013+ *
3014+ *----------------------------------------------------------------------------
3015+ */
3016+
3017+static void
3018+VSockVmciSkDestruct(struct sock *sk) // IN
3019+{
3020+ VSockVmciSock *vsk;
3021+
3022+ vsk = vsock_sk(sk);
3023+
3024+ if (vsk->attachSubId != VMCI_INVALID_ID) {
3025+ VMCIEvent_Unsubscribe(vsk->attachSubId);
3026+ vsk->attachSubId = VMCI_INVALID_ID;
3027+ }
3028+
3029+ if (vsk->detachSubId != VMCI_INVALID_ID) {
3030+ VMCIEvent_Unsubscribe(vsk->detachSubId);
3031+ vsk->detachSubId = VMCI_INVALID_ID;
3032+ }
3033+
3034+ if (!VMCI_HANDLE_INVALID(vsk->qpHandle)) {
3035+ ASSERT(vsk->qpair);
3036+ VMCIQPair_Detach(&vsk->qpair);
3037+ vsk->qpHandle = VMCI_INVALID_HANDLE;
3038+ ASSERT(vsk->qpair == NULL);
3039+ vsk->produceSize = vsk->consumeSize = 0;
3040+ }
3041+
3042+ /*
3043+ * Each list entry holds a reference on the socket, so we should not even be
3044+ * here if the socket is in one of our lists. If we are we have a stray
3045+ * sock_put() that needs to go away.
3046+ */
3047+ ASSERT(!VSockVmciInBoundTable(sk));
3048+ ASSERT(!VSockVmciInConnectedTable(sk));
3049+ ASSERT(!VSockVmciIsPending(sk));
3050+ ASSERT(!VSockVmciInAcceptQueue(sk));
3051+
3052+ /*
3053+ * When clearing these addresses, there's no need to set the family and
3054+ * possibly register the address family with the kernel.
3055+ */
3056+ VSockAddr_InitNoFamily(&vsk->localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
3057+ VSockAddr_InitNoFamily(&vsk->remoteAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
3058+
3059+ NOTIFYCALL(vsk, socketDestruct, sk);
3060+
3061+ compat_mutex_lock(&registrationMutex);
3062+ vsockVmciSocketCount--;
3063+ VSockVmciTestUnregister();
3064+ compat_mutex_unlock(&registrationMutex);
3065+
3066+ VSOCK_STATS_CTLPKT_DUMP_ALL();
3067+ VSOCK_STATS_HIST_DUMP_ALL();
3068+ VSOCK_STATS_TOTALS_DUMP_ALL();
3069+}
3070+
3071+
3072+/*
3073+ *----------------------------------------------------------------------------
3074+ *
3075+ * VSockVmciQueueRcvSkb --
3076+ *
3077+ * Receives skb on the socket's receive queue.
3078+ *
3079+ * Results:
3080+ * Zero on success, negative error code on failure.
3081+ *
3082+ * Side effects:
3083+ * None.
3084+ *
3085+ *----------------------------------------------------------------------------
3086+ */
3087+
3088+static int
3089+VSockVmciQueueRcvSkb(struct sock *sk, // IN
3090+ struct sk_buff *skb) // IN
3091+{
3092+ int err;
3093+
3094+ err = sock_queue_rcv_skb(sk, skb);
3095+ if (err) {
3096+ kfree_skb(skb);
3097+ }
3098+
3099+ return err;
3100+}
3101+
3102+
3103+/*
3104+ *----------------------------------------------------------------------------
3105+ *
3106+ * VSockVmciRegisterProto --
3107+ *
3108+ * Registers the vmci sockets protocol family.
3109+ *
3110+ * Results:
3111+ * Zero on success, error code on failure.
3112+ *
3113+ * Side effects:
3114+ * None.
3115+ *
3116+ *----------------------------------------------------------------------------
3117+ */
3118+
3119+static int
3120+VSockVmciRegisterProto(void)
3121+{
3122+ int err = 0;
3123+
3124+ /*
3125+ * From 2.6.9 until 2.6.11, these address families called sk_alloc_slab()
3126+ * and the allocated slab was assigned to the slab variable in the proto
3127+ * struct and was created of size slab_obj_size. As of 2.6.12 and later,
3128+ * this slab allocation was moved
3129+ * into proto_register() and only done if you specified a non-zero value
3130+ * for the second argument (alloc_slab); the size of the slab element was
3131+ * changed to obj_size.
3132+ */
3133+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12)
3134+ err = sk_alloc_slab(&vsockVmciProto, "vsock");
3135+ if (err != 0) {
3136+ sk_alloc_slab_error(&vsockVmciProto);
3137+ }
3138+#else
3139+ /* Specify 1 as the second argument so the slab is created for us. */
3140+ err = proto_register(&vsockVmciProto, 1);
3141+#endif
3142+
3143+ return err;
3144+}
3145+
3146+
3147+/*
3148+ *----------------------------------------------------------------------------
3149+ *
3150+ * VSockVmciUnregisterProto --
3151+ *
3152+ * Unregisters the vmci sockets protocol family.
3153+ *
3154+ * Results:
3155+ * None.
3156+ *
3157+ * Side effects:
3158+ * None.
3159+ *
3160+ *----------------------------------------------------------------------------
3161+ */
3162+
3163+static void
3164+VSockVmciUnregisterProto(void)
3165+{
3166+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12)
3167+ sk_free_slab(&vsockVmciProto);
3168+#else
3169+ proto_unregister(&vsockVmciProto);
3170+#endif
3171+
3172+ VSOCK_STATS_RESET();
3173+}
3174+
3175+
3176+/*
3177+ *----------------------------------------------------------------------------
3178+ *
3179+ * VSockVmciRegisterAddressFamily --
3180+ *
3181+ * Registers our socket address family with the kernel.
3182+ *
3183+ * Note that this assumes the registration lock is held.
3184+ *
3185+ * Results:
3186+ * The address family value on success, negative error code on failure.
3187+ *
3188+ * Side effects:
3189+ * Callers of socket operations with the returned value, on success, will
3190+ * be able to use our socket implementation.
3191+ *
3192+ *----------------------------------------------------------------------------
3193+ */
3194+
3195+static int
3196+VSockVmciRegisterAddressFamily(void)
3197+{
3198+ int err = 0;
3199+ int i;
3200+
3201+ /*
3202+ * Linux will not allocate an address family to code that is not part of the
3203+ * kernel proper, so until that time comes we need a workaround. Here we
3204+ * loop through the allowed values and claim the first one that's not
3205+ * currently used. Users will then make an ioctl(2) into our module to
3206+ * retrieve this value before calling socket(2).
3207+ *
3208+ * This is undesirable, but it's better than having users' programs break
3209+ * when a hard-coded, currently-available value gets assigned to someone
3210+ * else in the future.
3211+ */
3212+ for (i = NPROTO - 1; i >= 0; i--) {
3213+ vsockVmciFamilyOps.family = i;
3214+ err = sock_register(&vsockVmciFamilyOps);
3215+ if (err) {
3216+ vsockVmciFamilyOps.family = VSOCK_INVALID_FAMILY;
3217+ } else {
3218+ vsockVmciDgramOps.family = i;
3219+ vsockVmciStreamOps.family = i;
3220+ err = i;
3221+ break;
3222+ }
3223+ }
3224+
3225+ if (VSOCK_INVALID_FAMILY == vsockVmciFamilyOps.family) {
3226+ Warning("Could not register address family.\n");
3227+ }
3228+
3229+ return err;
3230+}
3231+
3232+
3233+/*
3234+ *----------------------------------------------------------------------------
3235+ *
3236+ * VSockVmciUnregisterAddressFamily --
3237+ *
3238+ * Unregisters the address family with the kernel.
3239+ *
3240+ * Note that this assumes the registration lock is held.
3241+ *
3242+ * Results:
3243+ * None.
3244+ *
3245+ * Side effects:
3246+ * Our socket implementation is no longer accessible.
3247+ *
3248+ *----------------------------------------------------------------------------
3249+ */
3250+
3251+static void
3252+VSockVmciUnregisterAddressFamily(void)
3253+{
3254+ if (vsockVmciFamilyOps.family != VSOCK_INVALID_FAMILY) {
3255+ sock_unregister(vsockVmciFamilyOps.family);
3256+ }
3257+
3258+ vsockVmciDgramOps.family = vsockVmciFamilyOps.family = VSOCK_INVALID_FAMILY;
3259+ vsockVmciStreamOps.family = vsockVmciFamilyOps.family;
3260+}
3261+
3262+
3263+
3264+
3265+/*
3266+ *----------------------------------------------------------------------------
3267+ *
3268+ * VSockVmciRegisterWithVmci --
3269+ *
3270+ * Registers with the VMCI device, and creates control message
3271+ * and event handlers.
3272+ *
3273+ * Results:
3274+ * Zero on success, error code on failure.
3275+ *
3276+ * Side effects:
3277+ * None.
3278+ *
3279+ *----------------------------------------------------------------------------
3280+ */
3281+
3282+static int
3283+VSockVmciRegisterWithVmci(void)
3284+{
3285+ int err = 0;
3286+ uint32 apiVersion;
3287+
3288+ /*
3289+ * We don't call into the vmci module if the vmci device isn't
3290+ * present.
3291+ */
3292+ apiVersion = VMCI_KERNEL_API_VERSION_1;
3293+ vmciDevicePresent = VMCI_DeviceGet(&apiVersion, NULL, NULL, NULL);
3294+ if (!vmciDevicePresent) {
3295+ Warning("VMCI device not present.\n");
3296+ return -1;
3297+ }
3298+
3299+ /*
3300+ * Create the datagram handle that we will use to send and receive all
3301+ * VSocket control messages for this context.
3302+ */
3303+ err = VSockVmciDatagramCreateHnd(VSOCK_PACKET_RID,
3304+ VMCI_FLAG_ANYCID_DG_HND,
3305+ VSockVmciRecvStreamCB, NULL,
3306+ &vmciStreamHandle);
3307+ if (err < VMCI_SUCCESS) {
3308+ Warning("Unable to create datagram handle. (%d)\n", err);
3309+ err = VSockVmci_ErrorToVSockError(err);
3310+ goto out;
3311+ }
3312+
3313+ err = VMCIEvent_Subscribe(VMCI_EVENT_QP_RESUMED,
3314+ VMCI_FLAG_EVENT_NONE,
3315+ VSockVmciQPResumedCB,
3316+ NULL,
3317+ &qpResumedSubId);
3318+ if (err < VMCI_SUCCESS) {
3319+ Warning("Unable to subscribe to QP resumed event. (%d)\n", err);
3320+ err = VSockVmci_ErrorToVSockError(err);
3321+ qpResumedSubId = VMCI_INVALID_ID;
3322+ goto out;
3323+ }
3324+
3325+out:
3326+ if (err != 0) {
3327+ VSockVmciUnregisterWithVmci();
3328+ }
3329+
3330+ return err;
3331+}
3332+
3333+
3334+/*
3335+ *----------------------------------------------------------------------------
3336+ *
3337+ * VSockVmciUnregisterWithVmci --
3338+ *
3339+ * Destroys control message and event handlers, and unregisters
3340+ * with the VMCI device
3341+ *
3342+ * Results:
3343+ * None.
3344+ *
3345+ * Side effects:
3346+ * Our socket implementation is no longer accessible.
3347+ *
3348+ *----------------------------------------------------------------------------
3349+ */
3350+
3351+static void
3352+VSockVmciUnregisterWithVmci(void)
3353+{
3354+ if (!vmciDevicePresent) {
3355+ /* Nothing was registered. */
3356+ return;
3357+ }
3358+
3359+ if (!VMCI_HANDLE_INVALID(vmciStreamHandle)) {
3360+ if (VMCIDatagram_DestroyHnd(vmciStreamHandle) != VMCI_SUCCESS) {
3361+ Warning("Could not destroy VMCI datagram handle.\n");
3362+ }
3363+ vmciStreamHandle = VMCI_INVALID_HANDLE;
3364+ }
3365+
3366+ if (qpResumedSubId != VMCI_INVALID_ID) {
3367+ VMCIEvent_Unsubscribe(qpResumedSubId);
3368+ qpResumedSubId = VMCI_INVALID_ID;
3369+ }
3370+
3371+ if (ctxUpdatedSubId != VMCI_INVALID_ID) {
3372+ VMCIEvent_Unsubscribe(ctxUpdatedSubId);
3373+ ctxUpdatedSubId = VMCI_INVALID_ID;
3374+ }
3375+
3376+ VMCI_DeviceRelease(NULL);
3377+ vmciDevicePresent = FALSE;
3378+}
3379+
3380+
3381+/*
3382+ *----------------------------------------------------------------------------
3383+ *
3384+ * VSockVmciStreamHasData --
3385+ *
3386+ * Gets the amount of data available for a given stream socket's consume
3387+ * queue.
3388+ *
3389+ * Note that this assumes the socket lock is held.
3390+ *
3391+ * Results:
3392+ * The amount of data available or a VMCI error code on failure.
3393+ *
3394+ * Side effects:
3395+ * None.
3396+ *
3397+ *----------------------------------------------------------------------------
3398+ */
3399+
3400+int64
3401+VSockVmciStreamHasData(VSockVmciSock *vsk) // IN
3402+{
3403+ ASSERT(vsk);
3404+
3405+ return VMCIQPair_ConsumeBufReady(vsk->qpair);
3406+}
3407+
3408+
3409+/*
3410+ *----------------------------------------------------------------------------
3411+ *
3412+ * VSockVmciStreamHasSpace --
3413+ *
3414+ * Gets the amount of space available for a given stream socket's produce
3415+ * queue.
3416+ *
3417+ * Note that this assumes the socket lock is held.
3418+ *
3419+ * Results:
3420+ * The amount of space available or a VMCI error code on failure.
3421+ *
3422+ * Side effects:
3423+ * None.
3424+ *
3425+ *----------------------------------------------------------------------------
3426+ */
3427+
3428+int64
3429+VSockVmciStreamHasSpace(VSockVmciSock *vsk) // IN
3430+{
3431+ ASSERT(vsk);
3432+
3433+ return VMCIQPair_ProduceFreeSpace(vsk->qpair);
3434+}
3435+
3436+
3437+/*
3438+ * Socket operations.
3439+ */
3440+
3441+/*
3442+ *----------------------------------------------------------------------------
3443+ *
3444+ * VSockVmciRelease --
3445+ *
3446+ * Releases the provided socket by freeing the contents of its queue. This
3447+ * is called when a user process calls close(2) on the socket.
3448+ *
3449+ * Results:
3450+ * Zero on success, negative error code on failure.
3451+ *
3452+ * Side effects:
3453+ * None.
3454+ *
3455+ *----------------------------------------------------------------------------
3456+ */
3457+
3458+static int
3459+VSockVmciRelease(struct socket *sock) // IN
3460+{
3461+ __VSockVmciRelease(sock->sk);
3462+ sock->sk = NULL;
3463+ sock->state = SS_FREE;
3464+
3465+ return 0;
3466+}
3467+
3468+
3469+/*
3470+ *----------------------------------------------------------------------------
3471+ *
3472+ * VSockVmciBind --
3473+ *
3474+ * Binds the provided address to the provided socket.
3475+ *
3476+ * Results:
3477+ * Zero on success, negative error code on failure.
3478+ *
3479+ * Side effects:
3480+ * None.
3481+ *
3482+ *----------------------------------------------------------------------------
3483+ */
3484+
3485+static int
3486+VSockVmciBind(struct socket *sock, // IN
3487+ struct sockaddr *addr, // IN
3488+ int addrLen) // IN
3489+{
3490+ int err;
3491+ struct sock *sk;
3492+ struct sockaddr_vm *vmciAddr;
3493+
3494+ sk = sock->sk;
3495+
3496+ if (VSockAddr_Cast(addr, addrLen, &vmciAddr) != 0) {
3497+ return -EINVAL;
3498+ }
3499+
3500+ lock_sock(sk);
3501+ err = __VSockVmciBind(sk, vmciAddr);
3502+ release_sock(sk);
3503+
3504+ return err;
3505+}
3506+
3507+
3508+/*
3509+ *----------------------------------------------------------------------------
3510+ *
3511+ * VSockVmciDgramConnect --
3512+ *
3513+ * Connects a datagram socket. This can be called multiple times to change
3514+ * the socket's association and can be called with a sockaddr whose family
3515+ * is set to AF_UNSPEC to dissolve any existing association.
3516+ *
3517+ * Results:
3518+ * Zero on success, negative error code on failure.
3519+ *
3520+ * Side effects:
3521+ * None.
3522+ *
3523+ *----------------------------------------------------------------------------
3524+ */
3525+
3526+static int
3527+VSockVmciDgramConnect(struct socket *sock, // IN
3528+ struct sockaddr *addr, // IN
3529+ int addrLen, // IN
3530+ int flags) // IN
3531+{
3532+ int err;
3533+ struct sock *sk;
3534+ VSockVmciSock *vsk;
3535+ struct sockaddr_vm *remoteAddr;
3536+
3537+ sk = sock->sk;
3538+ vsk = vsock_sk(sk);
3539+
3540+ err = VSockAddr_Cast(addr, addrLen, &remoteAddr);
3541+ if (err == -EAFNOSUPPORT && remoteAddr->svm_family == AF_UNSPEC) {
3542+ lock_sock(sk);
3543+ VSockAddr_Init(&vsk->remoteAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
3544+ sock->state = SS_UNCONNECTED;
3545+ release_sock(sk);
3546+ return 0;
3547+ } else if (err != 0) {
3548+ return -EINVAL;
3549+ }
3550+
3551+ lock_sock(sk);
3552+
3553+ if (!VSockAddr_Bound(&vsk->localAddr)) {
3554+ struct sockaddr_vm localAddr;
3555+
3556+ VSockAddr_Init(&localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
3557+ if ((err = __VSockVmciBind(sk, &localAddr))) {
3558+ goto out;
3559+ }
3560+ }
3561+
3562+ if (!VSockAddr_SocketContextDgram(remoteAddr->svm_cid,
3563+ remoteAddr->svm_port)) {
3564+ err = -EINVAL;
3565+ goto out;
3566+ }
3567+
3568+ memcpy(&vsk->remoteAddr, remoteAddr, sizeof vsk->remoteAddr);
3569+ sock->state = SS_CONNECTED;
3570+
3571+out:
3572+ release_sock(sk);
3573+ return err;
3574+}
3575+
3576+
3577+/*
3578+ *----------------------------------------------------------------------------
3579+ *
3580+ * VSockVmciConnectTimeout --
3581+ *
3582+ * Asynchronous connection attempts schedule this timeout function
3583+ * to notify the connector of an unsuccessful connection
3584+ * attempt. If the socket is still in the connecting state and
3585+ * hasn't been closed, we mark the socket as timed out. Otherwise,
3586+ * we do nothing.
3587+ *
3588+ * Results:
3589+ * None.
3590+ *
3591+ * Side effects:
3592+ * May destroy the socket.
3593+ *
3594+ *----------------------------------------------------------------------------
3595+ */
3596+
3597+static void
3598+VSockVmciConnectTimeout(compat_delayed_work_arg work) // IN
3599+{
3600+ struct sock *sk;
3601+ VSockVmciSock *vsk;
3602+
3603+ vsk = COMPAT_DELAYED_WORK_GET_DATA(work, VSockVmciSock, dwork);
3604+ ASSERT(vsk);
3605+
3606+ sk = sk_vsock(vsk);
3607+
3608+ lock_sock(sk);
3609+ if (sk->sk_state == SS_CONNECTING && (sk->sk_shutdown != SHUTDOWN_MASK)) {
3610+ sk->sk_state = SS_UNCONNECTED;
3611+ sk->sk_err = ETIMEDOUT;
3612+ sk->sk_error_report(sk);
3613+ }
3614+ release_sock(sk);
3615+
3616+ sock_put(sk);
3617+}
3618+
3619+
3620+/*
3621+ *----------------------------------------------------------------------------
3622+ *
3623+ * VSockVmciStreamConnect --
3624+ *
3625+ * Connects a stream socket.
3626+ *
3627+ * Results:
3628+ * Zero on success, negative error code on failure.
3629+ *
3630+ * Side effects:
3631+ * None.
3632+ *
3633+ *----------------------------------------------------------------------------
3634+ */
3635+
3636+static int
3637+VSockVmciStreamConnect(struct socket *sock, // IN
3638+ struct sockaddr *addr, // IN
3639+ int addrLen, // IN
3640+ int flags) // IN
3641+{
3642+ int err;
3643+ struct sock *sk;
3644+ VSockVmciSock *vsk;
3645+ struct sockaddr_vm *remoteAddr;
3646+ long timeout;
3647+ Bool oldPktProto = FALSE;
3648+ DEFINE_WAIT(wait);
3649+
3650+ err = 0;
3651+ sk = sock->sk;
3652+ vsk = vsock_sk(sk);
3653+
3654+ lock_sock(sk);
3655+
3656+ /* XXX AF_UNSPEC should make us disconnect like AF_INET. */
3657+ switch (sock->state) {
3658+ case SS_CONNECTED:
3659+ err = -EISCONN;
3660+ goto out;
3661+ case SS_DISCONNECTING:
3662+ err = -EINVAL;
3663+ goto out;
3664+ case SS_CONNECTING:
3665+ /*
3666+ * This continues on so we can move sock into the SS_CONNECTED state once
3667+ * the connection has completed (at which point err will be set to zero
3668+ * also). Otherwise, we will either wait for the connection or return
3669+ * -EALREADY should this be a non-blocking call.
3670+ */
3671+ err = -EALREADY;
3672+ break;
3673+ default:
3674+ ASSERT(sk->sk_state == SS_FREE ||
3675+ sk->sk_state == SS_UNCONNECTED ||
3676+ sk->sk_state == SS_LISTEN);
3677+ if ((sk->sk_state == SS_LISTEN) ||
3678+ VSockAddr_Cast(addr, addrLen, &remoteAddr) != 0) {
3679+ err = -EINVAL;
3680+ goto out;
3681+ }
3682+
3683+ /* The hypervisor and well-known contexts do not have socket endpoints. */
3684+ if (!VSockAddr_SocketContextStream(remoteAddr->svm_cid)) {
3685+ err = -ENETUNREACH;
3686+ goto out;
3687+ }
3688+
3689+ /* Set the remote address that we are connecting to. */
3690+ memcpy(&vsk->remoteAddr, remoteAddr, sizeof vsk->remoteAddr);
3691+
3692+ /* Autobind this socket to the local address if necessary. */
3693+ if (!VSockAddr_Bound(&vsk->localAddr)) {
3694+ struct sockaddr_vm localAddr;
3695+
3696+ VSockAddr_Init(&localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
3697+ if ((err = __VSockVmciBind(sk, &localAddr))) {
3698+ goto out;
3699+ }
3700+ }
3701+
3702+ sk->sk_state = SS_CONNECTING;
3703+
3704+ if (VSockVmciOldProtoOverride(&oldPktProto) && oldPktProto) {
3705+ err = VSOCK_SEND_CONN_REQUEST(sk, vsk->queuePairSize);
3706+ if (err < 0) {
3707+ sk->sk_state = SS_UNCONNECTED;
3708+ goto out;
3709+ }
3710+ } else {
3711+ int supportedProtoVersions = VSockVmciNewProtoSupportedVersions();
3712+ err = VSOCK_SEND_CONN_REQUEST2(sk, vsk->queuePairSize,
3713+ supportedProtoVersions);
3714+ if (err < 0) {
3715+ sk->sk_state = SS_UNCONNECTED;
3716+ goto out;
3717+ }
3718+
3719+ vsk->sentRequest = TRUE;
3720+ }
3721+
3722+ /*
3723+ * Mark sock as connecting and set the error code to in progress in case
3724+ * this is a non-blocking connect.
3725+ */
3726+ sock->state = SS_CONNECTING;
3727+ err = -EINPROGRESS;
3728+ }
3729+
3730+ /*
3731+ * The receive path will handle all communication until we are able to enter
3732+ * the connected state. Here we wait for the connection to be completed or
3733+ * a notification of an error.
3734+ */
3735+ timeout = vsk->connectTimeout;
3736+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
3737+
3738+ while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) {
3739+ if (flags & O_NONBLOCK) {
3740+ /*
3741+ * If we're not going to block, we schedule a timeout
3742+ * function to generate a timeout on the connection attempt,
3743+ * in case the peer doesn't respond in a timely manner. We
3744+ * hold on to the socket until the timeout fires.
3745+ */
3746+ sock_hold(sk);
3747+ COMPAT_INIT_DELAYED_WORK(&vsk->dwork, VSockVmciConnectTimeout, vsk);
3748+ compat_schedule_delayed_work(&vsk->dwork, timeout);
3749+
3750+ /* Skip ahead to preserve error code set above. */
3751+ goto outWait;
3752+ }
3753+
3754+ release_sock(sk);
3755+ timeout = schedule_timeout(timeout);
3756+ lock_sock(sk);
3757+
3758+ if (signal_pending(current)) {
3759+ err = sock_intr_errno(timeout);
3760+ goto outWaitError;
3761+ } else if (timeout == 0) {
3762+ err = -ETIMEDOUT;
3763+ goto outWaitError;
3764+ }
3765+
3766+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
3767+ }
3768+
3769+ if (sk->sk_err) {
3770+ err = -sk->sk_err;
3771+ goto outWaitError;
3772+ } else {
3773+ ASSERT(sk->sk_state == SS_CONNECTED);
3774+ err = 0;
3775+ }
3776+
3777+outWait:
3778+ finish_wait(sk_sleep(sk), &wait);
3779+out:
3780+ release_sock(sk);
3781+ return err;
3782+
3783+outWaitError:
3784+ sk->sk_state = SS_UNCONNECTED;
3785+ sock->state = SS_UNCONNECTED;
3786+ goto outWait;
3787+}
3788+
3789+
3790+/*
3791+ *----------------------------------------------------------------------------
3792+ *
3793+ * VSockVmciAccept --
3794+ *
3795+ * Accepts next available connection request for this socket.
3796+ *
3797+ * Results:
3798+ * Zero on success, negative error code on failure.
3799+ *
3800+ * Side effects:
3801+ * None.
3802+ *
3803+ *----------------------------------------------------------------------------
3804+ */
3805+
3806+static int
3807+VSockVmciAccept(struct socket *sock, // IN
3808+ struct socket *newsock, // IN/OUT
3809+ int flags) // IN
3810+{
3811+ struct sock *listener;
3812+ int err;
3813+ struct sock *connected;
3814+ VSockVmciSock *vconnected;
3815+ long timeout;
3816+ DEFINE_WAIT(wait);
3817+
3818+ err = 0;
3819+ listener = sock->sk;
3820+
3821+ lock_sock(listener);
3822+
3823+ if (sock->type != SOCK_STREAM) {
3824+ err = -EOPNOTSUPP;
3825+ goto out;
3826+ }
3827+
3828+ if (listener->sk_state != SS_LISTEN) {
3829+ err = -EINVAL;
3830+ goto out;
3831+ }
3832+
3833+ /*
3834+ * Wait for children sockets to appear; these are the new sockets created
3835+ * upon connection establishment.
3836+ */
3837+ timeout = sock_sndtimeo(listener, flags & O_NONBLOCK);
3838+ prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
3839+
3840+ while ((connected = VSockVmciDequeueAccept(listener)) == NULL &&
3841+ listener->sk_err == 0) {
3842+ release_sock(listener);
3843+ timeout = schedule_timeout(timeout);
3844+ lock_sock(listener);
3845+
3846+ if (signal_pending(current)) {
3847+ err = sock_intr_errno(timeout);
3848+ goto outWait;
3849+ } else if (timeout == 0) {
3850+ err = -EAGAIN;
3851+ goto outWait;
3852+ }
3853+
3854+ prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
3855+ }
3856+
3857+ if (listener->sk_err) {
3858+ err = -listener->sk_err;
3859+ }
3860+
3861+ if (connected) {
3862+ listener->sk_ack_backlog--;
3863+
3864+ lock_sock(connected);
3865+ vconnected = vsock_sk(connected);
3866+
3867+ /*
3868+ * If the listener socket has received an error, then we should reject
3869+ * this socket and return. Note that we simply mark the socket rejected,
3870+ * drop our reference, and let the cleanup function handle the cleanup;
3871+ * the fact that we found it in the listener's accept queue guarantees
3872+ * that the cleanup function hasn't run yet.
3873+ */
3874+ if (err) {
3875+ vconnected->rejected = TRUE;
3876+ release_sock(connected);
3877+ sock_put(connected);
3878+ goto outWait;
3879+ }
3880+
3881+ newsock->state = SS_CONNECTED;
3882+ sock_graft(connected, newsock);
3883+ release_sock(connected);
3884+ sock_put(connected);
3885+ }
3886+
3887+outWait:
3888+ finish_wait(sk_sleep(listener), &wait);
3889+out:
3890+ release_sock(listener);
3891+ return err;
3892+}
3893+
3894+
3895+/*
3896+ *----------------------------------------------------------------------------
3897+ *
3898+ * VSockVmciGetname --
3899+ *
3900+ * Provides the local or remote address for the socket.
3901+ *
3902+ * Results:
3903+ * Zero on success, negative error code otherwise.
3904+ *
3905+ * Side effects:
3906+ * None.
3907+ *
3908+ *----------------------------------------------------------------------------
3909+ */
3910+
3911+static int
3912+VSockVmciGetname(struct socket *sock, // IN
3913+ struct sockaddr *addr, // OUT
3914+ int *addrLen, // OUT
3915+ int peer) // IN
3916+{
3917+ int err;
3918+ struct sock *sk;
3919+ VSockVmciSock *vsk;
3920+ struct sockaddr_vm *vmciAddr;
3921+
3922+ sk = sock->sk;
3923+ vsk = vsock_sk(sk);
3924+ err = 0;
3925+
3926+ lock_sock(sk);
3927+
3928+ if (peer) {
3929+ if (sock->state != SS_CONNECTED) {
3930+ err = -ENOTCONN;
3931+ goto out;
3932+ }
3933+ vmciAddr = &vsk->remoteAddr;
3934+ } else {
3935+ vmciAddr = &vsk->localAddr;
3936+ }
3937+
3938+ if (!vmciAddr) {
3939+ err = -EINVAL;
3940+ goto out;
3941+ }
3942+
3943+ /*
3944+ * sys_getsockname() and sys_getpeername() pass us a MAX_SOCK_ADDR-sized
3945+ * buffer and don't set addrLen. Unfortunately that macro is defined in
3946+ * socket.c instead of .h, so we hardcode its value here.
3947+ */
3948+ ASSERT_ON_COMPILE(sizeof *vmciAddr <= 128);
3949+ memcpy(addr, vmciAddr, sizeof *vmciAddr);
3950+ *addrLen = sizeof *vmciAddr;
3951+
3952+out:
3953+ release_sock(sk);
3954+ return err;
3955+}
3956+
3957+
3958+/*
3959+ *----------------------------------------------------------------------------
3960+ *
3961+ * VSockVmciPoll --
3962+ *
3963+ * Waits on file for activity then provides mask indicating state of socket.
3964+ *
3965+ * Results:
3966+ * Mask of flags containing socket state.
3967+ *
3968+ * Side effects:
3969+ * None.
3970+ *
3971+ *----------------------------------------------------------------------------
3972+ */
3973+
3974+static unsigned int
3975+VSockVmciPoll(struct file *file, // IN
3976+ struct socket *sock, // IN
3977+ poll_table *wait) // IN
3978+{
3979+ struct sock *sk;
3980+ unsigned int mask;
3981+ VSockVmciSock *vsk;
3982+
3983+ sk = sock->sk;
3984+ vsk = vsock_sk(sk);
3985+
3986+ poll_wait(file, sk_sleep(sk), wait);
3987+ mask = 0;
3988+
3989+ if (sk->sk_err) {
3990+ /* Signify that there has been an error on this socket. */
3991+ mask |= POLLERR;
3992+ }
3993+
3994+ /*
3995+ * INET sockets treat local write shutdown and peer write shutdown
3996+ * as a case of POLLHUP set.
3997+ */
3998+ if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
3999+ ((sk->sk_shutdown & SEND_SHUTDOWN) &&
4000+ (vsk->peerShutdown & SEND_SHUTDOWN))) {
4001+ mask |= POLLHUP;
4002+ }
4003+
4004+ /* POLLRDHUP wasn't added until 2.6.17. */
4005+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 17)
4006+ if (sk->sk_shutdown & RCV_SHUTDOWN ||
4007+ vsk->peerShutdown & SEND_SHUTDOWN) {
4008+ mask |= POLLRDHUP;
4009+ }
4010+#endif
4011+
4012+ if (sock->type == SOCK_DGRAM) {
4013+ /*
4014+ * For datagram sockets we can read if there is something in the queue
4015+ * and write as long as the socket isn't shutdown for sending.
4016+ */
4017+ if (!skb_queue_empty(&sk->sk_receive_queue) ||
4018+ (sk->sk_shutdown & RCV_SHUTDOWN)) {
4019+ mask |= POLLIN | POLLRDNORM;
4020+ }
4021+
4022+ if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
4023+ mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
4024+ }
4025+ } else if (sock->type == SOCK_STREAM) {
4026+ lock_sock(sk);
4027+
4028+ /*
4029+ * Listening sockets that have connections in their accept queue can be read.
4030+ */
4031+ if (sk->sk_state == SS_LISTEN && !VSockVmciIsAcceptQueueEmpty(sk)) {
4032+ mask |= POLLIN | POLLRDNORM;
4033+ }
4034+
4035+ /*
4036+ * If there is something in the queue then we can read.
4037+ */
4038+ if (!VMCI_HANDLE_INVALID(vsk->qpHandle) &&
4039+ !(sk->sk_shutdown & RCV_SHUTDOWN)) {
4040+ Bool dataReadyNow = FALSE;
4041+ int32 ret = 0;
4042+ NOTIFYCALLRET(vsk, ret, pollIn, sk, 1, &dataReadyNow);
4043+ if (ret < 0) {
4044+ mask |= POLLERR;
4045+ } else {
4046+ if (dataReadyNow) {
4047+ mask |= POLLIN | POLLRDNORM;
4048+ }
4049+ }
4050+ }
4051+
4052+ /*
4053+ * Sockets whose connections have been closed, reset, or terminated
4054+ * should also be considered read, and we check the shutdown flag for
4055+ * that.
4056+ */
4057+ if (sk->sk_shutdown & RCV_SHUTDOWN ||
4058+ vsk->peerShutdown & SEND_SHUTDOWN) {
4059+ mask |= POLLIN | POLLRDNORM;
4060+ }
4061+
4062+ /*
4063+ * Connected sockets that can produce data can be written.
4064+ */
4065+ if (sk->sk_state == SS_CONNECTED) {
4066+ if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
4067+ Bool spaceAvailNow = FALSE;
4068+ int32 ret = 0;
4069+
4070+ NOTIFYCALLRET(vsk, ret, pollOut, sk, 1, &spaceAvailNow);
4071+ if (ret < 0) {
4072+ mask |= POLLERR;
4073+ } else {
4074+ if (spaceAvailNow) {
4075+ /* Remove POLLWRBAND since INET sockets are not setting it.*/
4076+ mask |= POLLOUT | POLLWRNORM;
4077+ }
4078+ }
4079+ }
4080+ }
4081+
4082+ /*
4083+ * Simulate INET socket poll behaviors, which sets POLLOUT|POLLWRNORM when
4084+ * peer is closed and nothing to read, but local send is not shutdown.
4085+ */
4086+ if (sk->sk_state == SS_UNCONNECTED) {
4087+ if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
4088+ mask |= POLLOUT | POLLWRNORM;
4089+ }
4090+ }
4091+
4092+ release_sock(sk);
4093+ }
4094+
4095+ return mask;
4096+}
4097+
4098+
4099+/*
4100+ *----------------------------------------------------------------------------
4101+ *
4102+ * VSockVmciListen --
4103+ *
4104+ * Signify that this socket is listening for connection requests.
4105+ *
4106+ * Results:
4107+ * Zero on success, negative error code on failure.
4108+ *
4109+ * Side effects:
4110+ * None.
4111+ *
4112+ *----------------------------------------------------------------------------
4113+ */
4114+
4115+static int
4116+VSockVmciListen(struct socket *sock, // IN
4117+ int backlog) // IN
4118+{
4119+ int err;
4120+ struct sock *sk;
4121+ VSockVmciSock *vsk;
4122+
4123+ sk = sock->sk;
4124+
4125+ lock_sock(sk);
4126+
4127+ if (sock->type != SOCK_STREAM) {
4128+ err = -EOPNOTSUPP;
4129+ goto out;
4130+ }
4131+
4132+ if (sock->state != SS_UNCONNECTED) {
4133+ err = -EINVAL;
4134+ goto out;
4135+ }
4136+
4137+ vsk = vsock_sk(sk);
4138+
4139+ if (!VSockAddr_Bound(&vsk->localAddr)) {
4140+ err = -EINVAL;
4141+ goto out;
4142+ }
4143+
4144+ sk->sk_max_ack_backlog = backlog;
4145+ sk->sk_state = SS_LISTEN;
4146+
4147+ err = 0;
4148+
4149+out:
4150+ release_sock(sk);
4151+ return err;
4152+}
4153+
4154+
4155+/*
4156+ *----------------------------------------------------------------------------
4157+ *
4158+ * VSockVmciShutdown --
4159+ *
4160+ * Shuts down the provided socket in the provided method.
4161+ *
4162+ * Results:
4163+ * Zero on success, negative error code on failure.
4164+ *
4165+ * Side effects:
4166+ * None.
4167+ *
4168+ *----------------------------------------------------------------------------
4169+ */
4170+
4171+static int
4172+VSockVmciShutdown(struct socket *sock, // IN
4173+ int mode) // IN
4174+{
4175+ int32 err;
4176+ struct sock *sk;
4177+
4178+ /*
4179+ * User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses
4180+ * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode here
4181+ * like the other address families do. Note also that the increment makes
4182+ * SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3), which is what we
4183+ * want.
4184+ */
4185+ mode++;
4186+
4187+ if ((mode & ~SHUTDOWN_MASK) || !mode) {
4188+ return -EINVAL;
4189+ }
4190+
4191+ /*
4192+ * If this is a STREAM socket and it is not connected then bail out
4193+ * immediately. If it is a DGRAM socket then we must first kick the socket
4194+ * so that it wakes up from any sleeping calls, for example recv(), and then
4195+ * afterwards return the error.
4196+ */
4197+
4198+ sk = sock->sk;
4199+ if (sock->state == SS_UNCONNECTED) {
4200+ err = -ENOTCONN;
4201+ if (sk->sk_type == SOCK_STREAM) {
4202+ return err;
4203+ }
4204+ } else {
4205+ sock->state = SS_DISCONNECTING;
4206+ err = 0;
4207+ }
4208+
4209+ /* Receive and send shutdowns are treated alike. */
4210+ mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN);
4211+ if (mode) {
4212+ lock_sock(sk);
4213+ sk->sk_shutdown |= mode;
4214+ sk->sk_state_change(sk);
4215+ release_sock(sk);
4216+
4217+ if (sk->sk_type == SOCK_STREAM) {
4218+ sock_reset_flag(sk, SOCK_DONE);
4219+ VSOCK_SEND_SHUTDOWN(sk, mode);
4220+ }
4221+ }
4222+
4223+ return err;
4224+}
4225+
4226+
4227+/*
4228+ *----------------------------------------------------------------------------
4229+ *
4230+ * VSockVmciDgramSendmsg --
4231+ *
4232+ * Sends a datagram.
4233+ *
4234+ * Results:
4235+ * Number of bytes sent on success, negative error code on failure.
4236+ *
4237+ * Side effects:
4238+ * None.
4239+ *
4240+ *----------------------------------------------------------------------------
4241+ */
4242+
4243+static int
4244+VSockVmciDgramSendmsg(struct kiocb *kiocb, // UNUSED
4245+ struct socket *sock, // IN: socket to send on
4246+ struct msghdr *msg, // IN: message to send
4247+ size_t len) // IN: length of message
4248+{
4249+ int err;
4250+ struct sock *sk;
4251+ VSockVmciSock *vsk;
4252+ struct sockaddr_vm *remoteAddr;
4253+ VMCIDatagram *dg;
4254+
4255+ if (msg->msg_flags & MSG_OOB) {
4256+ return -EOPNOTSUPP;
4257+ }
4258+
4259+ if (len > VMCI_MAX_DG_PAYLOAD_SIZE) {
4260+ return -EMSGSIZE;
4261+ }
4262+
4263+ /* For now, MSG_DONTWAIT is always assumed... */
4264+ err = 0;
4265+ sk = sock->sk;
4266+ vsk = vsock_sk(sk);
4267+
4268+ lock_sock(sk);
4269+
4270+ if (!VSockAddr_Bound(&vsk->localAddr)) {
4271+ struct sockaddr_vm localAddr;
4272+
4273+ VSockAddr_Init(&localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
4274+ if ((err = __VSockVmciBind(sk, &localAddr))) {
4275+ goto out;
4276+ }
4277+ }
4278+
4279+ /*
4280+ * If the provided message contains an address, use that. Otherwise fall
4281+ * back on the socket's remote handle (if it has been connected).
4282+ */
4283+ if (msg->msg_name &&
4284+ VSockAddr_Cast(msg->msg_name, msg->msg_namelen, &remoteAddr) == 0) {
4285+ /* Ensure this address is of the right type and is a valid destination. */
4286+ // XXXAB Temporary to handle test program
4287+ if (remoteAddr->svm_cid == VMADDR_CID_ANY) {
4288+ remoteAddr->svm_cid = VMCI_GetContextID();
4289+ }
4290+
4291+ if (!VSockAddr_Bound(remoteAddr)) {
4292+ err = -EINVAL;
4293+ goto out;
4294+ }
4295+ } else if (sock->state == SS_CONNECTED) {
4296+ remoteAddr = &vsk->remoteAddr;
4297+ // XXXAB Temporary to handle test program
4298+ if (remoteAddr->svm_cid == VMADDR_CID_ANY) {
4299+ remoteAddr->svm_cid = VMCI_GetContextID();
4300+ }
4301+
4302+ /* XXX Should connect() or this function ensure remoteAddr is bound? */
4303+ if (!VSockAddr_Bound(&vsk->remoteAddr)) {
4304+ err = -EINVAL;
4305+ goto out;
4306+ }
4307+ } else {
4308+ err = -EINVAL;
4309+ goto out;
4310+ }
4311+
4312+ /*
4313+ * Make sure that we don't allow a userlevel app to send datagrams
4314+ * to the hypervisor that modify VMCI device state.
4315+ */
4316+ if (!VSockAddr_SocketContextDgram(remoteAddr->svm_cid,
4317+ remoteAddr->svm_port)) {
4318+ err = -EINVAL;
4319+ goto out;
4320+ }
4321+
4322+ if (!VSockVmciAllowDgram(vsk, remoteAddr->svm_cid)) {
4323+ err = -EPERM;
4324+ goto out;
4325+ }
4326+
4327+ /*
4328+ * Allocate a buffer for the user's message and our packet header.
4329+ */
4330+ dg = kmalloc(len + sizeof *dg, GFP_KERNEL);
4331+ if (!dg) {
4332+ err = -ENOMEM;
4333+ goto out;
4334+ }
4335+
4336+ memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), msg->msg_iov, len);
4337+
4338+ dg->dst = VMCI_MAKE_HANDLE(remoteAddr->svm_cid, remoteAddr->svm_port);
4339+ dg->src = VMCI_MAKE_HANDLE(vsk->localAddr.svm_cid, vsk->localAddr.svm_port);
4340+
4341+ dg->payloadSize = len;
4342+
4343+ err = VMCIDatagram_Send(dg);
4344+ kfree(dg);
4345+ if (err < 0) {
4346+ err = VSockVmci_ErrorToVSockError(err);
4347+ goto out;
4348+ }
4349+
4350+ err -= sizeof *dg;
4351+
4352+out:
4353+ release_sock(sk);
4354+ return err;
4355+}
4356+
4357+
4358+/*
4359+ *----------------------------------------------------------------------------
4360+ *
4361+ * VSockVmciStreamSetsockopt --
4362+ *
4363+ * Set a socket option on a stream socket
4364+ *
4365+ * Results:
4366+ * 0 on success, negative error code on failure.
4367+ *
4368+ * Side effects:
4369+ * None.
4370+ *
4371+ *----------------------------------------------------------------------------
4372+ */
4373+
4374+int
4375+VSockVmciStreamSetsockopt(struct socket *sock, // IN/OUT
4376+ int level, // IN
4377+ int optname, // IN
4378+ char __user *optval, // IN
4379+ VSockSetsockoptLenType optlen) // IN
4380+{
4381+ int err;
4382+ struct sock *sk;
4383+ VSockVmciSock *vsk;
4384+ uint64 val;
4385+
4386+ if (level != VSockVmci_GetAFValue()) {
4387+ return -ENOPROTOOPT;
4388+ }
4389+
4390+# define COPY_IN(_v) \
4391+ do { \
4392+ if (optlen < sizeof _v) { \
4393+ err = -EINVAL; \
4394+ goto exit; \
4395+ } \
4396+ if (copy_from_user(&_v, optval, sizeof _v) != 0) { \
4397+ err = -EFAULT; \
4398+ goto exit; \
4399+ } \
4400+ } while (0)
4401+
4402+ err = 0;
4403+ sk = sock->sk;
4404+ vsk = vsock_sk(sk);
4405+
4406+ ASSERT(vsk->queuePairMinSize <= vsk->queuePairSize &&
4407+ vsk->queuePairSize <= vsk->queuePairMaxSize);
4408+
4409+ lock_sock(sk);
4410+
4411+ switch (optname) {
4412+ case SO_VMCI_BUFFER_SIZE:
4413+ COPY_IN(val);
4414+ if (val < vsk->queuePairMinSize) {
4415+ vsk->queuePairMinSize = val;
4416+ }
4417+
4418+ if (val > vsk->queuePairMaxSize) {
4419+ vsk->queuePairMaxSize = val;
4420+ }
4421+
4422+ vsk->queuePairSize = val;
4423+ break;
4424+
4425+ case SO_VMCI_BUFFER_MAX_SIZE:
4426+ COPY_IN(val);
4427+ if (val < vsk->queuePairSize) {
4428+ vsk->queuePairSize = val;
4429+ }
4430+ vsk->queuePairMaxSize = val;
4431+ break;
4432+
4433+ case SO_VMCI_BUFFER_MIN_SIZE:
4434+ COPY_IN(val);
4435+ if (val > vsk->queuePairSize) {
4436+ vsk->queuePairSize = val;
4437+ }
4438+ vsk->queuePairMinSize = val;
4439+ break;
4440+
4441+ case SO_VMCI_CONNECT_TIMEOUT: {
4442+ struct timeval tv;
4443+ COPY_IN(tv);
4444+ if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC &&
4445+ tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1)) {
4446+ vsk->connectTimeout = tv.tv_sec * HZ +
4447+ CEILING(tv.tv_usec, (1000000 / HZ));
4448+ if (vsk->connectTimeout == 0) {
4449+ vsk->connectTimeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
4450+ }
4451+ } else {
4452+ err = -ERANGE;
4453+ }
4454+ break;
4455+ }
4456+
4457+ default:
4458+ err = -ENOPROTOOPT;
4459+ break;
4460+ }
4461+
4462+# undef COPY_IN
4463+
4464+ ASSERT(vsk->queuePairMinSize <= vsk->queuePairSize &&
4465+ vsk->queuePairSize <= vsk->queuePairMaxSize);
4466+exit:
4467+ release_sock(sk);
4468+ return err;
4469+}
4470+
4471+
4472+/*
4473+ *----------------------------------------------------------------------------
4474+ *
4475+ * VSockVmciStreamGetsockopt --
4476+ *
4477+ * Get a socket option for a stream socket
4478+ *
4479+ * Results:
4480+ * 0 on success, negative error code on failure.
4481+ *
4482+ * Side effects:
4483+ * None.
4484+ *
4485+ *----------------------------------------------------------------------------
4486+ */
4487+
4488+int
4489+VSockVmciStreamGetsockopt(struct socket *sock, // IN
4490+ int level, // IN
4491+ int optname, // IN
4492+ char __user *optval, // OUT
4493+ int __user * optlen) // IN/OUT
4494+{
4495+ int err;
4496+ int len;
4497+ struct sock *sk;
4498+ VSockVmciSock *vsk;
4499+
4500+ if (level != VSockVmci_GetAFValue()) {
4501+ return -ENOPROTOOPT;
4502+ }
4503+
4504+ if ((err = get_user(len, optlen)) != 0) {
4505+ return err;
4506+ }
4507+
4508+# define COPY_OUT(_v) \
4509+ do { \
4510+ if (len < sizeof _v) { \
4511+ return -EINVAL; \
4512+ } \
4513+ len = sizeof _v; \
4514+ if (copy_to_user(optval, &_v, len) != 0) { \
4515+ return -EFAULT; \
4516+ } \
4517+ } while (0)
4518+
4519+ err = 0;
4520+ sk = sock->sk;
4521+ vsk = vsock_sk(sk);
4522+
4523+ switch (optname) {
4524+ case SO_VMCI_BUFFER_SIZE:
4525+ COPY_OUT(vsk->queuePairSize);
4526+ break;
4527+
4528+ case SO_VMCI_BUFFER_MAX_SIZE:
4529+ COPY_OUT(vsk->queuePairMaxSize);
4530+ break;
4531+
4532+ case SO_VMCI_BUFFER_MIN_SIZE:
4533+ COPY_OUT(vsk->queuePairMinSize);
4534+ break;
4535+
4536+ case SO_VMCI_CONNECT_TIMEOUT: {
4537+ struct timeval tv;
4538+ tv.tv_sec = vsk->connectTimeout / HZ;
4539+ tv.tv_usec = (vsk->connectTimeout - tv.tv_sec * HZ) * (1000000 / HZ);
4540+ COPY_OUT(tv);
4541+ break;
4542+ }
4543+ default:
4544+ return -ENOPROTOOPT;
4545+ }
4546+
4547+ if ((err = put_user(len, optlen)) != 0) {
4548+ return -EFAULT;
4549+ }
4550+
4551+# undef COPY_OUT
4552+
4553+ return 0;
4554+}
4555+
4556+
4557+/*
4558+ *----------------------------------------------------------------------------
4559+ *
4560+ * VSockVmciStreamSendmsg --
4561+ *
4562+ * Sends a message on the socket.
4563+ *
4564+ * Results:
4565+ * Number of bytes sent on success, negative error code on failure.
4566+ *
4567+ * Side effects:
4568+ * None.
4569+ *
4570+ *----------------------------------------------------------------------------
4571+ */
4572+
4573+static int
4574+VSockVmciStreamSendmsg(struct kiocb *kiocb, // UNUSED
4575+ struct socket *sock, // IN: socket to send on
4576+ struct msghdr *msg, // IN: message to send
4577+ size_t len) // IN: length of message
4578+{
4579+ struct sock *sk;
4580+ VSockVmciSock *vsk;
4581+ ssize_t totalWritten;
4582+ long timeout;
4583+ int err;
4584+ VSockVmciSendNotifyData sendData;
4585+
4586+ DEFINE_WAIT(wait);
4587+
4588+ sk = sock->sk;
4589+ vsk = vsock_sk(sk);
4590+ totalWritten = 0;
4591+ err = 0;
4592+
4593+ if (msg->msg_flags & MSG_OOB) {
4594+ return -EOPNOTSUPP;
4595+ }
4596+
4597+ lock_sock(sk);
4598+
4599+ /* Callers should not provide a destination with stream sockets. */
4600+ if (msg->msg_namelen) {
4601+ err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP;
4602+ goto out;
4603+ }
4604+
4605+ /* Send data only if both sides are not shutdown in the direction. */
4606+ if (sk->sk_shutdown & SEND_SHUTDOWN ||
4607+ vsk->peerShutdown & RCV_SHUTDOWN) {
4608+ err = -EPIPE;
4609+ goto out;
4610+ }
4611+
4612+ if (sk->sk_state != SS_CONNECTED ||
4613+ !VSockAddr_Bound(&vsk->localAddr)) {
4614+ err = -ENOTCONN;
4615+ goto out;
4616+ }
4617+
4618+ if (!VSockAddr_Bound(&vsk->remoteAddr)) {
4619+ err = -EDESTADDRREQ;
4620+ goto out;
4621+ }
4622+
4623+ /*
4624+ * Wait for room in the produce queue to enqueue our user's data.
4625+ */
4626+ timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
4627+
4628+ NOTIFYCALLRET(vsk, err, sendInit, sk, &sendData);
4629+ if (err < 0) {
4630+ goto out;
4631+ }
4632+
4633+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
4634+
4635+ while (totalWritten < len) {
4636+ ssize_t written;
4637+
4638+ while (VSockVmciStreamHasSpace(vsk) == 0 &&
4639+ sk->sk_err == 0 &&
4640+ !(sk->sk_shutdown & SEND_SHUTDOWN) &&
4641+ !(vsk->peerShutdown & RCV_SHUTDOWN)) {
4642+
4643+ /* Don't wait for non-blocking sockets. */
4644+ if (timeout == 0) {
4645+ err = -EAGAIN;
4646+ goto outWait;
4647+ }
4648+
4649+ NOTIFYCALLRET(vsk, err, sendPreBlock, sk, &sendData);
4650+
4651+ if (err < 0) {
4652+ goto outWait;
4653+ }
4654+
4655+ release_sock(sk);
4656+ timeout = schedule_timeout(timeout);
4657+ lock_sock(sk);
4658+ if (signal_pending(current)) {
4659+ err = sock_intr_errno(timeout);
4660+ goto outWait;
4661+ } else if (timeout == 0) {
4662+ err = -EAGAIN;
4663+ goto outWait;
4664+ }
4665+
4666+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
4667+ }
4668+
4669+ /*
4670+ * These checks occur both as part of and after the loop conditional
4671+ * since we need to check before and after sleeping.
4672+ */
4673+ if (sk->sk_err) {
4674+ err = -sk->sk_err;
4675+ goto outWait;
4676+ } else if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
4677+ (vsk->peerShutdown & RCV_SHUTDOWN)) {
4678+ err = -EPIPE;
4679+ goto outWait;
4680+ }
4681+
4682+ VSOCK_STATS_STREAM_PRODUCE_HIST(vsk);
4683+
4684+ NOTIFYCALLRET(vsk, err, sendPreEnqueue, sk, &sendData);
4685+ if (err < 0) {
4686+ goto outWait;
4687+ }
4688+
4689+ /*
4690+ * Note that enqueue will only write as many bytes as are free in the
4691+ * produce queue, so we don't need to ensure len is smaller than the queue
4692+ * size. It is the caller's responsibility to check how many bytes we were
4693+ * able to send.
4694+ */
4695+
4696+ written = VMCIQPair_EnqueueV(vsk->qpair, msg->msg_iov,
4697+ len - totalWritten, 0);
4698+ if (written < 0) {
4699+ err = -ENOMEM;
4700+ goto outWait;
4701+ }
4702+
4703+ totalWritten += written;
4704+
4705+ NOTIFYCALLRET(vsk, err, sendPostEnqueue, sk, written, &sendData);
4706+ if (err < 0) {
4707+ goto outWait;
4708+ }
4709+ }
4710+
4711+ ASSERT(totalWritten <= INT_MAX);
4712+
4713+outWait:
4714+ if (totalWritten > 0) {
4715+ VSOCK_STATS_STREAM_PRODUCE(totalWritten);
4716+ err = totalWritten;
4717+ }
4718+ finish_wait(sk_sleep(sk), &wait);
4719+out:
4720+ release_sock(sk);
4721+ return err;
4722+}
4723+
4724+
4725+
4726+/*
4727+ *----------------------------------------------------------------------------
4728+ *
4729+ * VSockVmciDgramRecvmsg --
4730+ *
4731+ * Receives a datagram and places it in the caller's msg.
4732+ *
4733+ * Results:
4734+ * The size of the payload on success, negative value on failure.
4735+ *
4736+ * Side effects:
4737+ * None.
4738+ *
4739+ *----------------------------------------------------------------------------
4740+ */
4741+
4742+static int
4743+VSockVmciDgramRecvmsg(struct kiocb *kiocb, // UNUSED
4744+ struct socket *sock, // IN: socket to receive from
4745+ struct msghdr *msg, // IN/OUT: message to receive into
4746+ size_t len, // IN: length of receive buffer
4747+ int flags) // IN: receive flags
4748+{
4749+ int err;
4750+ int noblock;
4751+ struct sock *sk;
4752+ VMCIDatagram *dg;
4753+ size_t payloadLen;
4754+ struct sk_buff *skb;
4755+
4756+ sk = sock->sk;
4757+ noblock = flags & MSG_DONTWAIT;
4758+
4759+ if (flags & MSG_OOB || flags & MSG_ERRQUEUE) {
4760+ return -EOPNOTSUPP;
4761+ }
4762+
4763+ /* Retrieve the head sk_buff from the socket's receive queue. */
4764+ err = 0;
4765+ skb = skb_recv_datagram(sk, flags, noblock, &err);
4766+ if (err) {
4767+ return err;
4768+ }
4769+
4770+ if (!skb) {
4771+ return -EAGAIN;
4772+ }
4773+
4774+ dg = (VMCIDatagram *)skb->data;
4775+ if (!dg) {
4776+ /* err is 0, meaning we read zero bytes. */
4777+ goto out;
4778+ }
4779+
4780+ payloadLen = dg->payloadSize;
4781+ /* Ensure the sk_buff matches the payload size claimed in the packet. */
4782+ if (payloadLen != skb->len - sizeof *dg) {
4783+ err = -EINVAL;
4784+ goto out;
4785+ }
4786+
4787+ if (payloadLen > len) {
4788+ payloadLen = len;
4789+ msg->msg_flags |= MSG_TRUNC;
4790+ }
4791+
4792+ /* Place the datagram payload in the user's iovec. */
4793+ err = skb_copy_datagram_iovec(skb, sizeof *dg, msg->msg_iov, payloadLen);
4794+ if (err) {
4795+ goto out;
4796+ }
4797+
4798+ msg->msg_namelen = 0;
4799+ if (msg->msg_name) {
4800+ struct sockaddr_vm *vmciAddr;
4801+
4802+ /* Provide the address of the sender. */
4803+ vmciAddr = (struct sockaddr_vm *)msg->msg_name;
4804+ VSockAddr_Init(vmciAddr,
4805+ VMCI_HANDLE_TO_CONTEXT_ID(dg->src),
4806+ VMCI_HANDLE_TO_RESOURCE_ID(dg->src));
4807+ msg->msg_namelen = sizeof *vmciAddr;
4808+ }
4809+ err = payloadLen;
4810+
4811+out:
4812+ skb_free_datagram(sk, skb);
4813+ return err;
4814+}
4815+
4816+
4817+/*
4818+ *----------------------------------------------------------------------------
4819+ *
4820+ * VSockVmciStreamRecvmsg --
4821+ *
4822+ * Receives data from a stream socket and places it in the caller's msg.
4823+ *
4824+ * Results:
4825+ * The size of the payload on success, negative value on failure.
4826+ *
4827+ * Side effects:
4828+ * None.
4829+ *
4830+ *----------------------------------------------------------------------------
4831+ */
4832+
4833+static int
4834+VSockVmciStreamRecvmsg(struct kiocb *kiocb, // UNUSED
4835+ struct socket *sock, // IN: socket to receive from
4836+ struct msghdr *msg, // IN/OUT: message to receive into
4837+ size_t len, // IN: length of receive buffer
4838+ int flags) // IN: receive flags
4839+{
4840+ struct sock *sk;
4841+ VSockVmciSock *vsk;
4842+ int err;
4843+ size_t target;
4844+ ssize_t copied;
4845+ long timeout;
4846+
4847+ VSockVmciRecvNotifyData recvData;
4848+
4849+ DEFINE_WAIT(wait);
4850+
4851+ sk = sock->sk;
4852+ vsk = vsock_sk(sk);
4853+ err = 0;
4854+
4855+ lock_sock(sk);
4856+
4857+ if (sk->sk_state != SS_CONNECTED) {
4858+ /*
4859+ * Recvmsg is supposed to return 0 if a peer performs an orderly shutdown.
4860+ * Differentiate between that case and when a peer has not connected or a
4861+ * local shutdown occurred with the SOCK_DONE flag.
4862+ */
4863+ if (sock_flag(sk, SOCK_DONE)) {
4864+ err = 0;
4865+ } else {
4866+ err = -ENOTCONN;
4867+ }
4868+ goto out;
4869+ }
4870+
4871+ if (flags & MSG_OOB) {
4872+ err = -EOPNOTSUPP;
4873+ goto out;
4874+ }
4875+
4876+ /*
4877+ * We don't check peerShutdown flag here since peer may actually shut down,
4878+ * but there can be data in the VMCI queue that local socket can receive.
4879+ */
4880+ if (sk->sk_shutdown & RCV_SHUTDOWN) {
4881+ err = 0;
4882+ goto out;
4883+ }
4884+
4885+ /*
4886+ * It is valid on Linux to pass in a zero-length receive buffer. This
4887+ * is not an error. We may as well bail out now. Note that if we don't,
4888+ * we will fail "ASSERT(copied >= target)" after we dequeue, because the
4889+ * minimum target is always 1 byte.
4890+ */
4891+ if (!len) {
4892+ err = 0;
4893+ goto out;
4894+ }
4895+
4896+ /*
4897+ * We must not copy less than target bytes into the user's buffer before
4898+ * returning successfully, so we wait for the consume queue to have that
4899+ * much data to consume before dequeueing. Note that this makes it
4900+ * impossible to handle cases where target is greater than the queue size.
4901+ */
4902+ target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
4903+ if (target >= vsk->consumeSize) {
4904+ err = -ENOMEM;
4905+ goto out;
4906+ }
4907+ timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
4908+ copied = 0;
4909+
4910+ NOTIFYCALLRET(vsk, err, recvInit, sk, target, &recvData);
4911+ if (err < 0) {
4912+ goto out;
4913+ }
4914+
4915+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
4916+
4917+ while (1) {
4918+ int64 ready = VSockVmciStreamHasData(vsk);
4919+
4920+ if (ready < 0) {
4921+ /*
4922+ * Invalid queue pair content. XXX This should be changed to
4923+ * a connection reset in a later change.
4924+ */
4925+
4926+ err = -ENOMEM;
4927+ goto outWait;
4928+ } else if (ready > 0) {
4929+ ssize_t read;
4930+
4931+ VSOCK_STATS_STREAM_CONSUME_HIST(vsk);
4932+
4933+ NOTIFYCALLRET(vsk, err, recvPreDequeue, sk, target, &recvData);
4934+ if (err < 0) {
4935+ break;
4936+ }
4937+
4938+ if (flags & MSG_PEEK) {
4939+ read = VMCIQPair_PeekV(vsk->qpair, msg->msg_iov, len - copied, 0);
4940+ } else {
4941+ read = VMCIQPair_DequeueV(vsk->qpair, msg->msg_iov, len - copied, 0);
4942+ }
4943+
4944+ if (read < 0) {
4945+ err = -ENOMEM;
4946+ break;
4947+ }
4948+
4949+ ASSERT(read <= INT_MAX);
4950+ copied += read;
4951+
4952+ NOTIFYCALLRET(vsk, err, recvPostDequeue, sk, target, read,
4953+ !(flags & MSG_PEEK), &recvData);
4954+ if (err < 0) {
4955+ goto outWait;
4956+ }
4957+
4958+ if (read >= target || flags & MSG_PEEK) {
4959+ break;
4960+ }
4961+ target -= read;
4962+ } else {
4963+ if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) ||
4964+ (vsk->peerShutdown & SEND_SHUTDOWN)) {
4965+ break;
4966+ }
4967+ /* Don't wait for non-blocking sockets. */
4968+ if (timeout == 0) {
4969+ err = -EAGAIN;
4970+ break;
4971+ }
4972+
4973+ NOTIFYCALLRET(vsk, err, recvPreBlock, sk, target, &recvData);
4974+ if (err < 0) {
4975+ break;
4976+ }
4977+
4978+ release_sock(sk);
4979+ timeout = schedule_timeout(timeout);
4980+ lock_sock(sk);
4981+
4982+ if (signal_pending(current)) {
4983+ err = sock_intr_errno(timeout);
4984+ break;
4985+ } else if (timeout == 0) {
4986+ err = -EAGAIN;
4987+ break;
4988+ }
4989+
4990+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
4991+ }
4992+ }
4993+
4994+ if (sk->sk_err) {
4995+ err = -sk->sk_err;
4996+ } else if (sk->sk_shutdown & RCV_SHUTDOWN) {
4997+ err = 0;
4998+ }
4999+
5000+ if (copied > 0) {
The diff has been truncated for viewing.

Subscribers

People subscribed via source and target branches